How to create layer0 input for input images with 3 channels - theano

Hi, I am following the http://deeplearning.net/tutorial/code/convolutional_mlp.py code to implement a convolutional neural net. I have input images where the channels are important, and hence I want a 3-channel feature map as the layer 0 input.
So I need something like this:
layer0_input = x.reshape((batch_size, 3, 240, 135)) # width 240, height 135, 3 channels
instead of
layer0_input = x.reshape((batch_size, 1, 28, 28)) # 28*28 normalized MNIST gray scale images
which will be used here
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 240, 135),
    filter_shape=(nkerns[0], 3, 5, 5),  # filter depth must match the 3 input channels
    poolsize=(2, 2)
)
where that x is provided to Theano as
train_model = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
So my question is: how should I create (shape) that train_set_x?
With grayscale intensity (i.e. a single channel), train_set_x is created as
shared_x = theano.shared(numpy.asarray(data_x,
                                       dtype=theano.config.floatX),
                         borrow=borrow)
where data_x is a flattened numpy array of length 784 (for 28*28 pixel images).
Thanks a lot for any advice.

I was able to get it working. I am pasting some code here which might help someone. It's not very elegant, but it works.
def shuffle_in_unison(a, b):
    # courtesy http://stackoverflow.com/users/190280/josh-bleecher-snyder
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b
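# (An equivalent, more concise unison shuffle using numpy fancy indexing would be:
#  p = np.random.permutation(len(a)); return a[p], b[p])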
def createDataSet(imagefolder):
    os.chdir(imagefolder)
    # total number of files
    number_of_files = len([item for item in os.listdir('.') if os.path.isfile(os.path.join('.', item))])
    # get a shuffled list: I needed this because my image names were of the format n_x_<some details>.jpg
    # where n was my target and x was a number from 0 to m-1, where m was the number of samples
    # of the target value n. So I needed to shuffle and iterate while putting images in the train,
    # test and validate arrays
    image_index_array = range(0, number_of_files)
    random.seed(12)
    random.shuffle(image_index_array)
    # split 80/10/10 - train/test/val
    trainsize = int(number_of_files * .8)
    testsize = int(number_of_files * .1)
    valsize = number_of_files - trainsize - testsize
    # create the random value arrays of train/test/val by slicing the total image index array
    train_index_array = image_index_array[0:trainsize]
    test_index_array = image_index_array[trainsize:trainsize + testsize]
    validate_index_array = image_index_array[trainsize + testsize:]
    # initialize the data structures
    dataset = {'train': [[], []], 'test': [[], []], 'validate': [[], []]}
    i_counter = 0
    train_X = []
    train_y = []
    test_X = []
    test_y = []
    val_X = []
    val_y = []
    for item in os.listdir('.'):
        if not os.path.isfile(os.path.join('.', item)):
            continue
        if item.endswith('.pkl'):
            continue
        print 'Processing item ' + item
        item_y = item.split('_')[0]
        item_x = cv2.imread(item)
        height, width = item_x.shape[:2]
        # this was my requirement - skip it if you do not need it
        if(height != 135 or width != 240):
            continue
        # get 3 channels
        b, g, r = cv2.split(item_x)
        item_x = [b, g, r]
        item_x = np.array(item_x)
        item_x = item_x.reshape(3, 135 * 240)
        if i_counter in test_index_array:
            test_X.append(item_x)
            test_y.append(item_y)
        elif i_counter in validate_index_array:
            val_X.append(item_x)
            val_y.append(item_y)
        else:
            train_X.append(item_x)
            train_y.append(item_y)
        i_counter = i_counter + 1
    # fix the dimensions. Flatten out the channel and intensity dimensions
    train_X = np.array(train_X)
    train_X = train_X.reshape(train_X.shape[0], train_X.shape[1] * train_X.shape[2])
    test_X = np.array(test_X)
    test_X = test_X.reshape(test_X.shape[0], test_X.shape[1] * test_X.shape[2])
    val_X = np.array(val_X)
    val_X = val_X.reshape(val_X.shape[0], val_X.shape[1] * val_X.shape[2])
    train_y = np.array(train_y)
    test_y = np.array(test_y)
    val_y = np.array(val_y)
    # shuffle the train and test arrays in unison
    train_X, train_y = shuffle_in_unison(train_X, train_y)
    test_X, test_y = shuffle_in_unison(test_X, test_y)
    # pickle them
    dataset['train'] = [train_X, train_y]
    dataset['test'] = [test_X, test_y]
    dataset['validate'] = [val_X, val_y]
    output = open('pcount.pkl', 'wb')
    cPickle.dump(dataset, output)
    output.close()
Once you have this pickle file, you can use it in convolutional_mlp.py like this:
layer0_input = x.reshape((batch_size, 3, 135, 240))
# Construct the first convolutional pooling layer:
# filtering reduces the image size to (135-8+1, 240-5+1) = (128, 236)
# maxpooling reduces this further to (128/2, 236/2) = (64, 118)
# 4D output tensor is thus of shape (batch_size, nkerns[0], 64, 118)
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 135, 240),
    filter_shape=(nkerns[0], 3, 8, 5),
    poolsize=(2, 2)
)
The load_data function in logistic_sgd.py will need a small change as below
f = open(dataset, 'rb')
dump = cPickle.load(f)
train_set = dump['train']
valid_set = dump['validate']
test_set = dump['test']
f.close()
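To feed these arrays to the train_model function above, they still need to be wrapped in Theano shared variables. A minimal sketch, modelled on the tutorial's shared_dataset helper in logistic_sgd.py (the int32 cast of the labels follows the tutorial's convention):
def shared_dataset(data_xy, borrow=True):
    data_x, data_y = data_xy
    # each row of data_x is one flattened image of length 3*135*240 = 97200
    shared_x = theano.shared(numpy.asarray(data_x, dtype=theano.config.floatX), borrow=borrow)
    shared_y = theano.shared(numpy.asarray(data_y, dtype=theano.config.floatX), borrow=borrow)
    # labels are used as indices, so cast them to int32
    return shared_x, T.cast(shared_y, 'int32')

train_set_x, train_set_y = shared_dataset(train_set)
valid_set_x, valid_set_y = shared_dataset(valid_set)
test_set_x, test_set_y = shared_dataset(test_set)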
Hope this helps

Related

ValueError: No gradients provided for any variable while label data has already been defined

Currently, I am trying to implement MLP-based MNIST classification training code, mimicking the operational flow of computing-in-memory (CIM).
To mimic the CIM operation and match the memory-array size, I divided the 3 hidden layers and the output layer into 13, 8, 8 and 8 parallel sub-layers, respectively.
After dividing each hidden and output layer, I implemented the hardware-noise layers between them.
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers import Input, Lambda
from tensorflow.keras.models import Model
import larq
import sys
sys.path.append("PACKAGE_PATH")
from variation import shrinkPS_func as ShrinkPS
from variation import ADC_4bModel_func as ADC_4bModel
# Import MNIST dataset
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.mnist.load_data()
train_images = train_images.reshape((60000, 28, 28))
test_images = test_images.reshape((10000, 28, 28))
# Normalize pixel values to be between -1 and +1, and binarize it to -1 and +1
train_images, test_images = train_images / 127.5 - 1, test_images / 127.5 - 1
train_images_binary = np.where(train_images > 0, 1, -1)
test_images_binary = np.where(test_images > 0, 1, -1)
del train_images, test_images
# Returns input tensor and split the tensor according to the memory array size
nonflat_input_layer = Input(shape=(28, 28, 1))
input_layer = tf.keras.layers.Flatten()(nonflat_input_layer)
input_layer0, input_layer1, input_layer2 = input_layer[:,0:64], input_layer[:,64:128], input_layer[:,128:192]
input_layer3, input_layer4, input_layer5 = input_layer[:,192:256], input_layer[:,256:320], input_layer[:,320:384]
input_layer6, input_layer7, input_layer8 = input_layer[:,384:448], input_layer[:,448:512], input_layer[:,512:576]
input_layer9, input_layer10, input_layer11 = input_layer[:,576:640], input_layer[:,640:704], input_layer[:,704:768]
input_layer12 = input_layer[:,768:784]
# First hidden layer
hidden1_0 = larq.layers.QuantDense(512,
                                   kernel_quantizer = "ste_sign",
                                   kernel_constraint = "weight_clip",
                                   name = "first_hidden_layer0")(input_layer0)
## ellipsis ##
hidden1_12 = larq.layers.QuantDense(512,
                                    kernel_quantizer = "ste_sign",
                                    kernel_constraint = "weight_clip",
                                    name = "first_hidden_layer12")(input_layer12)
# Shrinking the PS according to the SPICE data
shrink1_0 = Lambda(lambda x: ShrinkPS(x), name="shrink1_0")(hidden1_0)
## ellipsis ##
shrink1_12 = Lambda(lambda x: ShrinkPS(x), name="shrink1_12")(hidden1_12)
# 4-bit SAR ADC conversion with the variation measured by SPICE
ADC1_0 = Lambda(lambda x: ADC_4bModel(x), name="ADC1_0")(shrink1_0)
## ellipsis ##
ADC1_12 = Lambda(lambda x: ADC_4bModel(x), name="ADC1_12")(shrink1_12)
# Digital Sum
ADCsum_1 = layers.Add()([ADC1_0, ADC1_1, ADC1_2, ADC1_3, ADC1_4, ADC1_5, ADC1_6,
                         ADC1_7, ADC1_8, ADC1_9, ADC1_10, ADC1_11, ADC1_12])
# Binarize the digital sum
pre_binarize1 = layers.Subtract()([ADCsum_1, tf.constant(91, dtype = tf.float32, shape = (1))])
quantize1 = larq.quantizers.SteSign()(pre_binarize1)
# Split
quantize1_0 = quantize1[:,0:64]
## ellipsis ##
quantize1_7 = quantize1[:,448:512]
Similar code is implemented for the second and third hidden layers and their outputs.
For the output of the model, the code is implemented as follows.
# Output layer
output_0 = larq.layers.QuantDense(10,
                                  kernel_quantizer = "ste_sign",
                                  kernel_constraint = "weight_clip",
                                  name = "output_layer0")(quantize3_0)
## ellipsis ##
output_7 = larq.layers.QuantDense(10,
                                  kernel_quantizer = "ste_sign",
                                  kernel_constraint = "weight_clip",
                                  name = "output_layer7")(quantize3_7)
# Shrinking the PS according to the SPICE data
shrinkOut_0 = Lambda(lambda x: ShrinkPS(x), name="shrinkOut_0")(output_0)
## ellipsis ##
shrinkOut_7 = Lambda(lambda x: ShrinkPS(x), name="shrinkOut_7")(output_7)
# 4-bit SAR ADC conversion with the variation measured by SPICE
ADCOut_0 = Lambda(lambda x: ADC_4bModel(x), name="ADCOut_0")(shrinkOut_0)
## ellipsis ##
ADCOut_7 = Lambda(lambda x: ADC_4bModel(x), name="ADCOut_7")(shrinkOut_7)
# Digital Sum
ADCsum_Out = layers.Add()([ADCOut_0, ADCOut_1, ADCOut_2, ADCOut_3, ADCOut_4, ADCOut_5, ADCOut_6, ADCOut_7])
# Activation
pre_binarizeOut = layers.Subtract()([ADCsum_Out, tf.constant(56, dtype = tf.float32, shape = (1))])
# Softmax
output = layers.Softmax(name = "output")(pre_binarizeOut)
# Implementing a model based on given functional API codes
model = Model(inputs= nonflat_input_layer, outputs= output)
# Compile & Train the model
model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate=0.001),
              loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics = ['accuracy'])
model.fit(train_images_binary, train_labels, batch_size = 300, epochs = 100,
          validation_data=(test_images_binary, test_labels))
test_loss, test_acc = model.evaluate(test_images_binary, test_labels)
print(f"Test accuracy {test_acc * 100:.2f} %")
The code below shows the ShrinkPS and ADC_4bModel functions that are applied via the Lambda layers above (note they also rely on numpy and scipy.interpolate).
# Shrinking the partial sum according to the variation of bitcell array's Vbl output.
def shrinkPS_func(inputPS):
    # fitted quadratic: output = a * PS^2 + b * PS + c
    coeff_a = tf.constant(0.0003883012)
    coeff_b = tf.constant(0.7028718)
    coeff_c = tf.constant(0.16390099)
    outputs = tf.math.multiply(coeff_a, tf.math.square(inputPS))
    outputs = tf.math.add(outputs, tf.math.multiply(coeff_b, inputPS))
    outputs = tf.math.add(outputs, coeff_c)
    return outputs
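# Quick sanity check of the fitted quadratic (values computed by hand from the
# coefficients above, not SPICE data): shrinkPS_func(tf.constant([0.0, 64.0]))
# yields approximately [0.164, 46.74].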
# Implementing the ADC model which has fitted variation.
#tf.function
def ADC_4bModel_func(in_ADC_PS):
    # Loading the data
    inputPS = np.loadtxt("~/inputPS.csv", dtype = float, delimiter = ',')
    PSmean_ADC = np.loadtxt("~/PSmean_ADC.csv", dtype = float, delimiter = ',')
    PS_Vdiff_Mean = np.loadtxt("~/PS_Vdiff_Mean.csv", dtype = float, delimiter = ',')
    PS_Vdiff_Std = np.loadtxt("~/PS_Vdiff_Std.csv", dtype = float, delimiter = ',')
    P1_PSmean = PSmean_ADC[:, 1]
    P2_PSmean = PSmean_ADC[:, 2]
    P3_PSmean = PSmean_ADC[:, 3]
    P4_PSmean = PSmean_ADC[:, 4]
    # Implementing tensor-based ADC model
    CompBndry = tf.constant(0.0, dtype = tf.float64)
    "From Phase 0 to 1"
    Func_P0to1Mean = scipy.interpolate.interp1d(inputPS, PS_Vdiff_Mean[:, 0], bounds_error = False, fill_value = 'extrapolate')
    Func_P0to1Std = scipy.interpolate.interp1d(inputPS, PS_Vdiff_Std[:, 0], bounds_error = False, fill_value = 'extrapolate')
    "From Phase 1 to 2"
    Func_P1to2Mean = scipy.interpolate.interp1d(P1_PSmean, PS_Vdiff_Mean[:, 1], bounds_error = False, fill_value = 'extrapolate')
    Func_P1to2Std = scipy.interpolate.interp1d(P1_PSmean, PS_Vdiff_Std[:, 1], bounds_error = False, fill_value = 'extrapolate')
    "From Phase 2 to 3"
    idxP2sort = np.argsort(P2_PSmean)
    P2_SortPS = np.array([P2_PSmean[idxP2sort], PS_Vdiff_Mean[:, 2][idxP2sort], PS_Vdiff_Std[:, 2][idxP2sort]])
    idxP2Low = np.where(P2_SortPS[0] < 0)
    idxP2High = np.where(P2_SortPS[0] > 0)
    Func_P2to3LowMean = scipy.interpolate.UnivariateSpline(P2_SortPS[0][idxP2Low], P2_SortPS[1][idxP2Low])
    Func_P2to3HighMean = scipy.interpolate.UnivariateSpline(P2_SortPS[0][idxP2High], P2_SortPS[1][idxP2High])
    Func_P2to3LowStd = scipy.interpolate.UnivariateSpline(P2_SortPS[0][idxP2Low], P2_SortPS[2][idxP2Low])
    Func_P2to3HighStd = scipy.interpolate.UnivariateSpline(P2_SortPS[0][idxP2High], P2_SortPS[2][idxP2High])
    "From Phase 3 to 4"
    idxP3sort = np.argsort(P3_PSmean)
    P3_SortPS = np.array([P3_PSmean[idxP3sort], PS_Vdiff_Mean[:, 3][idxP3sort], PS_Vdiff_Std[:, 3][idxP3sort]])
    idxP3Low = np.where(P3_SortPS[0] < 0)
    idxP3High = np.where(P3_SortPS[0] > 0)
    Func_P3to4LowMean = scipy.interpolate.UnivariateSpline(P3_SortPS[0][idxP3Low], P3_SortPS[1][idxP3Low])
    Func_P3to4HighMean = scipy.interpolate.UnivariateSpline(P3_SortPS[0][idxP3High], P3_SortPS[1][idxP3High])
    Func_P3to4LowStd = scipy.interpolate.UnivariateSpline(P3_SortPS[0][idxP3Low], P3_SortPS[2][idxP3Low])
    Func_P3to4HighStd = scipy.interpolate.UnivariateSpline(P3_SortPS[0][idxP3High], P3_SortPS[2][idxP3High])
    "From Phase 4 to 5"
    idxP4sort = np.argsort(P4_PSmean)
    P4_SortPS = np.array([P4_PSmean[idxP4sort], PS_Vdiff_Mean[:, 4][idxP4sort], PS_Vdiff_Std[:, 4][idxP4sort]])
    idxP4Low = np.where(P4_SortPS[0] < 0)
    idxP4High = np.where(P4_SortPS[0] > 0)
    Func_P4to5LowMean = scipy.interpolate.UnivariateSpline(P4_SortPS[0][idxP4Low], P4_SortPS[1][idxP4Low])
    Func_P4to5HighMean = scipy.interpolate.UnivariateSpline(P4_SortPS[0][idxP4High], P4_SortPS[1][idxP4High])
    Func_P4to5LowStd = scipy.interpolate.UnivariateSpline(P4_SortPS[0][idxP4Low], P4_SortPS[2][idxP4Low])
    Func_P4to5HighStd = scipy.interpolate.UnivariateSpline(P4_SortPS[0][idxP4High], P4_SortPS[2][idxP4High])
    "[Transition #1] Phase 0 -> 1"
    trns0to1_var = tf.random.normal(shape = tf.shape(in_ADC_PS),
                                    mean = tf.py_function(Func_P0to1Mean, [in_ADC_PS], tf.float64),
                                    stddev = tf.py_function(Func_P0to1Std, [in_ADC_PS], tf.float64),
                                    dtype = tf.float64)
    P1VarOut_ADC_PS = tf.cast(tf.math.scalar_mul(-1.0, trns0to1_var), tf.float64)
    "[Transition #2] Phase 1 -> 2"
    trns1to2_var = tf.random.normal(shape = tf.shape(P1VarOut_ADC_PS),
                                    mean = tf.py_function(Func_P1to2Mean, [P1VarOut_ADC_PS], tf.float64),
                                    stddev = tf.py_function(Func_P1to2Std, [P1VarOut_ADC_PS], tf.float64),
                                    dtype = tf.float64)
    P2VarOut_ADC_PS = tf.cast(tf.math.add(P1VarOut_ADC_PS, trns1to2_var), tf.float64)
    SARSET_trns1to2 = tf.math.sign(tf.math.subtract(CompBndry, P2VarOut_ADC_PS))
    "[Transition #3] Phase 2 -> 3"
    SARSET_trns2to3 = tf.math.sign(tf.math.subtract(CompBndry, P2VarOut_ADC_PS))
    if tf.math.less(P2VarOut_ADC_PS, CompBndry) is True:
        phase2to3Mean = tf.py_function(Func_P2to3LowMean, [P2VarOut_ADC_PS], tf.float64)
        phase3ErrStd = tf.py_function(Func_P2to3LowStd, [P2VarOut_ADC_PS], tf.float64)
    else:
        phase2to3Mean = tf.py_function(Func_P2to3HighMean, [P2VarOut_ADC_PS], tf.float64)
        phase3ErrStd = tf.py_function(Func_P2to3HighStd, [P2VarOut_ADC_PS], tf.float64)
    trns2to3_var = tf.math.multiply(SARSET_trns2to3, tf.random.normal(shape = tf.shape(P2VarOut_ADC_PS),
                                                                      mean = phase2to3Mean,
                                                                      stddev = phase3ErrStd,
                                                                      dtype = tf.float64))
    P3VarOut_ADC_PS = tf.cast(tf.math.add(P2VarOut_ADC_PS, trns2to3_var), tf.float64)
    "[Transition #4] Phase 3 -> 4"
    SARSET_trns3to4 = tf.math.sign(tf.math.subtract(CompBndry, P3VarOut_ADC_PS))
    if tf.math.less(P3VarOut_ADC_PS, CompBndry) is True:
        phase3to4Mean = tf.py_function(Func_P3to4LowMean, [P3VarOut_ADC_PS], tf.float64)
        phase4ErrStd = tf.py_function(Func_P3to4LowStd, [P3VarOut_ADC_PS], tf.float64)
    else:
        phase3to4Mean = tf.py_function(Func_P3to4HighMean, [P3VarOut_ADC_PS], tf.float64)
        phase4ErrStd = tf.py_function(Func_P3to4HighStd, [P3VarOut_ADC_PS], tf.float64)
    trns3to4_var = tf.math.multiply(SARSET_trns3to4, tf.random.normal(shape = tf.shape(P3VarOut_ADC_PS),
                                                                      mean = phase3to4Mean,
                                                                      stddev = phase4ErrStd,
                                                                      dtype = tf.float64))
    P4VarOut_ADC_PS = tf.cast(tf.math.add(P3VarOut_ADC_PS, trns3to4_var), tf.float64)
    "[Transition #5] Phase 4 -> 5"
    SARSET_trns4to5 = tf.math.sign(tf.math.subtract(CompBndry, P4VarOut_ADC_PS))
    if tf.math.less(P4VarOut_ADC_PS, CompBndry) is True:
        phase4to5Mean = tf.py_function(Func_P4to5LowMean, [P4VarOut_ADC_PS], tf.float64)
        phase5ErrStd = tf.py_function(Func_P4to5LowStd, [P4VarOut_ADC_PS], tf.float64)
    else:
        phase4to5Mean = tf.py_function(Func_P4to5HighMean, [P4VarOut_ADC_PS], tf.float64)
        phase5ErrStd = tf.py_function(Func_P4to5HighStd, [P4VarOut_ADC_PS], tf.float64)
    trns4to5_var = tf.math.multiply(SARSET_trns4to5, tf.random.normal(shape = tf.shape(P4VarOut_ADC_PS),
                                                                      mean = phase4to5Mean,
                                                                      stddev = phase5ErrStd,
                                                                      dtype = tf.float64))
    P5VarOut_ADC_PS = tf.cast(tf.math.add(P4VarOut_ADC_PS, trns4to5_var), tf.float64)
    "SARSET of Phase 5 & bit position setting"
    SARSET_phase5 = tf.math.sign(tf.math.subtract(CompBndry, P5VarOut_ADC_PS))
    Bit0 = tf.scalar_mul(8, tf.cast(tf.math.less(P2VarOut_ADC_PS, CompBndry), dtype = tf.int32))
    Bit1 = tf.scalar_mul(4, tf.cast(tf.math.less(P3VarOut_ADC_PS, CompBndry), dtype = tf.int32))
    Bit2 = tf.scalar_mul(2, tf.cast(tf.math.less(P4VarOut_ADC_PS, CompBndry), dtype = tf.int32))
    Bit3 = tf.cast(tf.math.less(P5VarOut_ADC_PS, CompBndry), dtype = tf.int32)
    ModelOut = tf.math.add(Bit3, tf.math.add(Bit2, tf.math.add(Bit1, Bit0)))
    ModelOut = tf.cast(ModelOut, dtype = tf.float32)
    return ModelOut
After implementing the neural network code, the following error message showed up:
ValueError: No gradients provided for any variable: ([],).
(in the error message's [], all of the hidden dense layers were listed)
I know that this kind of error occurs when the label data is not passed to model.fit(). Though I defined the label data both for training and evaluation, I still get this error. Are there any other reasons for this error? How can I solve it?

Tensorflow: How to use a generator for fit() which runs in parallel with multiple processes

I am trying to train a model on a data set which does not fit in my RAM.
Therefore I am using a data generator which inherits from tensorflow.keras.utils.Sequence, as shown below.
This works. However, because I am doing processing on the images, my training is CPU bound. Looking at GPU-Z, my GPU is only at 10-20% while one of my CPU cores is at its max.
To solve this I am trying to run the generator in parallel on all my 16 cores. However, when I set use_multiprocessing=True in the fit() function, the program freezes. And using workers=8 does not speed up the process, it just produces batches at uneven intervals, e.g. batches 1-8 are produced immediately, then there is some delay, and then batches 9-16 are produced.
The code below shows what I am trying to do.
#read the dataset
x, o_y = reader.read_dataset_whole(ETLCharacterGroups.kanji)
#split the data into 80/20 percent parts (train/test)
percentage = round(len(x) / 100 * 80)
x_train = x[:percentage]
x_test = x[percentage:]
y_train = o_y[:percentage]
y_test = o_y[percentage:]
def distort_sample(img : Image) -> (Image, [int], [int]):
    """
    Distort the given image randomly.
    Randomly applies the transformations:
        - rotation
        - shear
        - scale
        - translate
        - sharpen
        - blur
    Returns the distorted image.
    """
    offset, scale = (0, 0), (64, 64)
    t = random.choice(["sine"]) # "rotate", "shear", "scale",
    f = random.choice(["blur", "sharpen", "smooth"])
    # randomly apply transformations...
    # rotate image
    if("rotate" in t):
        img = img.rotate(random.uniform(-30, 30))
    # shear image
    if("shear" in t):
        y_shear = random.uniform(-0.2, 0.2)
        x_shear = random.uniform(-0.2, 0.2)
        img = img.transform(img.size, PImage.AFFINE, (1, x_shear, 0, y_shear, 1, 0))
    # scale and translate image
    if("scale" in t):
        # scale the image
        size_x = random.randrange(20, 63)
        size_y = random.randrange(20, 63)
        scale = (size_x, size_y)
        offset = (math.ceil((64 - size_x) / 2), math.ceil((64 - size_y) / 2))
        img = img.resize(scale)
        # put it again on a black background (translated)
        background = PImage.new('L', (64, 64))
        trans_x = random.randrange(0, math.floor((64 - size_x)))
        trans_y = random.randrange(0, math.floor((64 - size_y)))
        offset = (trans_x, trans_y)
        background.paste(img, offset)
        img = background
    if("sine" in t):
        t_img = np.array(img)
        A = t_img.shape[0] / 3.0
        w = 2.0 / t_img.shape[1]
        shift = lambda x: random.uniform(0.15, 0.2) * A * np.sin(-2*np.pi*x * w)
        for i in range(t_img.shape[0]):
            t_img[:,i] = np.roll(t_img[:,i], int(shift(i)))
        img = PImage.fromarray(t_img)
    # blur
    if("blur" in f):
        img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.2)))
    # sharpen
    if("sharpen" in f):
        img = img.filter(ImageFilter.SHARPEN)
    # smooth
    if("smooth" in f):
        img = img.filter(ImageFilter.SMOOTH)
    return img, offset, scale
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, x_col, y_col, batch_size, mode="training", shuffle=True):
        self.batch_size = batch_size
        self.undistorted_images = batch_size // 2
        self.shuffle = shuffle
        self.indices = len(x_col)
        self.x_col = x_col
        self.y_col = y_col
    def __len__(self):
        return self.indices // self.batch_size
    def on_epoch_end(self):
        if(False):
            rng_state = np.random.get_state()
            np.random.shuffle(x)
            np.random.set_state(rng_state)
            np.random.shuffle(o_y)
    def __getitem__(self, index):
        X, Y = [], []
        for i in range(index * self.undistorted_images, (index+1) * self.undistorted_images):
            base_img = self.x_col[i]
            img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
            # distort_sample() creates random variations of an image
            img, *unused = distort_sample(img)
            # add transformed image
            X.append(np.array(img).reshape(64, 64, 1))
            Y.append(self.y_col[i])
            # add base image
            X.append(base_img)
            Y.append(self.y_col[i])
        return np.array(X), np.array(Y)
#instantiate generators
training_generator = DataGenerator(x_col = x_train, y_col = y_train, batch_size = 256)
validation_generator = DataGenerator(x_col = x_test, y_col = y_test, batch_size = 256)
#train the model
hist = model.fit(
    x=training_generator,
    epochs=100,
    validation_data=validation_generator,  # fixed: the original passed training_generator here
    max_queue_size=50,
    workers=8,
    #use_multiprocessing=True <- this freezes the program
)
In the end I needed to make the data generator use multiprocessing. To do this, the arrays needed to be stored in shared memory and then used in the sub-processes.
import multiprocessing as mp
import numpy as np
from PIL import Image as PImage
from PIL import ImageFilter
import random
import math
import tensorflow as tf

shared_dict = {}

def distort_sample(img : PImage) -> (PImage, [int], [int]):
    """
    Distort the given image randomly.
    Randomly applies the transformations:
        rotation, shear, scale, translate
    Randomly applies the filter:
        sharpen, blur, smooth
    Returns the distorted image.
    """
    offset, scale = (0, 0), (64, 64)
    t = random.choice(["sine", "rotate", "shear", "scale"])
    f = random.choice(["blur", "sharpen", "smooth"])
    # randomly apply transformations...
    # rotate image
    if("rotate" in t):
        img = img.rotate(random.uniform(-15, 15))
    # shear image
    if("shear" in t):
        y_shear = random.uniform(-0.2, 0.2)
        x_shear = random.uniform(-0.2, 0.2)
        img = img.transform(img.size, PImage.AFFINE, (1, x_shear, 0, y_shear, 1, 0))
    # scale and translate image
    if("scale" in t):
        # scale the image
        size_x = random.randrange(25, 63)
        size_y = random.randrange(25, 63)
        scale = (size_x, size_y)
        offset = (math.ceil((64 - size_x) / 2), math.ceil((64 - size_y) / 2))
        img = img.resize(scale)
        # put it again on a black background (translated)
        background = PImage.new('L', (64, 64))
        trans_x = random.randrange(0, math.floor((64 - size_x)))
        trans_y = random.randrange(0, math.floor((64 - size_y)))
        offset = (trans_x, trans_y)
        background.paste(img, offset)
        img = background
    if("sine" in t):
        t_img = np.array(img)
        A = t_img.shape[0] / 3.0
        w = 2.0 / t_img.shape[1]
        shift_factor = random.choice([-1, 1]) * random.uniform(0.15, 0.2)
        shift = lambda x: shift_factor * A * np.sin(-2*np.pi*x * w)
        for i in range(t_img.shape[0]):
            t_img[:,i] = np.roll(t_img[:,i], int(shift(i)))
        img = PImage.fromarray(t_img)
    # blur
    if("blur" in f):
        img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(0.5, 1.2)))
    # sharpen
    if("sharpen" in f):
        img = img.filter(ImageFilter.SHARPEN)
    # smooth
    if("smooth" in f):
        img = img.filter(ImageFilter.SMOOTH)
    return img, offset, scale
def generator_initializer(_x_shared, _y_shared):
    shared_dict["x"] = _x_shared
    shared_dict["y"] = _y_shared

def generator_func(start_index, end_index, x_shape, y_shape):
    X, Y = [], []
    x_loc = np.frombuffer(shared_dict["x"], dtype="float16").reshape(x_shape)
    y_loc = np.frombuffer(shared_dict["y"], dtype="b").reshape(y_shape)
    for i in range(start_index, end_index):
        base_img = x_loc[i]
        img = PImage.fromarray(np.uint8(base_img.reshape(64, 64) * 255))
        img, *unused = distort_sample(img)
        # add transformed image
        X.append(np.array(img).reshape(64, 64, 1))
        Y.append(y_loc[i])
        X.append(np.array(img).reshape(64, 64, 1))
        Y.append(y_loc[i])
        # add base image
        #X.append(base_img)
        #Y.append(y_loc[i])
    return X, Y
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, num_samples, batch_size,
                 percentage, mode,
                 x_shared, y_shared,
                 x_np_shape, y_np_shape,
                 processes, shuffle=True):
        self.num_samples = num_samples
        # 50% original images + 50% augmented images
        self.batch_size = batch_size // 2
        self.percentage = percentage
        # an offset to divide the data set into test and train
        self.start_index = 0
        if(mode == "testing"):
            self.start_index = num_samples - (num_samples // 100 * percentage)
        # is this a train or a test generator
        self.mode = mode
        # how many processes should be used for this generator
        self.processes = processes
        # should the arrays be shuffled after each epoch
        self.shuffle = shuffle
        self.x_np_shape = x_np_shape
        self.y_np_shape = y_np_shape
        # a pool of processes for generating augmented data
        self.pool = mp.Pool(processes=self.processes,
                            initializer=generator_initializer,
                            initargs=(x_shared, y_shared))
    def __len__(self):
        return (self.num_samples // 100 * self.percentage) // self.batch_size
    def on_epoch_end(self):
        if(False):
            rng_state = np.random.get_state()
            np.random.shuffle(x_np)
            np.random.set_state(rng_state)
            np.random.shuffle(y_np)
    def __getitem__(self, index):
        arguments = []
        slice_size = self.batch_size // self.processes
        current_batch = index * self.batch_size
        for i in range(self.processes):
            slice_start = self.start_index + (current_batch + i * slice_size)
            slice_end = self.start_index + (current_batch + (i+1) * slice_size)
            arguments.append([slice_start, slice_end, self.x_np_shape, self.y_np_shape])
        return_values = self.pool.starmap(generator_func, arguments)
        X, Y = [], []
        for imgs, labels in return_values:
            X.append(imgs)
            Y.append(labels)
        return np.concatenate(X).astype(np.float16), np.concatenate(Y).astype(np.float16)
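The snippet above never shows how x_shared and y_shared are created before the generator is constructed. A minimal sketch of that setup (variable names are illustrative; the float16 images and byte labels match what generator_func assumes):
# copy the numpy data into shared memory once, before the pool is created
x_np = x.astype("float16")
y_np = o_y.astype("b")
x_shared = mp.RawArray('b', x_np.nbytes)  # raw shared byte buffers, no locking needed
y_shared = mp.RawArray('b', y_np.nbytes)
np.frombuffer(x_shared, dtype="float16")[:] = x_np.ravel()
np.frombuffer(y_shared, dtype="b")[:] = y_np.ravel()

training_generator = DataGenerator(num_samples = len(x_np), batch_size = 256,
                                   percentage = 80, mode = "training",
                                   x_shared = x_shared, y_shared = y_shared,
                                   x_np_shape = x_np.shape, y_np_shape = y_np.shape,
                                   processes = 8)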

pytorch loading model not same softmax probabilities

I cannot reproduce the same results after loading a model using PyTorch.
I am training a model 'net', and in the same file, after training (k-fold), the model is saved and also tested on one specific testing file:
class model(nn.Module):
    def __init__(self, size_net):
        super(model, self).__init__()  # required before registering sub-modules
        print('Initialize net with size: ', size_net)
        self.T = size_net
        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, (1, 16), padding = 0)
        self.batchnorm1 = nn.BatchNorm2d(16, False)
        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(1, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4, False)
        self.pooling2 = nn.MaxPool2d(2, 4)
        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4, False)
        self.pooling3 = nn.MaxPool2d((2, 4))
        # FC Layer
        # NOTE: This dimension will depend on the number of timestamps per sample in your data.
        # I have 120 timepoints.
        self.fc1 = nn.Linear(int(self.T / 2), 2)
    def forward(self, x):
        # Layer 1
        x = F.elu(self.conv1(x))
        x = self.batchnorm1(x)
        # NOTE: F.dropout defaults to training=True; without passing
        # training=self.training, these dropouts stay active even after net.eval()
        x = F.dropout(x, 0.25)
        x = x.permute(0, 3, 1, 2)
        #print "layer 1"
        # Layer 2
        x = self.padding1(x)
        x = F.elu(self.conv2(x))
        x = self.batchnorm2(x)
        x = F.dropout(x, 0.25)
        x = self.pooling2(x)
        #print "layer 2"
        # Layer 3
        x = self.padding2(x)
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        x = F.dropout(x, 0.25)
        x = self.pooling3(x)
        #print "layer 3"
        # FC Layer
        #print ('view:', x.shape)
        x = x.view(-1, int(self.T / 2))
        #x = torch.sigmoid(self.fc1(x))
        x = torch.softmax(self.fc1(x), 1)
        #print "layer 4"
        return x
#now call the model and train
net = model(SIZE_NET)
....
eval.train_Kfold_validation(n_epochs=25)
## save model's state
"""
net = EEGNet(SIZE_NET)
save_path = './eeg_net_{}.pt'.format(date.today().strftime("%Y%m%d"))
torch.save(net.state_dict(), save_path)
"""
'''
TEST
'''
testfile = '1_testonline_1_20190202-163051.csv'
kun_1 = np.genfromtxt('../' + testfile, delimiter=',').astype('float32')[:-1, :]
kun_1 = kun_1[:, :SIZE_NET]
X, y = prep.list_2darrays_to_3d([kun_1], -1)
print(X.shape)
array_dstack = np.array(X)
array_dstack_reshaped = np.reshape(array_dstack, (1, 1, SIZE_NET, 16))
inputs = Variable(torch.from_numpy(array_dstack_reshaped))
pred = net(inputs)
print('prob: ' + str(pred))  # Converted to probabilities
For example, for this file I got: pred = tensor([[0.5912, 0.4088]], grad_fn=<SoftmaxBackward>)
When instead I load the saved model in a new script and attempt inference again on the same testfile:
prep = Data_prep()
fileName = '1_testonline_1_20190202-163051.csv'
kun_1 = np.genfromtxt(file_dir + fileName, delimiter=',').astype('float32')[:-1, :]
kun_1 = kun_1[:, :SIZE_NET]
X, y = prep.list_2darrays_to_3d([kun_1], [-1])
# Load pre-trained model
net = model(SIZE_NET)
load_path = file_dir+'/model_colors/model_20190205.pt'
net.load_state_dict(torch.load(load_path))
net.eval()
array_dstack = np.array(X)
print(X.shape)
# (#samples, 1, #timepoints, #channels)
array_dstack_reshaped = np.reshape(array_dstack,(1, 1, SIZE_NET, 16))
inputs = Variable(torch.from_numpy(array_dstack_reshaped))
pred = net(inputs)
print(pred)
When I run the test script, the probability values are different and, even worse, NOT stable: running it multiple times gives different predictions. Any help appreciated.
As @Jatentaki pointed out, the solution is to ALWAYS fix the seed in all scripts that need to use the model in PyTorch:
torch.manual_seed(0)
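If you want runs to be fully reproducible, it is worth seeding every RNG the pipeline touches, not just PyTorch's. A minimal sketch (the numpy/random/CUDA seeding is an extra precaution, beyond what the answer above strictly requires):
import random
import numpy as np
import torch

torch.manual_seed(0)           # PyTorch CPU RNG (drives the dropout sampling here)
torch.cuda.manual_seed_all(0)  # PyTorch GPU RNGs, if CUDA is used
np.random.seed(0)              # numpy RNG
random.seed(0)                 # Python's built-in RNG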

Tensorflow : ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib

URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'

def fetch_data(url = URL, path = PATH):
    if not os.path.isdir(path):
        os.makedirs(path)
    file_path = os.path.join(path, "aclImdb_v1.tar.gz")  # fixed: was 'oath'
    urllib.request.urlretrieve(url, file_path)
    file_gz = tarfile.open(file_path)
    file_gz.extractall(path = path)
    file_gz.close()
import pyprind # for progress visualisation
import pandas as pd

PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0} # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000) # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over 'train' & 'test' subdir
for s in ('test', 'train'):
    for l in ('pos', 'neg'): # and read text files from 'pos' and 'neg' subdir
        path = os.path.join(PATH, s, l)
        for file in os.listdir(path):
            # append to the df pandas DataFrame with an int class (pos = 1, neg = 0)
            with open(os.path.join(path, file), 'r', encoding = 'utf-8') as infile:
                txt = infile.read()
            df = df.append([[txt, labels[l]]], ignore_index = True)
            pbar.update()
df.columns = ['review', 'sentiment']

import numpy as np
np.random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index = False, encoding = 'utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
df.head(3)
# Separate words and count each word's occurrence
import pyprind # for progress visualisation
from collections import Counter
from string import punctuation
import re

counts = Counter() # collects the counts of occurrence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
                       title = 'Counting word occurrences...') # progress bar
for i, review in enumerate(df['review']):
    text = ''.join([c if c not in punctuation else ' '+c+' '
                    for c in review]).lower()
    df.loc[i, 'review'] = text
    pbar.update()
    counts.update(text.split())
# Mapping each unique word to an int
word_counts = sorted(counts, key = counts.get, reverse = True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}
mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
                       title = 'Map movie reviews to integers...')
# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype = int)
for i, row in enumerate(mapped_reviews):
    review_arr = np.array(row)
    sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values

# Define the mini-batches generator
np.random.seed(123)
def batch_gen(x, y = None, batch_size = 64):
    n_batches = len(x) // batch_size
    x = x[:n_batches * batch_size]
    if y is not None:
        y = y[:n_batches * batch_size]
    for ii in range(0, len(x), batch_size):
        if y is not None:
            yield x[ii : ii + batch_size], y[ii : ii + batch_size]
        else:
            yield x[ii : ii + batch_size]
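# Note: batch_gen silently drops the remainder that does not fill a whole batch, e.g.
#   list(batch_gen(np.arange(10).reshape(10, 1), np.arange(10), batch_size = 4))
# yields two batches of 4 and drops the last 2 samples.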
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ## suppress the 3.5 warning if using TF 1.4
class SentimentRNN(object):
    # Define __init__
    def __init__(self,
                 n_words,
                 seq_len = 200,
                 lstm_size = 256,
                 num_layers = 1,
                 batch_size = 64,
                 learning_rate = 0.0001,
                 embed_size = 200):
        self.n_words = n_words
        self.seq_len = seq_len
        self.lstm_size = lstm_size # no. of hidden units
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.embed_size = embed_size
        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    # Define the build method
    def build(self):
        # Define the placeholders
        tf_x = tf.placeholder(tf.int32,
                              shape = (self.batch_size, self.seq_len),
                              name = 'tf_x')
        tf_y = tf.placeholder(tf.float32,
                              shape = (self.batch_size),
                              name = 'tf_y')
        tf_keepprob = tf.placeholder(tf.float32,
                                     name = 'tf_keepprob')
        # Create the embedding layer
        embedding = tf.Variable(
            tf.random_uniform(
                shape = (self.n_words, self.embed_size),
                minval = -1,
                maxval = 1),
            name = 'embedding')
        embed_x = tf.nn.embedding_lookup(embedding,
                                         tf_x,
                                         name = 'embed_x')
        # Define LSTM cells and stack them
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(num_units = self.lstm_size),
                output_keep_prob = tf_keepprob)
             for i in range(self.num_layers)])
        # Define the initial state:
        self.initial_state = cells.zero_state(
            self.batch_size, tf.float32)
        print(' << initial state >> ', self.initial_state)
        # Put together components with tf.nn.dynamic_rnn
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
            cell = cells,
            inputs = embed_x,
            initial_state = self.initial_state)
        ## lstm_outputs shape: [batch_size, max_time, cells.output_size]
        print('\n << lstm_output >> ', lstm_outputs)
        print('\n << final state >> ', self.final_state)
        # Apply a fully-connected layer on the RNN output
        logits = tf.layers.dense(
            inputs = lstm_outputs[:, -1],
            units = 1, # dimensionality of the output space
            activation = None,
            name = 'logits')
        # Remove dimensions of size 1 from the tensor shape
        logits = tf.squeeze(input = logits,
                            name = 'logits_squeezed')
        print('\n << logits >> ', logits)
        # If you want prob's
        y_proba = tf.nn.sigmoid(logits, name = 'probabilities')
        predictions = {'probabilities' : y_proba,
                       'labels' : tf.cast(tf.round(y_proba),
                                          tf.int32,
                                          name = 'labels')}
        print('\n << predictions >> ', predictions)
        # Define the cost function
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels = tf_y,
                logits = logits),
            name = 'cost')
        # Define the optimiser
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name = 'train_op')

    # Define the train method
    def train(self, X_train, y_train, num_epochs):
        with tf.Session(graph = self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state)
                for batch_x, batch_y in batch_gen(
                        X_train,
                        y_train,
                        batch_size = self.batch_size):
                    feed = {'tf_x:0' : batch_x,
                            'tf_y:0' : batch_y,
                            'tf_keepprob:0' : 0.5,
                            self.initial_state : state}
                    loss, _, state = sess.run(
                        ['cost:0',
                         'train_op',
                         self.final_state],
                        feed_dict = feed)
                    if iteration % 20 == 0:
                        print("Epoch: %d/%d Iteration: %d "
                              "| Train loss: %.5f" % (
                                  epoch + 1,
                                  num_epochs,
                                  iteration,
                                  loss))
                    iteration += 1
                if (epoch + 1) % 10 == 0:
                    self.saver.save(
                        sess,
                        "model/sentiment-%d.ckpt" % epoch)

    # Define the predict method
    def predict(self, X_data, return_proba=False):
        preds = []
        with tf.Session(graph = self.g) as sess:
            self.saver.restore(
                sess,
                tf.train.latest_checkpoint('model/'))
            test_state = sess.run(self.initial_state)
            for ii, batch_x in enumerate(batch_gen(
                    x = X_data,
                    y = None,
                    batch_size = self.batch_size), 1):
                feed = {'tf_x:0' : batch_x,
                        'tf_keepprob:0' : 1.0,
                        self.initial_state : test_state}
                if return_proba:
                    pred, test_state = sess.run(
                        ['probabilities:0', self.final_state],
                        feed_dict = feed)
                else:
                    pred, test_state = sess.run(
                        ['labels:0', self.final_state],
                        feed_dict = feed)
                preds.append(pred)
        return np.concatenate(preds)
for review in df['review']:
    mapped_reviews.append([word_to_int[word] for word in review.split()])
    pbar.update()
rnn = SentimentRNN(n_words = n_words,
                   seq_len = sequence_length,
                   embed_size = 256,
                   lstm_size = 128,
                   num_layers = 1,
                   batch_size = 100,
                   learning_rate = 0.001)
preds = rnn.predict(X_test)
y_true = y_test[:len(preds)]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, a single layer (num_layers = 1) may generalise better.
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything. It returns None, and then the Saver complains because it was passed None. So there is no checkpoint in that directory.
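Notice that in the code above rnn.predict(X_test) is called without rnn.train(...) ever having run, and train only writes a checkpoint every 10 epochs, so nothing has been saved under model/. A likely fix, given the code shown (the epoch count is illustrative):
rnn.train(X_train, y_train, num_epochs = 40)  # writes model/sentiment-*.ckpt every 10 epochs
preds = rnn.predict(X_test)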

Keras Neural Style Transfer: backend.gradients returns None

I am trying to implement Neural Style Transfer using Keras, keeping it as simple as possible. While trying to find the gradient using the backend.gradients() function of Keras, it returns [None]. My code is as follows:
content_image = cv2.imread("C:/Users/Max/Desktop/IMG_20170331_103755.jpg")
content_image = cv2.resize(content_image, (512, 512))
style_image = cv2.imread("C:/Users/Max/Desktop/starry.jpg")
style_image = cv2.resize(style_image, (512, 512))
content_array = np.asarray(content_image, dtype=np.float32)
content_array = np.expand_dims(content_array, axis=0)
style_array = np.asarray(style_image, dtype=np.float32)
style_array = np.expand_dims(style_array, axis=0)
# Constants:
epochs = 1
height = 512
width = 512
num_channels = 3
step_size = 10
content_layer = ['block2_conv2']
style_layer = ['block1_conv2', 'block2_conv2', 'block3_conv3', 'block4_conv3', 'block5_conv3']
loss_total = backend.variable(0.0)
# VGG16 Model:
model = VGG16(input_shape = [height, width, num_channels], weights='imagenet', include_top=False)
# Defining losses:
def content_loss(Content, Mixed):
    content_loss = backend.mean(backend.square(Mixed - Content))
    return content_loss
def gram(layer):
    flat = backend.reshape(layer, shape=[1, -1])
    gram = backend.dot(flat, backend.transpose(flat))
    return gram
def style_loss(Style, Mixed):
    S_G = gram(Style)
    M_G = gram(Mixed)
    size = height * width
    return backend.sum(backend.square(S_G - M_G)) / (4. * (num_channels ** 2) * (size ** 2))
'''
def denoise(Image):
    loss = backend.mean(backend.abs(Image[:,1:,:,:] - Image[:,:-1,:,:]) + backend.abs(Image[:,:,1:,:] - Image[:,:,:-1,:]))
    return loss
'''
# Backend Functions:
output_c = backend.function(inputs = [model.layers[0].input], outputs = [model.get_layer(content_layer[0]).output])
output_s = backend.function(inputs = [model.layers[0].input], outputs = [model.get_layer(layer).output for layer in style_layer])
content_output = output_c([content_array])
style_output = output_s([style_array])
# Randomly generated image:
Mixed = np.random.uniform(0, 255, [1, height, width, 3]) - 128
# Loop:
for i in range(epochs):
    mixed_c = output_c([Mixed])
    mixed_c = mixed_c[0]
    loss_c = content_loss(content_output[0], mixed_c)
    total = []
    mixed_s = output_s([Mixed])
    for i in range(len(style_layer)):
        style = style_loss(style_output[i], mixed_s[i])
        total.append(style)
    loss_s = backend.sum(total)
    #loss_d = denoise(Mixed)
    loss_total = w_c * loss_c + w_s * loss_s #+ w_d * loss_d
    gradient = backend.gradients(loss_total, Mixed)
    gradient = np.squeeze(gradient)
    step_size = step_size / (np.std(gradient) + 1e-8)
    Mixed -= gradient * step_size
What changes should I make to get the gradients working properly? I am clueless as to what went wrong.
Thanks!
You're taking the gradient with respect to Mixed, which is a numpy array and not a variable in the graph. You need to define a tensor which will then hold the value of Mixed.
From Keras documentation:
gradients
keras.backend.gradients(loss, variables)
Returns the gradients of variables w.r.t. loss.
Arguments
loss: Scalar tensor to minimize.
variables: List of variables.
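A minimal sketch of what that looks like with the TF 1.x-era Keras backend used above (the placeholder-based rebuild of the model and the elided loss construction are illustrative, not taken from the question):
from keras import backend
from keras.applications.vgg16 import VGG16

# make the mixed image a symbolic tensor and build the model on top of it
combination = backend.placeholder((1, height, width, 3))
model = VGG16(input_tensor = combination, weights = 'imagenet', include_top = False)
# ... build loss_total symbolically from model.get_layer(...).output, as above ...
grads = backend.gradients(loss_total, combination)        # now a real gradient tensor
fetch = backend.function([combination], [loss_total] + grads)
loss_value, grad_values = fetch([Mixed])                  # evaluate at the current numpy image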
