How to get trainable weights for a manual run of session in Keras? - keras

Because I'm manually running a session, I can't seem to collect the trainable weights of a specific layer.
x = Convolution2D(16, 3, 3, init='he_normal', border_mode='same')(img)
for i in range(0, self.blocks_per_group):
nb_filters = 16 * self.widening_factor
x = residual_block(x, nb_filters=nb_filters, subsample_factor=1)
for i in range(0, self.blocks_per_group):
nb_filters = 32 * self.widening_factor
if i == 0:
subsample_factor = 2
else:
subsample_factor = 1
x = residual_block(x, nb_filters=nb_filters, subsample_factor=subsample_factor)
for i in range(0, self.blocks_per_group):
nb_filters = 64 * self.widening_factor
if i == 0:
subsample_factor = 2
else:
subsample_factor = 1
x = residual_block(x, nb_filters=nb_filters, subsample_factor=subsample_factor)
x = BatchNormalization(axis=3)(x)
x = Activation('relu')(x)
x = AveragePooling2D(pool_size=(8, 8), strides=None, border_mode='valid')(x)
x = tf.reshape(x, [-1, np.prod(x.get_shape()[1:].as_list())])
# Readout layer
preds = Dense(self.nb_classes, activation='softmax')(x)
loss = tf.reduce_mean(categorical_crossentropy(labels, preds))
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)
with sess.as_default():
for i in range(10):
batch = self.next_batch(self.batch_num)
_, l = sess.run([optimizer, loss],
feed_dict={img: batch[0], labels: batch[1]})
print(l)
print(type(weights))
I'm trying to get the weights of the last convolution layer.
I tried get_trainable_weights(layer) and layer.get_weights()but I did not manage to get anywhere.
The error
AttributeError: 'Tensor' object has no attribute 'trainable_weights'

From looking at the source* it seems like your looking for layer.trainable_weights (it's a list not a member function). Please note this returns tensors.
If you want to get their actual values, you need to evaluate them in a session:
weights1, weights2 = sess.run([weight_tensor_1, weight_tensor_2])
*https://github.com/fchollet/keras/blob/master/keras/layers/convolutional.py#L401

Related

ValueError: Exception encountered when calling layer "batch_normalization_4" (type BatchNormalization)

I am trying to design densenet using model-subclass method. In which I created one block of 5 different layers which is repeated (using for loop) as per user's input. problem is after 1st iteration when output is given to first layer of block. it is giving error. Please help me with how to input give to input to the block
Densenet code is here:
class CNN(keras.Model):
def __init__(self,nfilters,sfilters):
super(CNN,self).__init__()
self.num_filters = nfilters[0]
self.dropout_rate = dropout_rate
self.eps = eps
self.num_blocks = num_blocks
#conv1
self.conv1 = tf.keras.layers.Conv2D(self.num_filters, kernel_size=(sfilters[0],sfilters[0]), use_bias=False, kernel_initializer='he_normal', kernel_regularizer=tf.keras.regularizers.l2(1e-4))
#H_block
self.h_bn = tf.keras.layers.BatchNormalization(epsilon=self.eps)
self.h_act = tf.keras.layers.Activation('relu')
self.h_zp = tf.keras.layers.ZeroPadding2D((1,1))
self.h_do = tf.keras.layers.Dropout(rate=self.dropout_rate)
self.concat = tf.keras.layers.Concatenate()
#trans_block
self.compression_factor = compress_factor
# compression_factor is the 'θ'
self.tran_bn = tf.keras.layers.BatchNormalization( epsilon=eps )
self.tran_act = tf.keras.layers.Activation('relu')
#self.num_feature_maps = input.shape[1]
#The value of 'm'
self.tran_do = tf.keras.layers.Dropout(rate=self.dropout_rate)
self.tran_avgp = tf.keras.layers.AveragePooling2D(pool_size=(3,3))
self.num_layers = num_layers_per_block
self.growth_rate = growth_rate
self.globalaverage = keras.layers.GlobalAveragePooling2D()
self.dense = keras.layers.Dense(37) # Num Classes for CIFAR-10
self.activation = keras.layers.Activation( 'softmax' )
def call(self, inputs, training=False):
inputs = tf.keras.layers.Input(shape=input_shape)
x = self.conv1(inputs)
for i in range( self.num_blocks ):
#x, num_filters = dense_block( x, num_layers_per_block , num_filters, growth_rate , dropout_rate )
for i in range(self.num_layers): # num_layers is the value of 'l'
#H_block
x = self.h_bn(x)
x = self.h_act(x)
x = self.h_zp(x)
h_conv2d = tf.keras.layers.Conv2D(self.num_filters, kernel_size=(sfilters[0], sfilters[0]), use_bias=False , kernel_initializer='he_normal')
x = h_conv2d(x)
#x = self.h_conv2d(x)
x = self.h_do(x)
#inputs = tf.keras.layers.Concatenate()([conv_outputs, inputs])
x = tf.concat([x,self.conv1(inputs)],-1)
#x = self.concat(x,inputs)
#transititon
# compression_factor is the 'θ'
x = self.tran_bn(x)
x = self.tran_act(x)
#num_feature_maps = input.shape[1]
self.num_feature_maps = x.shape[1] # The value of 'm'
tran_conv2d = tf.keras.layers.Conv2D(np.floor(self.compression_factor*self.num_feature_maps).astype(np.int),kernel_size=(1,1), use_bias=False, padding='same', kernel_initializer='he_normal', kernel_regularizer=tf.keras.regularizers.l2(1e-4))
x = tran_conv2d(x)
x = self.tran_do(x)
x = self.tran_avgp(x)
#num_filters += growth_rate # To increase the number of filters for each layer.
self.num_filters += self.growth_rate # To increase the number of filters for each layer.
x = self.globalaverage(x)
x = self.dense(x) # Num Classes for CIFAR-10
x = self.activation(x)
Error is here:
ValueError: Exception encountered when calling layer "cnn_3" (type CNN).
in user code:
File "<ipython-input-6-86c881729324>", line 40, in call *
x = self.h_bn(x)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler **
raise e.with_traceback(filtered_tb) from None
ValueError: Exception encountered when calling layer "batch_normalization_6" (type BatchNormalization).
Dimensions must be equal, but are 16 and 8 for '{{node batch_normalization_6/FusedBatchNormV3}} = FusedBatchNormV3[T=DT_FLOAT, U=DT_FLOAT, data_format="NHWC", epsilon=1.1e-05, exponential_avg_factor=0.01, is_training=true](Placeholder, batch_normalization_6/ReadVariableOp, batch_normalization_6/ReadVariableOp_1, batch_normalization_6/FusedBatchNormV3/ReadVariableOp, batch_normalization_6/FusedBatchNormV3/ReadVariableOp_1)' with input shapes: [?,126,126,16], [8], [8], [8], [8].
Call arguments received:
• inputs=tf.Tensor(shape=(None, 126, 126, 16), dtype=float32)
• training=True
Call arguments received:
• inputs=tf.Tensor(shape=(None, 128, 128, 1), dtype=float32)
• training=True

PyTorch: GRU, one-to-many / many-to-one

I would like to implement a GRU able to encode a sequence of vectors to one vector (many-to-one), and then another GRU able to decode a vector to a sequence of vector (one-to-many). The size of the vectors wouldn't be changed. I would like to have an opinion about what I implemented.
Here is the code:
class AEGRU(nn.Module):
def __init__(self, opt):
super(AEGRU, self).__init__()
self.length = 256
self.latent_space = 256
self.num_layers = 1
self.GRU_enc = nn.GRU(input_size=3, hidden_size=self.latent_space, num_layers=self.num_layers, batch_first=True)
self.fc_enc = nn.Linear(self.latent_space, self.latent_space)
self.GRU_dec = nn.GRU(input_size=self.latent_space, hidden_size=3, num_layers=self.num_layers, batch_first=True)
self.fc_dec = nn.Linear(3, 3)
def enc(self, x):
# x has shape: Batch_size x self.length x 3
h0 = torch.zeros(self.num_layers, x.shape[0], self.latent_space).cuda()
out, _ = self.GRU_enc(x, h0)
out = out[:, -1, :]
out = self.fc_enc(out)
return out
def dec(self, x):
# x has shape: Batch_size x self.latent_space
x = x[:, None, :]
h = torch.zeros(self.num_layers, x.shape[0], 3).cuda()
# method 1 ??
'''outputs = torch.zeros(x.shape[0], self.length, 3).cuda()
for i in range(self.length):
out, h = self.GRU_dec(x, h)
outputs[:, i, :] = out[:, 0, :]'''
# method 2 ??
x = x.repeat(1, self.length, 1)
outputs, _ = self.GRU_dec(x, h)
# linear layer
outputs = self.fc_dec(outputs)
return outputs
def forward(self, x):
self.indices = []
latent = self.enc(x)
output = self.dec(latent)
return output
I am not sure whether this is the good way to do a one-to-many GRU. Could I have some opinions about this?
Thanks for reading!

Must the input height of a 1D CNN be constant?

I'm currently doing my honours research project on online/dynamic signature verification. I am using the SVC 2004 dataset (Task 2). I have done the following data processing:
def load_dataset_normalized(path):
file_names = os.listdir(path)
num_of_persons = len(file_names)
initial_starting_point = np.zeros(np.shape([7]))
x_dataset = []
y_dataset = []
for infile in file_names:
full_file_name = os.path.join(path, infile)
file = open(full_file_name, "r")
file_lines = file.readlines()
num_of_points = int(file_lines[0])
x = []
y = []
time_stamp = []
button_status = []
azimuth_angles = []
altitude = []
pressure = []
for idx, line in enumerate(file_lines[1:]):
idx+=1
nums = line.split(' ')
if idx == 1:
nums[2] = 0
initial_starting_point = nums
x.append(int(nums[0]))
y.append(int(nums[1]))
time_stamp.append(0)
button_status.append(int(nums[3]))
azimuth_angles.append(int(nums[4]))
altitude.append(int(nums[5]))
pressure.append(int(nums[6]))
else:
x.append(int(nums[0]))
y.append(int(nums[1]))
time_stamp.append(10)
button_status.append(int(nums[3]))
azimuth_angles.append(int(nums[4]))
altitude.append(int(nums[5]))
pressure.append(int(nums[6]))
max_x = max(x)
max_y = max(y)
max_azimuth_angle = max(azimuth_angles)
max_altitude = max(altitude)
max_pressure = max(pressure)
min_x = min(x)
min_y = min(y)
min_azimuth_angle = min(azimuth_angles)
min_altitude = min(altitude)
min_pressure = min(pressure)
#Alignment normalization:
for i in range(num_of_points):
x[i] -= int(initial_starting_point[0])
y[i] -= int(initial_starting_point[1])
azimuth_angles[i] -= int(initial_starting_point[4])
altitude[i] -= int(initial_starting_point[5])
pressure[i] -= int(initial_starting_point[6])
#Size normalization
for i in range(num_of_points):
x[i] = ((x[i] - max_x) / (min_x - max_x))
y[i] = ((y[i] - max_y) / (min_y - max_y))
azimuth_angles[i] = ((azimuth_angles[i] - max_azimuth_angle) / (min_azimuth_angle - max_azimuth_angle))
altitude[i] = ((altitude[i] - max_altitude) / (min_altitude - max_altitude))
pressure[i] = ((pressure[i] - max_pressure) / (min_pressure - max_pressure))
#data points to dataset
x_line = []
for i in range (num_of_points):
x_line.append([x[i], y[i], time_stamp[i], button_status[i], azimuth_angles[i], altitude[i], pressure[i]])
if i == num_of_points-1:
x_dataset.append(x_line)
infile_without_extension = infile.replace('.TXT','')
index_of_s = infile_without_extension.find("S")
index_of_num = index_of_s + 1
sig_ID = int(infile_without_extension[index_of_num:])
if sig_ID < 21:
y_dataset.append([1,0])
else:
y_dataset.append([0,1])
x_dataset = np.asarray(x_dataset)
y_dataset = np.asarray(y_dataset)
return x_dataset, y_dataset
I also have another method that takes the values as they are in the text file and created an "original" dataset.
Now, the aim of my research is to create a CRNN (convolutional recurrent neural network) that can identify if a signature is authentic or forged. Here is the code for the model:
class crnn_model:
def __init__(self, trainX, trainy, testX, testy, optimizer_method):
self.trainX = trainX
self.trainy = trainy
self.testX = testX
self.testy = testy
self.evaluate_model(optimizer_method)
def evaluate_model(self, optimizer_method):
verbose, epochs, batch_size = 0, 40, 10
n_timesteps, n_features, n_outputs = len(self.trainX), 7, 2
print(n_timesteps)
model = keras.Sequential()
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features), use_bias=True))
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.MaxPooling1D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.LSTM(2, input_shape=[30592,1], return_sequences=True))
model.summary()
# Compile the model
model.compile(optimizer=optimizer_method, loss='categorical_crossentropy', metrics=['accuracy'])
#fit model
model.fit(self.trainX, self.trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
_, accuracy = model.evaluate(self.testX, self.testy, batch_size=batch_size, verbose=0)
return accuracy
Here is the problem I am having: the number of points used to store each signature is different, hence making the input height of the input matrix vary from one signature to the next. Must I now force the dataset to some uniform/constant number of points?
Much appreciated for your time.

pytorch loading model not same softmax probabilities

I cannot reproduce the same results after loading a model using pytorch.
I am training a model 'net' and in the same file, after training (kfold) then the model is saved and also tested in 1 specific testing file:
class model(nn.Module):
def __init__(self,size_net):
print('Initialize net with size: ',size_net)
self.T = size_net
# Layer 1
self.conv1 = nn.Conv2d(1, 16, (1,16), padding = 0)
self.batchnorm1 = nn.BatchNorm2d(16, False)
# Layer 2
self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
self.conv2 = nn.Conv2d(1, 4, (2, 32))
self.batchnorm2 = nn.BatchNorm2d(4, False)
self.pooling2 = nn.MaxPool2d(2, 4)
# Layer 3
self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
self.conv3 = nn.Conv2d(4, 4, (8, 4))
self.batchnorm3 = nn.BatchNorm2d(4, False)
self.pooling3 = nn.MaxPool2d((2, 4))
# FC Layer
# NOTE: This dimension will depend on the number of timestamps per sample in your data.
# I have 120 timepoints.
self.fc1 = nn.Linear(int(self.T/2), 2)
def forward(self, x):
# Layer 1
x = F.elu(self.conv1(x))
x = self.batchnorm1(x)
x = F.dropout(x, 0.25)
x = x.permute(0, 3, 1, 2)
#print "layer 1"
# Layer 2
x = self.padding1(x)
x = F.elu(self.conv2(x))
x = self.batchnorm2(x)
x = F.dropout(x, 0.25)
x = self.pooling2(x)
#print "layer 2"
# Layer 3
x = self.padding2(x)
x = F.elu(self.conv3(x))
x = self.batchnorm3(x)
x = F.dropout(x, 0.25)
x = self.pooling3(x)
#print "layer 3"
# FC Layer
#print ('view:',x.shape)
x = x.view(-1, int(self.T/2))
#x = torch.sigmoid(self.fc1(x))
x= torch.softmax(self.fc1(x),1)
#print "layer 4"
return x
#now call the model and train
net = model(SIZE_NET)
....
eval.train_Kfold_validation(n_epochs=25)
## save models state
"""
net = EEGNet(SIZE_NET)
save_path = './eeg_net_{}.pt'.format(date.today().strftime("%Y%m%d"))
torch.save(net.state_dict(), save_path)
'''
TEST
'''
testfile = '1_testonline_1_20190202-163051.csv'
kun_1 = np.genfromtxt( '../'+ testfile, delimiter=',').astype('float32')[:-1, :]
kun_1 = kun_1[:, :SIZE_NET]
X, y = prep.list_2darrays_to_3d([kun_1], -1)
print(X.shape)
array_dstack = np.array(X)
array_dstack_reshaped = np.reshape(array_dstack,(1, 1, SIZE_NET, 16))
inputs = Variable(torch.from_numpy(array_dstack_reshaped))
pred = net(inputs)
print('prob: '+str(pred)) #Converted to probabilities
For example for this file I got: pred=tensor([[0.5912, 0.4088]], grad_fn=)
When instead I load the saved model in a new script and I attempt inference again on the same testfile:
prep= Data_prep()
fileName = '1_testonline_1_20190202-163051.csv'
kun_1 = np.genfromtxt(file_dir+fileName, delimiter=',').astype('float32')[:-1,:]
kun_1 = kun_1[:,:SIZE_NET]
X , y = prep.list_2darrays_to_3d([kun_1],[-1])
# Load pre-trained model
net = model(SIZE_NET)
load_path = file_dir+'/model_colors/model_20190205.pt'
net.load_state_dict(torch.load(load_path))
net.eval()
array_dstack = np.array(X)
print(X.shape)
# (#samples, 1, #timepoints, #channels)
array_dstack_reshaped = np.reshape(array_dstack,(1, 1, SIZE_NET, 16))
inputs = Variable(torch.from_numpy(array_dstack_reshaped))
pred = net(inputs)
print(pred)
When I run the test script the prob values are different and even worse NOT stable: running multiple times give different predictions... Any help appreciated
As #Jatentaki pointed out the solution is to ALWAYS fix the seed in all scripts that need to use the model in pytorch
torch.manual_seed(0)

TensorFlow, losses after training the model are different than losses printed during the last Epoch of Stochastic Gradient Descent.

I'm trying to do binary classification on two spirals. For testing, I am feeding my neural network the exact spiral data with no noise, and the model seems to work as the losses near 0 during SGD. However, after using my model to infer the exact same data points after SGD has completed, I get completely different losses than what was printed during the last epoch of SGD.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(threshold=np.nan)
# get the spiral points
t_p = np.linspace(0, 4, 1000)
x1_p = t_p * np.cos(t_p*2*np.pi)
y1_p = t_p * np.sin(t_p*2*np.pi)
x2_p = t_p * np.cos(t_p*2*np.pi + np.pi)
y2_p = t_p * np.sin(t_p*2*np.pi + np.pi)
plt.plot(x1_p, y1_p, x2_p, y2_p)
# generate data points
x1_dat = x1_p
y1_dat = y1_p
x2_dat = x2_p
y2_dat = y2_p
def model_variable(shape, name, initializer):
variable = tf.get_variable(name=name,
dtype=tf.float32,
shape=shape,
initializer=initializer
)
tf.add_to_collection('model_variables', variable)
return variable
class Model():
#layer specifications includes bias nodes
def __init__(self, sess, data, nEpochs, learning_rate, layer_specifications):
self.sess = sess
self.data = data
self.nEpochs = nEpochs
self.learning_rate = learning_rate
if layer_specifications[0] != 2 or layer_specifications[-1] != 1:
raise ValueError('First layer only two nodes, last layer only 1 node')
else:
self.layer_specifications = layer_specifications
self.build_model()
def build_model(self):
# x is the two nodes that will be layer one, will input an x, y coordinate
# and need to classify which spiral is it on, the non phase shifted or the phase
# shifted one.
# y is the output of the model
self.x = tf.placeholder(tf.float32, shape=[2, 1])
self.y = tf.placeholder(tf.float32, shape=[])
self.thetas = []
self.biases = []
for i in range(1, len(self.layer_specifications)):
self.thetas.append(model_variable([self.layer_specifications[i], self.layer_specifications[i-1]], 'theta'+str(i), tf.random_normal_initializer(stddev=0.1)))
self.biases.append(model_variable([self.layer_specifications[i], 1], 'bias'+str(i), tf.constant_initializer()))
#forward propagation
intermediate = self.x
for i in range(0, len(self.layer_specifications)-1):
if i != (len(self.layer_specifications) - 2):
intermediate = tf.nn.elu(tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i]))
else:
intermediate = tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i])
self.yhat = tf.squeeze(intermediate)
self.loss = tf.nn.sigmoid_cross_entropy_with_logits(self.yhat, self.y);
def train_init(self):
model_variables = tf.get_collection('model_variables')
self.optim = (
tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
.minimize(self.loss, var_list=model_variables)
)
self.check = tf.add_check_numerics_ops()
self.sess.run(tf.initialize_all_variables())
# here is where x and y combine to get just x in tf with shape [2, 1] and where label becomes y in tf
def train_iter(self, x, y):
loss, _, _ = sess.run([self.loss, self.optim, self.check],
feed_dict = {self.x: x, self.y: y})
print('loss: {0} on:{1}'.format(loss, x))
# here x and y are still x and y coordinates, label is separate
def train(self):
for _ in range(self.nEpochs):
for x, y, label in self.data():
print(label)
self.train_iter([[x], [y]], label)
print("NEW ONE:\n")
# here x and y are still x and y coordinates, label is separate
def infer(self, x, y, label):
return self.sess.run((tf.sigmoid(self.yhat), self.loss), feed_dict={self.x : [[x], [y]], self.y : label})
def data():
#so first spiral is label 0, second is label 1
for _ in range(len(x1_dat)-1, -1, -1):
for dat in range(2):
if dat == 0:
yield x1_dat[_], y1_dat[_], 0
else:
yield x2_dat[_], y2_dat[_], 1
layer_specifications = [2, 100, 100, 100, 1]
sess = tf.Session()
model = Model(sess, data, nEpochs=10, learning_rate=1.1e-2, layer_specifications=layer_specifications)
model.train_init()
model.train()
inferrences_1 = []
inferrences_2 = []
losses = 0
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x1_p[i], y1_p[i], 0)
if infer >= 0.5:
print('loss: {0} on point {1}, {2}'.format(loss, x1_p[i], y1_p[i]))
losses = losses + 1
inferrences_1.append('r')
else:
inferrences_1.append('g')
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x2_p[i], y2_p[i], 1)
if infer >= 0.5:
inferrences_2.append('r')
else:
print('loss: {0} on point {1}, {2}'.format(loss, x2_p[i], y2_p[i]))
losses = losses + 1
inferrences_2.append('g')
print('total losses: {}'.format(losses))
plt.scatter(x1_p, y1_p, c=inferrences_1)
plt.scatter(x2_p, y2_p, c=inferrences_2)
plt.show()

Resources