PyTorch: loaded model does not give the same softmax probabilities - python-3.x

I cannot reproduce the same results after loading a model in PyTorch.
I train a model 'net', and in the same script, after training (k-fold), the model is saved and also tested on one specific test file:
class model(nn.Module):
    def __init__(self, size_net):
        super(model, self).__init__()
        print('Initialize net with size: ', size_net)
        self.T = size_net
        # Layer 1
        self.conv1 = nn.Conv2d(1, 16, (1, 16), padding=0)
        self.batchnorm1 = nn.BatchNorm2d(16, False)
        # Layer 2
        self.padding1 = nn.ZeroPad2d((16, 17, 0, 1))
        self.conv2 = nn.Conv2d(1, 4, (2, 32))
        self.batchnorm2 = nn.BatchNorm2d(4, False)
        self.pooling2 = nn.MaxPool2d(2, 4)
        # Layer 3
        self.padding2 = nn.ZeroPad2d((2, 1, 4, 3))
        self.conv3 = nn.Conv2d(4, 4, (8, 4))
        self.batchnorm3 = nn.BatchNorm2d(4, False)
        self.pooling3 = nn.MaxPool2d((2, 4))
        # FC Layer
        # NOTE: this dimension will depend on the number of timepoints per sample in your data.
        # I have 120 timepoints.
        self.fc1 = nn.Linear(int(self.T / 2), 2)

    def forward(self, x):
        # Layer 1
        x = F.elu(self.conv1(x))
        x = self.batchnorm1(x)
        x = F.dropout(x, 0.25)
        x = x.permute(0, 3, 1, 2)
        # Layer 2
        x = self.padding1(x)
        x = F.elu(self.conv2(x))
        x = self.batchnorm2(x)
        x = F.dropout(x, 0.25)
        x = self.pooling2(x)
        # Layer 3
        x = self.padding2(x)
        x = F.elu(self.conv3(x))
        x = self.batchnorm3(x)
        x = F.dropout(x, 0.25)
        x = self.pooling3(x)
        # FC Layer
        x = x.view(-1, int(self.T / 2))
        #x = torch.sigmoid(self.fc1(x))
        x = torch.softmax(self.fc1(x), 1)
        return x
#now call the model and train
net = model(SIZE_NET)
....
eval.train_Kfold_validation(n_epochs=25)
## save model's state
net = EEGNet(SIZE_NET)
save_path = './eeg_net_{}.pt'.format(date.today().strftime("%Y%m%d"))
torch.save(net.state_dict(), save_path)

'''
TEST
'''
testfile = '1_testonline_1_20190202-163051.csv'
kun_1 = np.genfromtxt( '../'+ testfile, delimiter=',').astype('float32')[:-1, :]
kun_1 = kun_1[:, :SIZE_NET]
X, y = prep.list_2darrays_to_3d([kun_1], -1)
print(X.shape)
array_dstack = np.array(X)
array_dstack_reshaped = np.reshape(array_dstack,(1, 1, SIZE_NET, 16))
inputs = Variable(torch.from_numpy(array_dstack_reshaped))
pred = net(inputs)
print('prob: '+str(pred)) #Converted to probabilities
For example, for this file I got: pred=tensor([[0.5912, 0.4088]], grad_fn=<SoftmaxBackward>)
When I instead load the saved model in a new script and attempt inference again on the same test file:
prep= Data_prep()
fileName = '1_testonline_1_20190202-163051.csv'
kun_1 = np.genfromtxt(file_dir+fileName, delimiter=',').astype('float32')[:-1,:]
kun_1 = kun_1[:,:SIZE_NET]
X, y = prep.list_2darrays_to_3d([kun_1], [-1])
# Load pre-trained model
net = model(SIZE_NET)
load_path = file_dir+'/model_colors/model_20190205.pt'
net.load_state_dict(torch.load(load_path))
net.eval()
array_dstack = np.array(X)
print(X.shape)
# (#samples, 1, #timepoints, #channels)
array_dstack_reshaped = np.reshape(array_dstack,(1, 1, SIZE_NET, 16))
inputs = Variable(torch.from_numpy(array_dstack_reshaped))
pred = net(inputs)
print(pred)
When I run the test script, the probability values are different and, even worse, not stable: running it multiple times gives different predictions... Any help is appreciated.

As @Jatentaki pointed out, the solution is to ALWAYS fix the seed in every script that needs to use the model in PyTorch:
torch.manual_seed(0)
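A minimal sketch of what that looks like in practice (assuming the same model class, SIZE_NET and file paths as above); the seed is set before the model is constructed, in both the training script and the inference script:

import torch

torch.manual_seed(0)  # fix the RNG before any weights are created

# training script: build, train, save
net = model(SIZE_NET)
# ... training ...
torch.save(net.state_dict(), save_path)

# inference script: fix the seed again, rebuild, load, switch to eval mode
torch.manual_seed(0)
net = model(SIZE_NET)
net.load_state_dict(torch.load(load_path))
net.eval()  # puts layers such as BatchNorm into evaluation mode
pred = net(inputs)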

Related

ValueError: Exception encountered when calling layer "batch_normalization_4" (type BatchNormalization)

I am trying to design a DenseNet using the model-subclassing method, in which I created one block of 5 different layers that is repeated (using a for loop) as many times as the user specifies. The problem is that after the first iteration, when the output is fed back into the first layer of the block, it raises an error. Please help me with how to feed the input into the block.
The DenseNet code is here:
class CNN(keras.Model):
    def __init__(self, nfilters, sfilters):
        super(CNN, self).__init__()
        self.num_filters = nfilters[0]
        self.dropout_rate = dropout_rate
        self.eps = eps
        self.num_blocks = num_blocks
        # conv1
        self.conv1 = tf.keras.layers.Conv2D(self.num_filters, kernel_size=(sfilters[0], sfilters[0]), use_bias=False, kernel_initializer='he_normal', kernel_regularizer=tf.keras.regularizers.l2(1e-4))
        # H_block
        self.h_bn = tf.keras.layers.BatchNormalization(epsilon=self.eps)
        self.h_act = tf.keras.layers.Activation('relu')
        self.h_zp = tf.keras.layers.ZeroPadding2D((1, 1))
        self.h_do = tf.keras.layers.Dropout(rate=self.dropout_rate)
        self.concat = tf.keras.layers.Concatenate()
        # trans_block
        self.compression_factor = compress_factor
        # compression_factor is the 'θ'
        self.tran_bn = tf.keras.layers.BatchNormalization(epsilon=eps)
        self.tran_act = tf.keras.layers.Activation('relu')
        #self.num_feature_maps = input.shape[1]
        # The value of 'm'
        self.tran_do = tf.keras.layers.Dropout(rate=self.dropout_rate)
        self.tran_avgp = tf.keras.layers.AveragePooling2D(pool_size=(3, 3))
        self.num_layers = num_layers_per_block
        self.growth_rate = growth_rate
        self.globalaverage = keras.layers.GlobalAveragePooling2D()
        self.dense = keras.layers.Dense(37)  # Num Classes for CIFAR-10
        self.activation = keras.layers.Activation('softmax')

    def call(self, inputs, training=False):
        inputs = tf.keras.layers.Input(shape=input_shape)
        x = self.conv1(inputs)
        for i in range(self.num_blocks):
            #x, num_filters = dense_block( x, num_layers_per_block , num_filters, growth_rate , dropout_rate )
            for i in range(self.num_layers):  # num_layers is the value of 'l'
                # H_block
                x = self.h_bn(x)
                x = self.h_act(x)
                x = self.h_zp(x)
                h_conv2d = tf.keras.layers.Conv2D(self.num_filters, kernel_size=(sfilters[0], sfilters[0]), use_bias=False, kernel_initializer='he_normal')
                x = h_conv2d(x)
                #x = self.h_conv2d(x)
                x = self.h_do(x)
                #inputs = tf.keras.layers.Concatenate()([conv_outputs, inputs])
                x = tf.concat([x, self.conv1(inputs)], -1)
                #x = self.concat(x, inputs)
            # transition
            # compression_factor is the 'θ'
            x = self.tran_bn(x)
            x = self.tran_act(x)
            #num_feature_maps = input.shape[1]
            self.num_feature_maps = x.shape[1]  # The value of 'm'
            tran_conv2d = tf.keras.layers.Conv2D(np.floor(self.compression_factor * self.num_feature_maps).astype(np.int), kernel_size=(1, 1), use_bias=False, padding='same', kernel_initializer='he_normal', kernel_regularizer=tf.keras.regularizers.l2(1e-4))
            x = tran_conv2d(x)
            x = self.tran_do(x)
            x = self.tran_avgp(x)
            #num_filters += growth_rate # To increase the number of filters for each layer.
            self.num_filters += self.growth_rate  # To increase the number of filters for each layer.
        x = self.globalaverage(x)
        x = self.dense(x)  # Num Classes for CIFAR-10
        x = self.activation(x)
Error is here:
ValueError: Exception encountered when calling layer "cnn_3" (type CNN).
in user code:
File "<ipython-input-6-86c881729324>", line 40, in call *
x = self.h_bn(x)
File "/usr/local/lib/python3.7/dist-packages/keras/utils/traceback_utils.py", line 67, in error_handler **
raise e.with_traceback(filtered_tb) from None
ValueError: Exception encountered when calling layer "batch_normalization_6" (type BatchNormalization).
Dimensions must be equal, but are 16 and 8 for '{{node batch_normalization_6/FusedBatchNormV3}} = FusedBatchNormV3[T=DT_FLOAT, U=DT_FLOAT, data_format="NHWC", epsilon=1.1e-05, exponential_avg_factor=0.01, is_training=true](Placeholder, batch_normalization_6/ReadVariableOp, batch_normalization_6/ReadVariableOp_1, batch_normalization_6/FusedBatchNormV3/ReadVariableOp, batch_normalization_6/FusedBatchNormV3/ReadVariableOp_1)' with input shapes: [?,126,126,16], [8], [8], [8], [8].
Call arguments received:
• inputs=tf.Tensor(shape=(None, 126, 126, 16), dtype=float32)
• training=True
Call arguments received:
• inputs=tf.Tensor(shape=(None, 128, 128, 1), dtype=float32)
• training=True

Neural Network initialized with random weights always returns the same output with random inputs

I have a problem with PyTorch in Spyder. A randomly initialized neural network always returns the same output, even for random input tensors. I am currently using a local GPU with Spyder. I made sure that the initialization of the weights is random and not all zeros.
Example:
x = torch.rand(1, 3, 360, 640)
x = self.stage_1(x)
x = self.stage_2(x)
x = self.stage_3(x)
x = self.stage_4(x)
x = self.stage_5(x)
x = self.stage_6(x)
x = torch.flatten(x, start_dim=1)
y = torch.rand(1, 3, 360, 640)
y = self.stage_1(y)
y = self.stage_2(y)
y = self.stage_3(y)
y = self.stage_4(y)
y = self.stage_5(y)
y = self.stage_6(y)
y = torch.flatten(y, start_dim=1)
This code always returns y == x.
This is the stage class:
class VggStage(nn.Module):
    def __init__(self,
                 input_channels: int,
                 output_channels: int) -> None:
        """
        Parameters
        ----------
        input_channels : int
            DESCRIPTION.
        output_channels : int
            DESCRIPTION.

        Returns
        -------
        None
            DESCRIPTION.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=input_channels,
                               out_channels=output_channels,
                               kernel_size=(3, 3))
        self.conv2 = nn.Conv2d(in_channels=output_channels,
                               out_channels=output_channels,
                               kernel_size=(3, 3))
        self.max_pool = nn.MaxPool2d(kernel_size=(2, 2),
                                     stride=(2, 2))

    def forward(self,
                x: torch.Tensor) -> torch.Tensor:
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.max_pool(x)
        return x

PyTorch to Keras model

I'm trying to replicate a model, but I'm having difficulties doing so with Keras. Here is my current implementation:
filters = 256
kernel_size = 3
strides = 1

# Head module
input = Input(shape=(img_height//scale_fact, img_width//scale_fact, img_depth))
conv0 = Conv2D(filters, kernel_size, strides=strides, padding='same',
               kernel_regularizer=regularizers.l2(0.01))(input)

# Body module
res = Conv2D(filters, kernel_size, strides=strides, padding='same')(conv0)
act = ReLU()(res)
res = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
res_rec = Add()([conv0, res])
for i in range(res_blocks):
    res1 = Conv2D(filters, kernel_size, strides=strides, padding='same')(res_rec)
    act = ReLU()(res1)
    res2 = Conv2D(filters, kernel_size, strides=strides, padding='same')(act)
    res_rec = Add()([res_rec, res2])
conv = Conv2D(filters, kernel_size, strides=strides, padding='same',
              kernel_regularizer=regularizers.l2(0.01))(res_rec)
add = Add()([conv0, conv])

# Tail module
conv = Conv2D(filters, kernel_size, strides=strides, padding='same',
              kernel_regularizer=regularizers.l2(0.01))(add)
act = ReLU()(conv)
up = UpSampling2D(size=scale_fact if scale_fact != 4 else 2)(act)  # TODO: try "Conv2DTranspose"
# mul = Multiply([np.zeros((img_width,img_height,img_depth)).fill(0.1), up])(up)

# When it's a 4X factor, we want the upscale split in two procedures
if scale_fact == 4:
    conv = Conv2D(filters, kernel_size, strides=strides, padding='same',
                  kernel_regularizer=regularizers.l2(0.01))(up)
    act = ReLU()(conv)
    up = UpSampling2D(size=2)(act)  # TODO: try "Conv2DTranspose"

output = Conv2D(filters=3,
                kernel_size=1,
                strides=1,
                padding='same',
                kernel_regularizer=regularizers.l2(0.01))(up)
model = Model(inputs=input, outputs=output)
Here is a link to the file I'm trying to replicate. How am I supposed to replicate this custom PyTorch UpSampler that implements a customized PixelShuffling method?
Here is the relevant part of the UpSampler that I'm having trouble with, for the most part:
import tensorflow as tf
import tensorflow.contrib.slim as slim

"""
Method to upscale an image using
conv2d transpose. Based on upscaling
method defined in the paper

x: input to be upscaled
scale: scale increase of upsample
features: number of features to compute
activation: activation function
"""
def upsample(x, scale=2, features=64, activation=tf.nn.relu):
    assert scale in [2, 3, 4]
    x = slim.conv2d(x, features, [3, 3], activation_fn=activation)
    if scale == 2:
        ps_features = 3 * (scale ** 2)
        x = slim.conv2d(x, ps_features, [3, 3], activation_fn=activation)
        #x = slim.conv2d_transpose(x, ps_features, 6, stride=1, activation_fn=activation)
        x = PS(x, 2, color=True)
    elif scale == 3:
        ps_features = 3 * (scale ** 2)
        x = slim.conv2d(x, ps_features, [3, 3], activation_fn=activation)
        #x = slim.conv2d_transpose(x, ps_features, 9, stride=1, activation_fn=activation)
        x = PS(x, 3, color=True)
    elif scale == 4:
        ps_features = 3 * (2 ** 2)
        for i in range(2):
            x = slim.conv2d(x, ps_features, [3, 3], activation_fn=activation)
            #x = slim.conv2d_transpose(x, ps_features, 6, stride=1, activation_fn=activation)
            x = PS(x, 2, color=True)
    return x

"""
Borrowed from https://github.com/tetrachrome/subpixel
Used for subpixel phase shifting after deconv operations
"""
def _phase_shift(I, r):
    bsize, a, b, c = I.get_shape().as_list()
    bsize = tf.shape(I)[0]  # Handling Dimension(None) type for undefined batch dim
    X = tf.reshape(I, (bsize, a, b, r, r))
    X = tf.transpose(X, (0, 1, 2, 4, 3))  # bsize, a, b, 1, 1
    X = tf.split(X, a, 1)  # a, [bsize, b, r, r]
    X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)  # bsize, b, a*r, r
    X = tf.split(X, b, 1)  # b, [bsize, a*r, r]
    X = tf.concat([tf.squeeze(x, axis=1) for x in X], 2)  # bsize, a*r, b*r
    return tf.reshape(X, (bsize, a*r, b*r, 1))

"""
Borrowed from https://github.com/tetrachrome/subpixel
Used for subpixel phase shifting after deconv operations
"""
def PS(X, r, color=False):
    if color:
        Xc = tf.split(X, 3, 3)
        X = tf.concat([_phase_shift(x, r) for x in Xc], 3)
    else:
        X = _phase_shift(X, r)
    return X
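For what it's worth, here is a minimal Keras sketch of the same subpixel (pixel-shuffle) upscaling, assuming NHWC tensors and the same meaning for scale and features as in the snippet above; tf.nn.depth_to_space performs, up to the exact channel ordering, the rearrangement that _phase_shift/PS implement by hand (upsample_keras is a hypothetical name):

import tensorflow as tf
from keras.layers import Conv2D, Lambda

def upsample_keras(x, scale=2, features=64):
    # x is a Keras tensor in NHWC layout
    x = Conv2D(features, 3, padding='same', activation='relu')(x)
    # produce 3 * scale**2 channels, then rearrange the channels into space
    x = Conv2D(3 * scale ** 2, 3, padding='same', activation='relu')(x)
    x = Lambda(lambda t: tf.nn.depth_to_space(t, scale))(x)
    return x

For a 4x factor you would apply the 2x version twice, mirroring the elif scale == 4 branch above.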

How to get trainable weights for a manual run of session in Keras?

Because I'm manually running a session, I can't seem to collect the trainable weights of a specific layer.
x = Convolution2D(16, 3, 3, init='he_normal', border_mode='same')(img)
for i in range(0, self.blocks_per_group):
    nb_filters = 16 * self.widening_factor
    x = residual_block(x, nb_filters=nb_filters, subsample_factor=1)

for i in range(0, self.blocks_per_group):
    nb_filters = 32 * self.widening_factor
    if i == 0:
        subsample_factor = 2
    else:
        subsample_factor = 1
    x = residual_block(x, nb_filters=nb_filters, subsample_factor=subsample_factor)

for i in range(0, self.blocks_per_group):
    nb_filters = 64 * self.widening_factor
    if i == 0:
        subsample_factor = 2
    else:
        subsample_factor = 1
    x = residual_block(x, nb_filters=nb_filters, subsample_factor=subsample_factor)

x = BatchNormalization(axis=3)(x)
x = Activation('relu')(x)
x = AveragePooling2D(pool_size=(8, 8), strides=None, border_mode='valid')(x)
x = tf.reshape(x, [-1, np.prod(x.get_shape()[1:].as_list())])

# Readout layer
preds = Dense(self.nb_classes, activation='softmax')(x)

loss = tf.reduce_mean(categorical_crossentropy(labels, preds))
optimizer = tf.train.GradientDescentOptimizer(0.5).minimize(loss)

with sess.as_default():
    for i in range(10):
        batch = self.next_batch(self.batch_num)
        _, l = sess.run([optimizer, loss],
                        feed_dict={img: batch[0], labels: batch[1]})
        print(l)
        print(type(weights))
I'm trying to get the weights of the last convolution layer.
I tried get_trainable_weights(layer) and layer.get_weights() but I did not manage to get anywhere.
The error
AttributeError: 'Tensor' object has no attribute 'trainable_weights'
From looking at the source* it seems like you're looking for layer.trainable_weights (it's a list, not a member function). Please note this returns tensors.
If you want to get their actual values, you need to evaluate them in a session:
weights1, weights2 = sess.run([weight_tensor_1, weight_tensor_2])
*https://github.com/fchollet/keras/blob/master/keras/layers/convolutional.py#L401
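For example, a minimal sketch under the same setup (the variable names are hypothetical); the key point is keeping a reference to the layer object itself rather than only to its output tensor:

conv_layer = Convolution2D(16, 3, 3, init='he_normal', border_mode='same')
x = conv_layer(img)
# ... build the rest of the graph and train as above ...

# trainable_weights is a list of TF variables (here: kernel and bias)
weight_tensors = conv_layer.trainable_weights
kernel_values, bias_values = sess.run(weight_tensors)
print(kernel_values.shape, bias_values.shape)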

How to create layer0 input for input images with 3 channels

Hi, I am following the http://deeplearning.net/tutorial/code/convolutional_mlp.py code to implement a convolutional neural net. I have input images where the channel is important, and hence I want to have a 3-channel feature map as the layer 0 input.
So I need something like this
layer0_input = x.reshape((batch_size, 3, 240, 135)) # width 240, height 135, 3 channels
instead of
layer0_input = x.reshape((batch_size, 1, 28, 28)) # 28*28 normalized MNIST gray scale images
which will be used here
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 240, 135),
    filter_shape=(nkerns[0], 1, 5, 5),
    poolsize=(2, 2)
)
where that x is provided to theano as
train_model = theano.function(
    [index],
    cost,
    updates=updates,
    givens={
        x: train_set_x[index * batch_size: (index + 1) * batch_size],
        y: train_set_y[index * batch_size: (index + 1) * batch_size]
    }
)
So - my question is - how should I create (shape) that train_set_x?
With grayscale intensity (i.e. a single channel), train_set_x is created as
shared_x = theano.shared(numpy.asarray(data_x,
                                       dtype=theano.config.floatX),
where data_x is a flattened numpy array of length 784 (for 28*28 pixels)
Thanks a lot for any advice.

I was able to get it working. I am pasting some code here which might help someone. Not very elegant - but it works.
def shuffle_in_unison(a, b):
    # courtesy http://stackoverflow.com/users/190280/josh-bleecher-snyder
    assert len(a) == len(b)
    shuffled_a = np.empty(a.shape, dtype=a.dtype)
    shuffled_b = np.empty(b.shape, dtype=b.dtype)
    permutation = np.random.permutation(len(a))
    for old_index, new_index in enumerate(permutation):
        shuffled_a[new_index] = a[old_index]
        shuffled_b[new_index] = b[old_index]
    return shuffled_a, shuffled_b

def createDataSet(imagefolder):
    os.chdir(imagefolder)
    # total number of files
    number_of_files = len([item for item in os.listdir('.') if os.path.isfile(os.path.join('.', item))])
    # get a shuffled list: I needed this because my image names were of the format n_x_<some details>.jpg
    # where n was my target and x was a number from 0 to m-1, where m was the number of samples
    # of the target value n. So I needed to shuffle and iterate while putting images in the train,
    # test and validate arrays
    image_index_array = range(0, number_of_files)
    random.seed(12)
    random.shuffle(image_index_array)
    # split 80/10/10 - train/test/val
    trainsize = int(number_of_files * .8)
    testsize = int(number_of_files * .1)
    valsize = number_of_files - trainsize - testsize
    # create the random value arrays of train/test/val by slicing the total image index array
    train_index_array = image_index_array[0:trainsize]
    test_index_array = image_index_array[trainsize:trainsize + testsize]
    validate_index_array = image_index_array[trainsize + testsize:]
    # initialize the data structures
    dataset = {'train': [[], []], 'test': [[], []], 'validate': [[], []]}
    i_counter = 0
    train_X = []
    train_y = []
    test_X = []
    test_y = []
    val_X = []
    val_y = []
    for item in os.listdir('.'):
        if not os.path.isfile(os.path.join('.', item)):
            continue
        if item.endswith('.pkl'):
            continue
        print 'Processing item ' + item
        item_y = item.split('_')[0]
        item_x = cv2.imread(item)
        height, width = item_x.shape[:2]
        # this was my requirement - skip it if you do not need it
        if height != 135 or width != 240:
            continue
        # get 3 channels
        b, g, r = cv2.split(item_x)
        item_x = [b, g, r]
        item_x = np.array(item_x)
        item_x = item_x.reshape(3, 135 * 240)
        if i_counter in test_index_array:
            test_X.append(item_x)
            test_y.append(item_y)
        elif i_counter in validate_index_array:
            val_X.append(item_x)
            val_y.append(item_y)
        else:
            train_X.append(item_x)
            train_y.append(item_y)
        i_counter = i_counter + 1
    # fix the dimensions: flatten out the channel and intensity dimensions
    train_X = np.array(train_X)
    train_X = train_X.reshape(train_X.shape[0], train_X.shape[1] * train_X.shape[2])
    test_X = np.array(test_X)
    test_X = test_X.reshape(test_X.shape[0], test_X.shape[1] * test_X.shape[2])
    val_X = np.array(val_X)
    val_X = val_X.reshape(val_X.shape[0], val_X.shape[1] * val_X.shape[2])
    train_y = np.array(train_y)
    test_y = np.array(test_y)
    val_y = np.array(val_y)
    # shuffle the train and test arrays in unison
    train_X, train_y = shuffle_in_unison(train_X, train_y)
    test_X, test_y = shuffle_in_unison(test_X, test_y)
    # pickle them
    dataset['train'] = [train_X, train_y]
    dataset['test'] = [test_X, test_y]
    dataset['validate'] = [val_X, val_y]
    output = open('pcount.pkl', 'wb')
    cPickle.dump(dataset, output)
    output.close()
Once you have this pickle file, you can use it in convolutional_mlp.py like this:
layer0_input = x.reshape((batch_size, 3, 135, 240))
# Construct the first convolutional pooling layer:
# filtering reduces the image size to (135-8+1, 240-5+1) = (128, 236)
# maxpooling reduces this further to (128/2, 236/2) = (64, 118)
# 4D output tensor is thus of shape (batch_size, nkerns[0], 64, 118)
layer0 = LeNetConvPoolLayer(
    rng,
    input=layer0_input,
    image_shape=(batch_size, 3, 135, 240),
    filter_shape=(nkerns[0], 3, 8, 5),
    poolsize=(2, 2)
)
The load_data function in logistic_sgd.py will need a small change, as below:
f = open(dataset, 'rb')
dump = cPickle.load(f)
train_set = dump['train']
valid_set = dump['validate']
test_set = dump['test']
f.close()
Hope this helps
