'my_layer' object has no attribute '_dynamic' - keras

I want to design a model with TensorFlow 2.0. When I compile the model, it reports the error:
'my_layer' object has no attribute '_dynamic'
The code is:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

class my_layer(layers.Layer):
    def __init__(self, classes):
        self.conv1 = layers.Conv2D(32, (3, 3), strides=1, padding='same')
        self.conv2 = layers.Conv2D(64, (3, 3), strides=1, padding='same')
        self.conv3 = layers.Conv2D(32, (3, 3), strides=1, padding='same')
        self.conv4 = layers.Conv2D(classes, (3, 3), strides=1, padding='same')
        self.bn = layers.BatchNormalization()
        self.glbavgpool = layers.GlobalMaxPooling2D()
        self.fc = layers.Dense(classes)

    def call(self, inputs):
        x = self.conv1(inputs)
        x = self.bn(x)
        x = keras.activations.relu(x)
        x = self.conv2(x)
        x = keras.activations.relu(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.bn(x)
        x = self.glbavgpool(x)
        out = self.fc(x)
        return out

class mymodel(keras.Model):
    def __init__(self, classes):
        super(mymodel, self).__init__()
        self.ml = my_layer(classes=classes)

    def call(self, inputs):
        return self.ml(inputs)
Then I moved all the custom layers directly into mymodel and it worked, so I think I'm probably using my_layer the wrong way.

You're seeing this error because you forgot to call the superclass constructor in your my_layer class. Add the following line:
class my_layer(layers.Layer):
    def __init__(self, classes):
        super(my_layer, self).__init__()  # <-- Remember to call superclass constructor!!
        self.conv1 = layers.Conv2D(32, (3, 3), strides=1, padding='same')
        self.conv2 = layers.Conv2D(64, (3, 3), strides=1, padding='same')
        self.conv3 = layers.Conv2D(32, (3, 3), strides=1, padding='same')
        self.conv4 = layers.Conv2D(classes, (3, 3), strides=1, padding='same')
        self.bn = layers.BatchNormalization()
        self.glbavgpool = layers.GlobalMaxPooling2D()
        self.fc = layers.Dense(classes)
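As a quick sanity check (a minimal sketch, not part of the original answer; the input shape and class count are arbitrary), the fixed layer now builds and compiles inside mymodel without the _dynamic error:

import tensorflow as tf

model = mymodel(classes=10)
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))
dummy = tf.random.normal((2, 32, 32, 3))  # batch of 2 arbitrary RGB images
print(model(dummy).shape)  # (2, 10)

The _dynamic attribute is one of the internals that Layer.__init__ sets up; a subclass that skips the superclass constructor is missing all of that state, which is why the very first attribute lookup fails.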

Related

Decreased accuracy when converting keras code to pytorch

I am a beginner in PyTorch and I am trying to convert a Keras model to PyTorch for a classification task.
The results differ between the two implementations with the same settings: accuracy is reduced by 5% and the loss increases from 0.2 to 4.5. Can anyone tell me whether I converted the code to PyTorch correctly? Thank you so much.
IMG_HEIGHT=48, IMG_WIDTH=48, channels=3, num_class=164
model = keras.models.Sequential([
    keras.layers.Conv2D(filters=16, kernel_size=(3,3), activation='relu', input_shape=(IMG_HEIGHT, IMG_WIDTH, channels)),
    keras.layers.Conv2D(filters=32, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(axis=-1),
    keras.layers.Conv2D(filters=64, kernel_size=(3,3), activation='relu'),
    keras.layers.Conv2D(filters=128, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPool2D(pool_size=(2, 2)),
    keras.layers.BatchNormalization(axis=-1),
    keras.layers.Flatten(),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.BatchNormalization(),
    keras.layers.Dropout(rate=0.5),
    keras.layers.Dense(164, activation='softmax')])
PyTorch:
class convnet(nn.Module):
    def __init__(self, num_class):
        super(convnet, self).__init__()
        self.Conv2d_1 = nn.Conv2d(3, 16, (3,3))
        self.relu_1 = nn.ReLU()
        self.Conv2d_2 = nn.Conv2d(16, 32, (3,3))
        self.relu_2 = nn.ReLU()
        self.maxpool_1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.batch_1 = nn.BatchNorm2d(32)
        self.Conv2d_3 = nn.Conv2d(32, 64, (3,3))
        self.relu_3 = nn.ReLU()
        self.Conv2d_4 = nn.Conv2d(64, 128, (3,3))
        self.relu_4 = nn.ReLU()
        self.maxpool_2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.batch_2 = nn.BatchNorm2d(128)
        self.fc1 = nn.Linear(10368, 1024)
        self.relu_5 = nn.ReLU()
        self.batch_3 = nn.BatchNorm1d(1024)
        self.dropout_1 = nn.Dropout(p=0.5)
        self.fc2 = nn.Linear(1024, num_class)
        self.Softmax_1 = nn.Softmax()

    def forward(self, x):
        y = self.Conv2d_1(x)
        y = self.relu_1(y)
        y = self.Conv2d_2(y)
        y = self.relu_2(y)
        y = self.maxpool_1(y)
        y = self.batch_1(y)
        y = self.Conv2d_3(y)
        y = self.relu_3(y)
        y = self.Conv2d_4(y)
        y = self.relu_4(y)
        y = self.maxpool_2(y)
        y = self.batch_2(y)
        y = y.view(y.size(0), -1)
        y = self.fc1(y)
        y = self.relu_5(y)
        y = self.batch_3(y)
        y = self.dropout_1(y)
        y = self.fc2(y)
        y = self.Softmax_1(y)
        return y
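One detail worth checking when comparing the two implementations, assuming the PyTorch model is trained with nn.CrossEntropyLoss: that loss applies log-softmax internally, so ending forward with nn.Softmax squashes the logits twice and commonly produces exactly this kind of stalled, high loss. A minimal sketch (the layer sizes are illustrative stand-ins, not taken from the code above):

import torch
import torch.nn as nn

head = nn.Linear(1024, 164)            # stand-in for fc2 above
criterion = nn.CrossEntropyLoss()      # applies log-softmax itself

logits = head(torch.randn(8, 1024))    # return raw logits, no Softmax layer
loss = criterion(logits, torch.randint(0, 164, (8,)))
probs = torch.softmax(logits, dim=1)   # softmax only when probabilities are needed

Keras does not hit the same pitfall here, because Dense(164, activation='softmax') is paired with a categorical cross-entropy that expects probabilities.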

TensorFlow: "NotImplementedError: When subclassing the `Model` class, you should implement a `call` method."

inside "PWCDCNet.py" I have defined the following ANN:
class Conv2D(tfk.layers.Layer):
    def __init__(self, filters, kernel_size, strides, name=None, padding=1, dilation_rate=1):
        super(Conv2D, self).__init__(name=name)
        self.conv_out = tfk.layers.Conv2D(filters=filters,
                                          kernel_size=kernel_size,
                                          strides=strides,
                                          padding='same',
                                          kernel_initializer='he_normal',
                                          dilation_rate=dilation_rate,
                                          activation=tfk.layers.LeakyReLU(0.1))

    def call(self, inputs):
        return self.conv_out(inputs)

class DeConv2D(tfk.layers.Layer):
    def __init__(self, filters, kernel_size=4, strides=2, name=None):
        super(DeConv2D, self).__init__(name=name)
        self.deconv_out = tfk.layers.Conv2DTranspose(filters=filters,
                                                     kernel_size=kernel_size,
                                                     strides=strides,
                                                     padding='same',
                                                     name=name)

    def call(self, inputs):
        return self.deconv_out(inputs)

def CostVolumn(c1, warp, search_range, name='cost_volumn'):
    padded_lvl = tf.pad(warp, [[0, 0], [search_range, search_range], [search_range, search_range], [0, 0]])
    _, h, w, _ = tf.unstack(tf.shape(c1))
    max_offset = search_range * 2 + 1
    cost_vol = []
    for y in range(0, max_offset):
        for x in range(0, max_offset):
            slice = tf.slice(padded_lvl, [0, y, x, 0], [-1, h, w, -1])
            cost = tf.reduce_mean(c1 * slice, axis=3, keepdims=True)
            cost_vol.append(cost)
    cost_vol = tf.concat(cost_vol, axis=3)
    cost_vol = tf.nn.leaky_relu(cost_vol, alpha=0.1, name=name)
    return cost_vol

class PredictFlow(tfk.layers.Layer):
    def __init__(self, name=None):
        super(PredictFlow, self).__init__()
        self.conv_out = tfk.layers.Conv2D(filters=2,
                                          kernel_size=3,
                                          strides=1,
                                          name=name,
                                          padding='same')

    def call(self, inputs):
        return self.conv_out(inputs)
class PWCDCNet(tf.keras.Model):
    def __init__(self, max_displacement=4):
        super(PWCDCNet, self).__init__()
        self.conv1a  = Conv2D( 16, kernel_size=3, strides=2, name='conv1a')
        self.conv1aa = Conv2D( 16, kernel_size=3, strides=1, name='conv1aa')
        self.conv1b  = Conv2D( 16, kernel_size=3, strides=1, name='conv1b')
        self.conv2a  = Conv2D( 32, kernel_size=3, strides=2, name='conv2a')
        self.conv2aa = Conv2D( 32, kernel_size=3, strides=1, name='conv2aa')
        self.conv2b  = Conv2D( 32, kernel_size=3, strides=1, name='conv2b')
        self.conv3a  = Conv2D( 64, kernel_size=3, strides=2, name='conv3a')
        self.conv3aa = Conv2D( 64, kernel_size=3, strides=1, name='conv3aa')
        self.conv3b  = Conv2D( 64, kernel_size=3, strides=1, name='conv3b')
        self.conv4a  = Conv2D( 96, kernel_size=3, strides=2, name='conv4a')
        self.conv4aa = Conv2D( 96, kernel_size=3, strides=1, name='conv4aa')
        self.conv4b  = Conv2D( 96, kernel_size=3, strides=1, name='conv4b')
        self.conv5a  = Conv2D(128, kernel_size=3, strides=2, name='conv5a')
        self.conv5aa = Conv2D(128, kernel_size=3, strides=1, name='conv5aa')
        self.conv5b  = Conv2D(128, kernel_size=3, strides=1, name='conv5b')
        self.conv6aa = Conv2D(196, kernel_size=3, strides=2, name='conv6aa')
        self.conv6a  = Conv2D(196, kernel_size=3, strides=1, name='conv6a')
        self.conv6b  = Conv2D(196, kernel_size=3, strides=1, name='conv6b')
        self.LeakyReLU = tfk.layers.LeakyReLU(0.1, name='LeakyReLU')
        self.conv6_0 = Conv2D(128, kernel_size=3, strides=1, name='conv6_0')
        self.conv6_1 = Conv2D(128, kernel_size=3, strides=1, name='conv6_1')
        self.conv6_2 = Conv2D(96, kernel_size=3, strides=1, name='conv6_2')
        self.conv6_3 = Conv2D(64, kernel_size=3, strides=1, name='conv6_3')
        self.conv6_4 = Conv2D(32, kernel_size=3, strides=1, name='conv6_4')
        self.deconv6 = DeConv2D(2, kernel_size=4, strides=2, name='deconv_6')
        self.upfeat6 = DeConv2D(2, kernel_size=4, strides=2, name='upfeat_6')
        self.conv5_0 = Conv2D(128, kernel_size=3, strides=1, name='conv5_0')
        self.conv5_1 = Conv2D(128, kernel_size=3, strides=1, name='conv5_1')
        self.conv5_2 = Conv2D(96, kernel_size=3, strides=1, name='conv5_2')
        self.conv5_3 = Conv2D(64, kernel_size=3, strides=1, name='conv5_3')
        self.conv5_4 = Conv2D(32, kernel_size=3, strides=1, name='conv5_4')
        self.deconv5 = DeConv2D(2, kernel_size=4, strides=2, name='deconv_5')
        self.upfeat5 = DeConv2D(2, kernel_size=4, strides=2, name='upfeat_5')
        self.conv4_0 = Conv2D(128, kernel_size=3, strides=1, name='conv4_0')
        self.conv4_1 = Conv2D(128, kernel_size=3, strides=1, name='conv4_1')
        self.conv4_2 = Conv2D(96, kernel_size=3, strides=1, name='conv4_2')
        self.conv4_3 = Conv2D(64, kernel_size=3, strides=1, name='conv4_3')
        self.conv4_4 = Conv2D(32, kernel_size=3, strides=1, name='conv4_4')
        self.deconv4 = DeConv2D(2, kernel_size=4, strides=2, name='deconv4')
        self.upfeat4 = DeConv2D(2, kernel_size=4, strides=2, name='upfeat4')
        self.conv3_0 = Conv2D(128, kernel_size=3, strides=1, name='conv3_0')
        self.conv3_1 = Conv2D(128, kernel_size=3, strides=1, name='conv3_1')
        self.conv3_2 = Conv2D(96, kernel_size=3, strides=1, name='conv3_2')
        self.conv3_3 = Conv2D(64, kernel_size=3, strides=1, name='conv3_3')
        self.conv3_4 = Conv2D(32, kernel_size=3, strides=1, name='conv3_4')
        self.deconv3 = DeConv2D(2, kernel_size=4, strides=2, name='deconv3')
        self.upfeat3 = DeConv2D(2, kernel_size=4, strides=2, name='upfeat3')
        self.conv2_0 = Conv2D(128, kernel_size=3, strides=1, name='conv2_0')
        self.conv2_1 = Conv2D(128, kernel_size=3, strides=1, name='conv2_1')
        self.conv2_2 = Conv2D(96, kernel_size=3, strides=1, name='conv2_2')
        self.conv2_3 = Conv2D(64, kernel_size=3, strides=1, name='conv2_3')
        self.conv2_4 = Conv2D(32, kernel_size=3, strides=1, name='conv2_4')
        self.deconv2 = DeConv2D(2, kernel_size=4, strides=2, name='deconv2')
        self.dc_conv1 = Conv2D(128, kernel_size=3, strides=1, padding=1, dilation_rate=1, name='dc_conv1')
        self.dc_conv2 = Conv2D(128, kernel_size=3, strides=1, padding=2, dilation_rate=2, name='dc_conv2')
        self.dc_conv3 = Conv2D(128, kernel_size=3, strides=1, padding=4, dilation_rate=4, name='dc_conv3')
        self.dc_conv4 = Conv2D(96, kernel_size=3, strides=1, padding=8, dilation_rate=8, name='dc_conv4')
        self.dc_conv5 = Conv2D(64, kernel_size=3, strides=1, padding=16, dilation_rate=16, name='dc_conv5')
        self.dc_conv6 = Conv2D(32, kernel_size=3, strides=1, padding=1, dilation_rate=1, name='dc_conv6')
        self.predict_flow6 = PredictFlow(name='predict_flow6')
        self.predict_flow5 = PredictFlow(name='predict_flow5')
        self.predict_flow4 = PredictFlow(name='predict_flow4')
        self.predict_flow3 = PredictFlow(name='predict_flow3')
        self.predict_flow2 = PredictFlow(name='predict_flow2')
        self.dc_conv7 = PredictFlow(name='dc_conv7')
    def call(self, inputs):
        im1 = inputs[:, :, :, :3]
        im2 = inputs[:, :, :, 3:]
        c11 = self.conv1b(self.conv1aa(self.conv1a(im1)))
        c21 = self.conv1b(self.conv1aa(self.conv1a(im2)))
        c12 = self.conv2b(self.conv2aa(self.conv2a(c11)))
        c22 = self.conv2b(self.conv2aa(self.conv2a(c21)))
        c13 = self.conv3b(self.conv3aa(self.conv3a(c12)))
        c23 = self.conv3b(self.conv3aa(self.conv3a(c22)))
        c14 = self.conv4b(self.conv4aa(self.conv4a(c13)))
        c24 = self.conv4b(self.conv4aa(self.conv4a(c23)))
        c15 = self.conv5b(self.conv5aa(self.conv5a(c14)))
        c25 = self.conv5b(self.conv5aa(self.conv5a(c24)))
        c16 = self.conv6b(self.conv6a(self.conv6aa(c15)))
        c26 = self.conv6b(self.conv6a(self.conv6aa(c25)))
        ### 6th flow
        corr6 = CostVolumn(c1=c16, warp=c26, search_range=4)
        x = tf.concat([self.conv6_0(corr6), corr6], 3)
        x = tf.concat([self.conv6_1(x), x], 3)
        x = tf.concat([self.conv6_2(x), x], 3)
        x = tf.concat([self.conv6_3(x), x], 3)
        x = tf.concat([self.conv6_4(x), x], 3)
        flow6 = self.predict_flow6(x)
        up_flow6 = self.deconv6(flow6)
        up_feat6 = self.upfeat6(x)
        ### 5th flow
        warp5 = bilinear_warp(c25, up_flow6*0.625)
        corr5 = CostVolumn(c1=c15, warp=warp5, search_range=4)
        x = tf.concat([corr5, c15, up_flow6, up_feat6], 3)
        x = tf.concat([self.conv5_0(x), x], 3)
        x = tf.concat([self.conv5_1(x), x], 3)
        x = tf.concat([self.conv5_2(x), x], 3)
        x = tf.concat([self.conv5_3(x), x], 3)
        x = tf.concat([self.conv5_4(x), x], 3)
        flow5 = self.predict_flow5(x)
        up_flow5 = self.deconv5(flow5)
        up_feat5 = self.upfeat5(x)
        ### 4th flow
        warp4 = bilinear_warp(c24, up_flow5*1.25)
        corr4 = CostVolumn(c1=c14, warp=warp4, search_range=4)
        x = tf.concat([corr4, c14, up_flow5, up_feat5], 3)
        x = tf.concat([self.conv4_0(x), x], 3)
        x = tf.concat([self.conv4_1(x), x], 3)
        x = tf.concat([self.conv4_2(x), x], 3)
        x = tf.concat([self.conv4_3(x), x], 3)
        x = tf.concat([self.conv4_4(x), x], 3)
        flow4 = self.predict_flow4(x)
        up_flow4 = self.deconv4(flow4)
        up_feat4 = self.upfeat4(x)
        ### 3rd flow
        warp3 = bilinear_warp(c23, up_flow4*2.5)
        corr3 = CostVolumn(c1=c13, warp=warp3, search_range=4)
        x = tf.concat([corr3, c13, up_flow4, up_feat4], 3)
        x = tf.concat([self.conv3_0(x), x], 3)
        x = tf.concat([self.conv3_1(x), x], 3)
        x = tf.concat([self.conv3_2(x), x], 3)
        x = tf.concat([self.conv3_3(x), x], 3)
        x = tf.concat([self.conv3_4(x), x], 3)
        flow3 = self.predict_flow3(x)
        up_flow3 = self.deconv3(flow3)
        up_feat3 = self.upfeat3(x)
        # 2nd flow
        warp2 = bilinear_warp(c22, up_flow3*5.0)
        corr2 = CostVolumn(c1=c12, warp=warp2, search_range=4)
        x = tf.concat([corr2, c12, up_flow3, up_feat3], 3)
        x = tf.concat([self.conv2_0(x), x], 3)
        x = tf.concat([self.conv2_1(x), x], 3)
        x = tf.concat([self.conv2_2(x), x], 3)
        x = tf.concat([self.conv2_3(x), x], 3)
        x = tf.concat([self.conv2_4(x), x], 3)
        flow2 = self.predict_flow2(x)
        x = self.dc_conv4(self.dc_conv3(self.dc_conv2(self.dc_conv1(x))))
        flow2 = flow2 + self.dc_conv7(self.dc_conv6(self.dc_conv5(x)))
        return flow2
This ANN gets imported into another file:
from PWCNet_tf2.PWCDCNet import PWCDCNet
PWC_model = tf.keras.Model(PWCDCNet())
checkpoint = tf.train.Checkpoint(PWC_model)
save_path = checkpoint.save('/some_directories/PWCNet_tf2/checkpoints/0001/tf_ckpt')
checkpoint.restore(save_path)
But if I try to:
output = PWC_model(input)
I get the error:
NotImplementedError: When subclassing the `Model` class, you should implement a `call` method.
I am confused, since a call method is implemented for every Layer, and also for the ANN as a whole. Is the error due to the way I import the model?
Thanks to all of you in advance!
The error was that I called:
PWC_model = tf.keras.Model(PWCDCNet())
instead of:
PWC_model = PWCDCNet()
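A subclassed model is meant to be used directly; passing an instance into tf.keras.Model(...) constructs a fresh base Model that never got a call implementation of its own, hence the NotImplementedError. A minimal sketch of the corrected usage (the checkpoint path is a placeholder, and bilinear_warp must be defined in PWCDCNet.py for the full forward pass):

import tensorflow as tf
from PWCNet_tf2.PWCDCNet import PWCDCNet

PWC_model = PWCDCNet()  # use the subclass instance directly

checkpoint = tf.train.Checkpoint(model=PWC_model)
save_path = checkpoint.save('./checkpoints/tf_ckpt')  # placeholder path
checkpoint.restore(save_path)

# The model expects both images stacked along the channel axis: (N, H, W, 6).
input_batch = tf.random.normal((1, 256, 256, 6))
output = PWC_model(input_batch)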

expected string or bytes-like object when calling model subclass

I have attempted to write a generative adversarial network. Below is the code of one of the discriminators.
class D1(Layer):
    def __init__(self, input_shape=(256, 256, 3), name='d1', **kwargs):
        super(D1, self).__init__(name=name, **kwargs)
        self.h1 = Conv2D(64, (3, 3), strides=(1, 1), padding='same')
        self.h2 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h3 = LeakyReLU(alpha=0.2)
        self.h4 = Conv2D(128, (3, 3), strides=(1, 1), padding='same')
        self.h5 = Conv2D(128, (3, 3), strides=(1, 1), padding='same')
        self.h6 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h7 = LeakyReLU(alpha=0.2)
        self.h8 = Conv2D(256, (3, 3), strides=(1, 1), padding='same')
        self.h9 = Conv2D(256, (3, 3), strides=(1, 1), padding='same')
        self.h10 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h11 = LeakyReLU(alpha=0.2)
        self.h12 = Conv2D(512, (3, 3), strides=(1, 1), padding='same')
        self.h13 = Conv2D(512, (3, 3), strides=(1, 1), padding='same')
        self.h14 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h15 = Flatten()
        self.h16 = Dropout(0.4)
        self.D1R = Dense(1, activation='sigmoid')
        self.h17 = Dense(4096, activation='relu')
        self.h18 = Dense(4096, activation='relu')
        self.D1C = Dense(16, activation='sigmoid')

    def call(self, inputs):
        x = self.h1(inputs)
        x = self.h2(x)
        x = self.h3(x)
        x = self.h4(x)
        x = self.h5(x)
        x = self.h6(x)
        x = self.h7(x)
        x = self.h8(x)
        x = self.h9(x)
        x = self.h10(x)
        x = self.h11(x)
        x = self.h12(x)
        x = self.h13(x)
        x = self.h14(x)
        x = self.h15(x)
        x = self.h16(x)
        d1r = self.D1R(x)
        x = self.h17(x)
        x = self.h18(x)
        d1c = self.D1C(x)
        return d1r, d1c
class Discriminator1(Model):
    def __init__(self, input_shape=(None, 256, 256, 3), name='disc1', **kwargs):
        super(Discriminator1, self).__init__(name=name, **kwargs)
        self.d1 = D1(input_shape=input_shape)

    def call(self, inputs):
        image = inputs
        d1r, d1c = self.d1(image)
        d1_loss = d1_loss(d1r, d1c)
        self.add_loss(d1_loss)
        return out
When I call it in training, it throws a TypeError: expected string or bytes-like object. I cannot figure out what it is; none of my functions are supposed to use strings. Any help?
def generate_latent_noise(latent_dim, n_samples):
    x_input = randn(latent_dim * n_samples)
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input

def generate_fake_samples(g, latent_dim, n_samples, y_i, y_l):
    x_input = generate_latent_noise(latent_dim, n_samples)
    X = g.predict(x_input)
    y = zeros((n_samples, 1))
    for i in range(n_samples-1):
        intent = y_i[i]
        bio = y_l[i]
    return X, y, intent, bio
epochs = 200
opt = SGD(learning_rate=1e-3, momentum=0.99)
metric = Accuracy()
yi, yl = retrieve_target_labels('/content/drive/My Drive/Project/input.xlsx')
g = Generator(100)
d1 = D1((256, 256, 3))
d2 = D2((256, 256, 3))
gen = G_Model((256, 256, 3), 100, yi, yl)
disc1 = Discriminator1((256, 256, 3), 100)
disc2 = Discriminator2((256, 256, 3), 100)
art, yc_real, yi_real, yl_real = load_real_samples('/content/drive/MyDrive/Project/TrainSA.xlsx')
half_batch = yi.shape[0]
n_batch = half_batch * 2
batch_per_epoch = int(art.shape[0] / n_batch)

for epoch in range(epochs):
    for batch in range(batch_per_epoch):
        fake, y, yi, yl = generate_fake_samples(g, 100, half_batch, yi, yl)
        real, y_real, c_real, i_real, l_real = generate_real_samples(art, half_batch, yc_real, yi_real, yl_real)
        fake_image = tf.convert_to_tensor(fake)
        d1r, d1c = d1(fake_image)  # error!
        d1_loss_fake = d1.losses
        d1r, d1c = d1(real)
        d1_loss = d1.losses
        d2i_fake, d2l_fake = d2(fake_image)
        d2_loss_fake = d2.losses
        d2i, d2l = d2(real)
        d2_loss = d2.losses
        g_loss = gen.losses
It is a bit difficult to provide a minimal working example, as the error occurs near the end of my code after a lot of functions have been called, but I tried to include the ones that might be involved in the error.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-95-d4bb0da7c68f> in <module>()
23 #d1_loss_fake = d1.losses
24 real_image = tf.convert_to_tensor(real, dtype = tf.float32)
---> 25 d1r, d1c = disc1(real_image)
26 d1_loss = d1.losses
27 d2i_fake, d2l_fake = d2(fake_image)
3 frames
/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py in name_scope(self, name)
4126 # Scopes created in the root must match the more restrictive
4127 # op name regex, which constrains the initial character.
-> 4128 if not _VALID_OP_NAME_REGEX.match(name):
4129 raise ValueError("'%s' is not a valid scope name" % name)
4130 old_stack = self._name_stack
TypeError: expected string or bytes-like object
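Judging from the traceback, _VALID_OP_NAME_REGEX.match(name) is handed a non-string, which lines up with how the discriminator is constructed above: Discriminator1((256, 256, 3), 100) passes 100 positionally into the name parameter, so TensorFlow tries to open a name scope with an integer. A hedged sketch of the fix (what the 100 was meant for is unclear from the thread, so it is simply dropped here):

# Keep `name` a string; pass constructor arguments by keyword so a stray
# positional value cannot land in the name slot.
disc1 = Discriminator1(input_shape=(256, 256, 3), name='disc1')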

The training loss of vgg16 implemented in pytorch does not decrease

I want to try some toy examples in PyTorch, but the training loss does not decrease during training.
Some info is provided here:
The model is VGG16, consisting of 13 conv layers and 3 dense layers.
The data is CIFAR100 in PyTorch.
I chose cross entropy as the loss function.
The code is as follows:
# encoding: utf-8
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torchvision
import numpy as np

class VGG16(torch.nn.Module):
    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        # construct model
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.fc6 = nn.Linear(512, 512)
        self.fc7 = nn.Linear(512, 512)
        self.fc8 = nn.Linear(512, n_classes)

    def forward(self, x):
        x = F.relu(self.conv1_1(x))
        x = F.relu(self.conv1_2(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv2_1(x))
        x = F.relu(self.conv2_2(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv3_1(x))
        x = F.relu(self.conv3_2(x))
        x = F.relu(self.conv3_3(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv4_1(x))
        x = F.relu(self.conv4_2(x))
        x = F.relu(self.conv4_3(x))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv5_1(x))
        x = F.relu(self.conv5_2(x))
        x = F.relu(self.conv5_3(x))
        x = F.max_pool2d(x, (2, 2))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x = self.fc8(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

if __name__ == '__main__':
    BATCH_SIZE = 128
    LOG_INTERVAL = 5
    # data
    transform = transforms.Compose([
        transforms.ToTensor()
    ])
    trainset = torchvision.datasets.CIFAR100(
        root='./data',
        train=True,
        download=True,
        transform=transform
    )
    testset = torchvision.datasets.CIFAR100(
        root='./data',
        train=False,
        download=True,
        transform=transform
    )
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
    testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
    # model
    vgg16 = VGG16(100)
    vgg16.cuda()
    # optimizer
    optimizer = optim.SGD(vgg16.parameters(), lr=0.01)
    # loss
    criterion = nn.CrossEntropyLoss()
    print('———— Train Start —————')
    for epoch in range(20):
        running_loss = 0.
        for step, (batch_x, batch_y) in enumerate(trainloader):
            batch_x, batch_y = batch_x.cuda(), batch_y.cuda()
            #
            optimizer.zero_grad()
            output = vgg16(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if step % LOG_INTERVAL == 0:
                print('[%d, %4d] loss: %.4f' % (epoch, step, running_loss / LOG_INTERVAL))
                running_loss = 0.

    def test():
        print('———— Test Start ————')
        correct = 0
        total = 0
        #
        with torch.no_grad():
            for test_x, test_y in testloader:
                images, labels = test_x.cuda(), test_y.cuda()
                output = vgg16(images)
                _, predicted = torch.max(output.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print('Accuracy of the network is: %.4f %%' % accuracy)
        print('———— Test Finish ————')

    test()
    print('———— Train Finish —————')
The loss stays around 4.6060 and never decreases. I have tried different learning rates, but it does not work.

I have noticed that you are not using batch normalization between your convolution layers. I added batch normalization layers and it seems to work. The following is the modified code:
class VGG16(torch.nn.Module):
    def __init__(self, n_classes):
        super(VGG16, self).__init__()
        # construct model
        self.conv1_1 = nn.Conv2d(3, 64, 3, padding=1)
        self.conv11_bn = nn.BatchNorm2d(64)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv12_bn = nn.BatchNorm2d(64)
        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv21_bn = nn.BatchNorm2d(128)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv22_bn = nn.BatchNorm2d(128)
        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv31_bn = nn.BatchNorm2d(256)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv32_bn = nn.BatchNorm2d(256)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv33_bn = nn.BatchNorm2d(256)
        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv41_bn = nn.BatchNorm2d(512)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv42_bn = nn.BatchNorm2d(512)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv43_bn = nn.BatchNorm2d(512)
        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv51_bn = nn.BatchNorm2d(512)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv52_bn = nn.BatchNorm2d(512)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv53_bn = nn.BatchNorm2d(512)
        self.fc6 = nn.Linear(512, 512)
        self.fc7 = nn.Linear(512, 512)
        self.fc8 = nn.Linear(512, n_classes)

    def forward(self, x):
        x = F.relu(self.conv11_bn(self.conv1_1(x)))
        x = F.relu(self.conv12_bn(self.conv1_2(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv21_bn(self.conv2_1(x)))
        x = F.relu(self.conv22_bn(self.conv2_2(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv31_bn(self.conv3_1(x)))
        x = F.relu(self.conv32_bn(self.conv3_2(x)))
        x = F.relu(self.conv33_bn(self.conv3_3(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv41_bn(self.conv4_1(x)))
        x = F.relu(self.conv42_bn(self.conv4_2(x)))
        x = F.relu(self.conv43_bn(self.conv4_3(x)))
        x = F.max_pool2d(x, (2, 2))
        x = F.relu(self.conv51_bn(self.conv5_1(x)))
        x = F.relu(self.conv52_bn(self.conv5_2(x)))
        x = F.relu(self.conv53_bn(self.conv5_3(x)))
        x = F.max_pool2d(x, (2, 2))
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        x = self.fc8(x)
        return x
However, a more elegant version of the same can be found here.
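As a sketch of what such an elegant version usually looks like (an illustration in the style of torchvision's VGG builder, not the linked code): the conv/BN stack can be generated from a configuration list instead of naming every layer by hand:

import torch.nn as nn

# VGG16 layout: numbers are output channels, 'M' marks a 2x2 max-pool.
VGG16_CFG = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M',
             512, 512, 512, 'M', 512, 512, 512, 'M']

def make_vgg16_features(cfg=VGG16_CFG, in_channels=3):
    layers = []
    for v in cfg:
        if v == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        else:
            layers += [nn.Conv2d(in_channels, v, kernel_size=3, padding=1),
                       nn.BatchNorm2d(v),
                       nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)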

How to re-use old weights in a slightly modified model?

I have a CNN network built like this for a particular task.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 2)
        return x
The model is saved using the built-in torch method like this:
net = Net()
optimizer = optim.SGD(net.parameters(), lr=1e-3)
state = {
    'state_dict': net.state_dict(),
    'opt': optimizer.state_dict()
}
torch.save(state, 'model.pt')
I have added a single layer to the network while keeping the rest of the model the same.
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # (new added)
        self.conv14 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))  # (new added)
        x = F.relu(self.conv14(x))
        x = F.softmax(x, 2)
        return x
Since the other conv layers are kept the same, is there any way I can re-use the saved model to load the weights into conv11, conv12 and conv14, instead of training from the beginning?
Assume you trained the following model and now you make a minor modification to it (like adding a layer) and want to use your trained weights:
import torch
import torch.nn as nn
import torch.nn.functional as F  # needed for F.relu / F.softmax below
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 2)
        return x

net = Net()
optimizer = optim.SGD(net.parameters(), lr=1e-3)
you save the model (and the optimizer state) with:
state = {'state_dict': net.state_dict(),
         'opt': optimizer.state_dict()
         }
torch.save(state, 'state.pt')
Your new model is (note that corresponding layers keep the same name, so you don't make conv13 -> conv14):
class NewNet(nn.Module):
    def __init__(self):
        super(NewNet, self).__init__()
        self.conv11 = nn.Conv2d(1, 128, kernel_size=3, padding=1)
        self.conv12 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.convnew = nn.Conv2d(256, 256, kernel_size=3, padding=1)  # (new added)
        self.conv13 = nn.Conv2d(256, 2, kernel_size=3, padding=1)

    def forward(self, x):
        in_size = x.size(0)
        x = F.relu(self.conv11(x))
        x = F.relu(self.conv12(x))
        x = F.relu(self.convnew(x))  # (new added)
        x = F.relu(self.conv13(x))
        x = F.softmax(x, 2)
        return x
Now you can load your state.pt file:
state = torch.load('state.pt')
state is a dict: state['opt'] contains all the parameters you had for your optimizer; for example, state['opt']['param_groups'][0]['lr'] gives 0.001.
Assuming corresponding layers kept the same name, you can recover your parameters and initialize the appropriate layers by:
net = NewNet()
# Copy every parameter whose name also exists in the checkpoint;
# the new convnew layer keeps its fresh initialization.
for name, param in net.named_parameters():
    if name in state['state_dict'].keys():
        param = param.data                      # work on the underlying tensor
        param.copy_(state['state_dict'][name])  # in-place copy of the saved weights
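A shorter route that should be equivalent here (this uses the standard PyTorch API, not part of the original answer): load_state_dict with strict=False skips keys that exist in only one of the two models, so convnew simply keeps its random initialization:

net = NewNet()
state = torch.load('state.pt')
# strict=False ignores parameters present on only one side
# (here, the newly added convnew layer).
net.load_state_dict(state['state_dict'], strict=False)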
