I know that reset_parameters() in conv.py is responsible for the default weight initialization. I changed the function to:
def reset_parameters(self):
    n = self.in_channels
    for k in self.kernel_size:
        n *= k
    stdv = 1. / math.sqrt(n)
    print('reset w, stdv=', stdv)
    self.weight.data.uniform_(-stdv, stdv)
    if self.bias is not None:
        print('reset b, stdv=', stdv)
        self.bias.data.uniform_(-stdv, stdv)
    print('w:', self.weight.data.norm(), 'b:', self.bias.data.norm())
After my model was created, I applied the following manual weights_init() to change the weight initialization of my conv layers:
def weights_init(m):
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        print(m)
        print(m.weight.data.norm())
        print(m.bias.data.norm())
        std_w = m.weight.size(1) * m.weight.size(2) * m.weight.size(3)
        std_b = m.weight.size(0)
        std_w = 1. / math.sqrt(std_w)
        # std_b = 1. / math.sqrt(std_b)
        std_b = std_w
        m.weight.data.uniform_(-std_w, std_w)
        m.bias.data.uniform_(-std_b, std_b)
        print(m.weight.data.norm())
        print(m.bias.data.norm())
        print('\n\n')
I think the two should do the same thing, but reset_parameters() printed the following:
reset w, stdv= 0.19245008972987526   reset b, stdv= 0.19245008972987526   w: 4.651364750286455 b: 0.9658572124243059
reset w, stdv= 0.041666666666666664  reset b, stdv= 0.041666666666666664  w: 4.60668514079571 b: 0.19021859795685142
reset w, stdv= 0.041666666666666664  reset b, stdv= 0.041666666666666664  w: 6.529658534196003 b: 0.24801288097906313
reset w, stdv= 0.029462782549439483  reset b, stdv= 0.029462782549439483  w: 6.544403663970284 b: 0.20246035190569983
reset w, stdv= 0.029462782549439483  reset b, stdv= 0.029462782549439483  w: 9.237618805061214 b: 0.2699324704165474
reset w, stdv= 0.020833333333333332  reset b, stdv= 0.020833333333333332  w: 9.240560902888104 b: 0.18776950085546212
reset w, stdv= 0.020833333333333332  reset b, stdv= 0.020833333333333332  w: 9.23323252375467 b: 0.19598698034213305
reset w, stdv= 0.020833333333333332  reset b, stdv= 0.020833333333333332  w: 9.247914516750834 b: 0.19991090497324737
reset w, stdv= 0.020833333333333332  reset b, stdv= 0.020833333333333332  w: 13.062441360447233 b: 0.2709608856088436
reset w, stdv= 0.014731391274719742  reset b, stdv= 0.014731391274719742  w: 13.058955297303523 b: 0.19297756771652977
reset w, stdv= 0.014731391274719742  reset b, stdv= 0.014731391274719742  w: 13.064573213326009 b: 0.19342352625500445
reset w, stdv= 0.014731391274719742  reset b, stdv= 0.014731391274719742  w: 13.060771314609305 b: 0.1931597201764238
reset w, stdv= 0.014731391274719742  reset b, stdv= 0.014731391274719742  w: 13.068217941106957 b: 0.1944194648771781
reset w, stdv= 0.014731391274719742  reset b, stdv= 0.014731391274719742  w: 13.064472494773318 b: 0.1871614021517605
reset w, stdv= 0.014731391274719742  reset b, stdv= 0.014731391274719742  w: 13.065174600640301 b: 0.19473828458164352
reset w, stdv= 0.014731391274719742  reset b, stdv= 0.014731391274719742  w: 13.064107007871193 b: 0.18669129860732317
But during my weights_init(), it printed the following:
Conv2d (3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
2.5413928031921387
0.0
4.599651336669922
0.8222851753234863
Conv2d (64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11.280376434326172
0.0
4.624712944030762
0.18499651551246643
Conv2d (64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
11.323299407958984
0.0
6.527068614959717
0.2626609206199646
Conv2d (128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
16.010761260986328
0.0
6.516138553619385
0.18262024223804474
Conv2d (128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
16.00145149230957
0.0
9.22119426727295
0.2743944823741913 etc …
Obviously, the parameters have been changed somewhere in between; for example, the bias has been reset to 0 at the very least.
I don't know whether there is another place in the source code that modifies the conv weight initialization. Anyone who can explain this to me will be much appreciated! My English is poor; I hope you can understand it :smile:
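For comparison, the usual way to change initialization without editing conv.py is to re-initialize the parameters after the model has been fully constructed, e.g. with model.apply() and torch.nn.init. A minimal sketch, assuming an already-built nn.Module named model (the names here are illustrative, not your actual code):

import math
import torch.nn as nn

def custom_conv_init(m):
    # Re-initialize every Conv2d after construction, so nothing
    # created later can overwrite the values again.
    if isinstance(m, nn.Conv2d):
        fan_in = m.in_channels * m.kernel_size[0] * m.kernel_size[1]
        bound = 1. / math.sqrt(fan_in)
        nn.init.uniform_(m.weight, -bound, bound)
        if m.bias is not None:
            nn.init.uniform_(m.bias, -bound, bound)

model.apply(custom_conv_init)  # `model` is assumed to exist already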
Related
For a toy LeNet-5 CNN architecture on MNIST, implemented in TensorFlow 2.10 + Python 3.10 with a batch size of 256:
class LeNet5(Model):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = Conv2D(
            filters=6, kernel_size=(5, 5),
            strides=(1, 1), activation=None,
            input_shape=(28, 28, 1)
        )
        self.pool1 = AveragePooling2D(
            pool_size=(2, 2), strides=(2, 2)
        )
        self.conv2 = Conv2D(
            filters=16, kernel_size=(5, 5),
            strides=(1, 1), activation=None
        )
        self.pool2 = AveragePooling2D(
            pool_size=(2, 2), strides=(2, 2)
        )
        self.flatten = Flatten()
        self.dense1 = Dense(
            units=120, activation=None
        )
        self.dense2 = Dense(
            units=84, activation=None
        )
        self.output_layer = Dense(
            units=10, activation=None
        )

    def call(self, x):
        x = tf.nn.relu(self.conv1(x))
        x = self.pool1(x)
        x = tf.nn.relu(self.conv2(x))
        x = self.pool2(x)
        x = self.flatten(x)
        x = tf.nn.relu(self.dense1(x))
        x = tf.nn.relu(self.dense2(x))
        x = tf.nn.softmax(self.output_layer(x))
        return x

    def shape_computation(self, x):
        print(f"Input shape: {x.shape}")
        x = self.conv1(x)
        print(f"conv1 output shape: {x.shape}")
        x = self.pool1(x)
        print(f"pool1 output shape: {x.shape}")
        x = self.conv2(x)
        print(f"conv2 output shape: {x.shape}")
        x = self.pool2(x)
        print(f"pool2 output shape: {x.shape}")
        x = self.flatten(x)
        print(f"flattened shape: {x.shape}")
        x = self.dense1(x)
        print(f"dense1 output shape: {x.shape}")
        x = self.dense2(x)
        print(f"dense2 output shape: {x.shape}")
        x = self.output_layer(x)
        print(f"output shape: {x.shape}")
        del x
        return None
# Initialize an instance of LeNet-5 CNN-
model = LeNet5()
model.build(input_shape = (None, 28, 28, 1))
# Define loss and optimizer-
loss_fn = tf.keras.losses.CategoricalCrossentropy(reduction = tf.keras.losses.Reduction.NONE)
# optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0003)
optimizer = tf.keras.optimizers.SGD(
    learning_rate=10e-3, momentum=0.0,
    nesterov=False
)
with tf.GradientTape() as grad_tape:
    pred = model(x)
    loss = loss_fn(y, pred)

loss.shape
# TensorShape([256])
This computes individual loss for each of the 256 training images in a given batch.
# Compute gradient using loss wrt parameters-
grads = grad_tape.gradient(loss, model.trainable_variables)
type(grads), len(grads)
# (list, 10)
for i in range(len(grads)):
    print(f"i: {i}, grads.shape: {grads[i].shape}")
"""
i: 0, grads.shape: (5, 5, 1, 6)
i: 1, grads.shape: (6,)
i: 2, grads.shape: (5, 5, 6, 16)
i: 3, grads.shape: (16,)
i: 4, grads.shape: (256, 120)
i: 5, grads.shape: (120,)
i: 6, grads.shape: (120, 84)
i: 7, grads.shape: (84,)
i: 8, grads.shape: (84, 10)
i: 9, grads.shape: (10,)
"""
Given the per-example losses, how can I compute the gradient corresponding to each individual training example?
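One way to get them (a sketch, not necessarily the most memory-efficient one) is to ask the tape for the Jacobian of the per-example loss vector instead of the gradient of a scalar; tf.GradientTape.jacobian then returns, for each variable of shape S, a tensor of shape (256,) + S:

# Per-example gradients via the Jacobian of the (batch,)-shaped loss.
# Assumes `model`, `loss_fn`, `x` and `y` are the ones from the snippet above.
with tf.GradientTape() as tape:
    pred = model(x)
    loss = loss_fn(y, pred)              # shape: (256,)

per_example_grads = tape.jacobian(loss, model.trainable_variables)

for g in per_example_grads:
    print(g.shape)                       # e.g. (256, 5, 5, 1, 6), (256, 6), ...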
I want to implement these three layers in a single class, but I am facing this problem:
ValueError: out_channels must be divisible by groups
I don't understand where the mistake is. This is the code:
class Block(nn.Module):
    def __init__(self, in_channels, out_channels, exp=1, stride=1, type=''):
        super().__init__()
        self.t = type
        self.stride = stride
        self.inc, self.outc = in_channels, out_channels
        self.exp = exp
        self.blockc = nn.Sequential(
            nn.Conv2d(self.inc, self.inc * self.exp, kernel_size=1),
            nn.ReLU(),
            nn.Conv2d(self.inc * self.exp, self.outc, kernel_size=3, groups=self.inc * self.exp, stride=self.stride),
            nn.ReLU(),
            nn.Conv2d(self.outc, self.outc * self.exp, kernel_size=1),
            nn.Linear(self.outc * self.exp, self.outc * self.exp))

    def forward(self, x):
        out = self.blockc(x)
        if self.t == 'A':
            out = torch.sum(out, x)
        return out
The layers consist of a depthwise convolution and 1x1 convolutions; there is also a parameter exp, which means out_channels = exp * in_channels.
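For context, nn.Conv2d requires both in_channels and out_channels to be divisible by groups; in a depthwise layer the channel count stays the same (groups == in_channels == out_channels) and it is the following 1x1 convolution that changes it. A minimal sketch of that pattern (illustrative numbers, not a drop-in fix for the class above):

import torch
import torch.nn as nn

c_in, exp, c_out = 16, 2, 32
hidden = c_in * exp

block = nn.Sequential(
    nn.Conv2d(c_in, hidden, kernel_size=1),        # 1x1 expansion
    nn.ReLU(),
    nn.Conv2d(hidden, hidden, kernel_size=3,
              padding=1, groups=hidden),           # depthwise: out_channels == groups
    nn.ReLU(),
    nn.Conv2d(hidden, c_out, kernel_size=1),       # 1x1 projection
)

print(block(torch.rand(1, c_in, 8, 8)).shape)      # torch.Size([1, 32, 8, 8])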
Here is a possible implementation; you will have to adjust the channels and padding for your needs:
class BType(Enum):
    A = 0
    B = 1
    C = 2


class Block(nn.Module):
    def __init__(self, c_in: int, c_out: int, btype: BType) -> None:
        super().__init__()
        self.btype = btype
        if btype == BType.A:
            assert c_in == c_out
        self.c1 = nn.Sequential(
            nn.Conv2d(c_in, c_in, kernel_size=1),
            nn.ReLU())
        self.c2 = nn.Sequential(
            nn.Conv2d(c_in, c_in, kernel_size=3, groups=c_in,
                      stride=2 if btype == BType.C else 1,
                      padding=2 if btype == BType.C else 1),
            nn.ReLU())
        self.c3 = nn.Conv2d(c_in, c_out, kernel_size=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        out = self.c1(x)
        out = self.c2(out)
        out = self.c3(out)
        if self.btype == BType.A:
            out += x
        return out
Here is a test with all three block types:
block A:
>>> block = Block(3, 3, BType.A)
>>> block(torch.rand(2,3,10,10)).shape
torch.Size([2, 3, 10, 10])
block B:
>>> block = Block(3, 10, BType.B)
>>> block(torch.rand(2,3,10,10)).shape
torch.Size([2, 10, 10, 10])
block C:
>>> block = Block(3, 10, BType.C)
>>> block(torch.rand(2,3,10,10)).shape
torch.Size([2, 10, 6, 6])
I have attempted to write a generative adversarial network. Below is the code of one of the discriminators.
class D1(Layer):
    def __init__(self, input_shape=(256, 256, 3), name='d1', **kwargs):
        super(D1, self).__init__(name=name, **kwargs)
        self.h1 = Conv2D(64, (3, 3), strides=(1, 1), padding='same')
        self.h2 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h3 = LeakyReLU(alpha=0.2)
        self.h4 = Conv2D(128, (3, 3), strides=(1, 1), padding='same')
        self.h5 = Conv2D(128, (3, 3), strides=(1, 1), padding='same')
        self.h6 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h7 = LeakyReLU(alpha=0.2)
        self.h8 = Conv2D(256, (3, 3), strides=(1, 1), padding='same')
        self.h9 = Conv2D(256, (3, 3), strides=(1, 1), padding='same')
        self.h10 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h11 = LeakyReLU(alpha=0.2)
        self.h12 = Conv2D(512, (3, 3), strides=(1, 1), padding='same')
        self.h13 = Conv2D(512, (3, 3), strides=(1, 1), padding='same')
        self.h14 = MaxPooling2D(pool_size=(2, 2), strides=None, padding='same')
        self.h15 = Flatten()
        self.h16 = Dropout(0.4)
        self.D1R = Dense(1, activation='sigmoid')
        self.h17 = Dense(4096, activation='relu')
        self.h18 = Dense(4096, activation='relu')
        self.D1C = Dense(16, activation='sigmoid')

    def call(self, inputs):
        x = self.h1(inputs)
        x = self.h2(x)
        x = self.h3(x)
        x = self.h4(x)
        x = self.h5(x)
        x = self.h6(x)
        x = self.h7(x)
        x = self.h8(x)
        x = self.h9(x)
        x = self.h10(x)
        x = self.h11(x)
        x = self.h12(x)
        x = self.h13(x)
        x = self.h14(x)
        x = self.h15(x)
        x = self.h16(x)
        d1r = self.D1R(x)
        x = self.h17(x)
        x = self.h18(x)
        d1c = self.D1C(x)
        return d1r, d1c
class Discriminator1(Model):
    def __init__(
        self,
        input_shape=(None, 256, 256, 3),
        name='disc1',
        **kwargs
    ):
        super(Discriminator1, self).__init__(name=name, **kwargs)
        self.d1 = D1(input_shape=input_shape)

    def call(self, inputs):
        image = inputs
        d1r, d1c = self.d1(image)
        d1_loss = d1_loss(d1r, d1c)
        self.add_loss(d1_loss)
        return out
When I call it during training, it throws a TypeError: expected string or bytes-like object. I cannot figure out what causes it.
Any help? None of my functions is supposed to use strings.
def generate_latent_noise(latent_dim, n_samples):
    x_input = randn(latent_dim * n_samples)
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input

def generate_fake_samples(g, latent_dim, n_samples, y_i, y_l):
    x_input = generate_latent_noise(latent_dim, n_samples)
    X = g.predict(x_input)
    y = zeros((n_samples, 1))
    for i in range(n_samples-1):
        intent = y_i[i]
        bio = y_l[i]
    return X, y, intent, bio
epochs = 200
opt = SGD(learning_rate=1e-3, momentum=0.99)
metric = Accuracy()

yi, yl = retrieve_target_labels('/content/drive/My Drive/Project/input.xlsx')
g = Generator(100)
d1 = D1((256, 256, 3))
d2 = D2((256, 256, 3))
gen = G_Model((256, 256, 3), 100, yi, yl)
disc1 = Discriminator1((256, 256, 3), 100)
disc2 = Discriminator2((256, 256, 3), 100)
art, yc_real, yi_real, yl_real = load_real_samples('/content/drive/MyDrive/Project/TrainSA.xlsx')
half_batch = yi.shape[0]
n_batch = half_batch * 2
batch_per_epoch = int(art.shape[0] / n_batch)

for epoch in range(epochs):
    for batch in range(batch_per_epoch):
        fake, y, yi, yl = generate_fake_samples(g, 100, half_batch, yi, yl)
        real, y_real, c_real, i_real, l_real = generate_real_samples(art, half_batch, yc_real, yi_real, yl_real)
        fake_image = tf.convert_to_tensor(fake)
        d1r, d1c = d1(fake_image)  # error!
        d1_loss_fake = d1.losses
        d1r, d1c = d1(real)
        d1_loss = d1.losses
        d2i_fake, d2l_fake = d2(fake_image)
        d2_loss_fake = d2.losses
        d2i, d2l = d2(real)
        d2_loss = d2.losses
        g_loss = gen.losses
It is a bit difficult to provide a minimal working example as the error is at the end of my code, after a lot of functions have been called, but I tried to include the ones that might be involved in the error.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-95-d4bb0da7c68f> in <module>()
23 #d1_loss_fake = d1.losses
24 real_image = tf.convert_to_tensor(real, dtype = tf.float32)
---> 25 d1r, d1c = disc1(real_image)
26 d1_loss = d1.losses
27 d2i_fake, d2l_fake = d2(fake_image)
3 frames
/tensorflow-1.15.2/python3.6/tensorflow_core/python/framework/ops.py in name_scope(self, name)
4126 # Scopes created in the root must match the more restrictive
4127 # op name regex, which constrains the initial character.
-> 4128 if not _VALID_OP_NAME_REGEX.match(name):
4129 raise ValueError("'%s' is not a valid scope name" % name)
4130 old_stack = self._name_stack
TypeError: expected string or bytes-like object
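One detail that may be relevant (an assumption on my part, not a confirmed diagnosis): in the constructors above, the second positional parameter is name, so Discriminator1((256, 256, 3), 100) binds 100 to the model name, and tf.name_scope later applies a regex to that name, which is exactly where a non-string would fail. A minimal sketch of that failure mode:

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense

class Toy(Model):
    def __init__(self, input_shape=(256, 256, 3), name='toy', **kwargs):
        super(Toy, self).__init__(name=name, **kwargs)
        self.out = Dense(1)

    def call(self, inputs):
        return self.out(inputs)

# The second positional argument lands in `name`; with a non-string name,
# calling the model hits tf.name_scope and raises the "expected string or
# bytes-like object" error (exact behaviour depends on the TF version).
toy = Toy((256, 256, 3), 100)
toy(tf.zeros((1, 8)))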
I have two tensors of different sizes to put into the network.
C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = TC(C(a)), TC(C(b))
The results are
a_out.shape = torch.Size([1, 1, 99])   # What I want is [1, 1, 100]
b_out.shape = torch.Size([1, 1, 101])
Is there any method to handle this problem?
I need your help.
Thanks
This is expected behaviour as per the documentation. Maybe padding can be used when an even input length is detected, to get the same length as the input.
Something like this:
class PadEven(nn.Module):
    def __init__(self, conv, deconv, pad_value=0, padding=(0, 1)):
        super().__init__()
        self.conv = conv
        self.deconv = deconv
        self.pad = nn.ConstantPad1d(padding=padding, value=pad_value)

    def forward(self, x):
        nd = x.size(-1)
        x = self.deconv(self.conv(x))
        if nd % 2 == 0:
            x = self.pad(x)
        return x
C = nn.Conv1d(1, 1, kernel_size=1, stride=2)
TC = nn.ConvTranspose1d(1, 1, kernel_size=1, stride=2)
P = PadEven(C, TC)
a = torch.rand(1, 1, 100)
b = torch.rand(1, 1, 101)
a_out, b_out = P(a), P(b)
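For reference, the length arithmetic behind this comes straight from the documented output-size formulas: Conv1d gives L_out = floor((L_in + 2*padding - kernel_size) / stride) + 1 (with dilation 1), and ConvTranspose1d gives L_out = (L_in - 1)*stride - 2*padding + kernel_size, so with kernel_size=1 and stride=2 an even-length input loses one element on the round trip while an odd one does not. A quick check, reusing C and TC from above:

for n in (100, 101):
    down = (n - 1) // 2 + 1              # Conv1d output length: 50 and 51
    up = (down - 1) * 2 + 1              # ConvTranspose1d output length: 99 and 101
    out = TC(C(torch.rand(1, 1, n)))
    print(n, down, up, out.shape[-1])    # 100 50 99 99  /  101 51 101 101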
Let's say we have a list:
testList1 = [("Sita_English", 1), ("Sita_Maths", 2), ("Ram_English", 3), ("Ram_Maths", 4), ("Shyam_English", 5)]
I have added the values by name:
out2 = list(map(lambda v:(v[0], sum(map(lambda s: s[1], v[1]))), groupby(testList1, key=lambda x: x[0].split('_')[0])))
And I got this as the output:
[('Sita', 3), ('Ram', 7), ('Shyam', 5)]
Now I want to have an output like:
[('English', 9), ('Maths', 6)]
Using collections.defaultdict
Ex:
from collections import defaultdict
testList1 = [("Sita_English", 1), ("Sita_Maths", 2), ("Ram_English", 3), ("Ram_Maths", 4), ("Shyam_English", 5)]
out = defaultdict(int)
out2 = defaultdict(int)
for i, v in testList1:
    name, lang = i.split("_")
    out2[name] += v
    out[lang] += v
print(out2) #out2.items() for list of tuples
print(out) #out.items() for list of tuples
Output:
defaultdict(<type 'int'>, {'Sita': 3, 'Ram': 7, 'Shyam': 5})
defaultdict(<type 'int'>, {'Maths': 6, 'English': 9})
As requested by OP
out2 = list(map(lambda v:(v[0], sum(map(lambda s: s[1], v[1]))), groupby(sorted(testList1, key=lambda x: x[0].split("_")[1]), key=lambda x: x[0].split('_')[1])))
#-->[('English', 9), ('Maths', 6)]
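Note that itertools.groupby only merges consecutive items with equal keys, which is why the one-liner sorts the list by subject first; without sorted(), the separate runs of 'English' entries would come out as separate groups. A quick illustration:

from itertools import groupby

subjects = [name.split('_')[1] for name, _ in testList1]
print([k for k, _ in groupby(subjects)])          # ['English', 'Maths', 'English', 'Maths', 'English']
print([k for k, _ in groupby(sorted(subjects))])  # ['English', 'Maths']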