I want to feed my 3,320,320 pictures in an existing ResNet model. The model actually expects input of size 3,32,32. As I am afraid of loosing information I don't simply want to resize my pictures.
What is the best way to preprocess my images, so that they are able to run on the ResNet34?
Should I add additional layers in the forward method of ResNet? If yes, what would be a suitable combination in my case?
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_fitmodule import FitModule
from torch.autograd import Variable
import numpy as np
def conv3x3(in_planes, out_planes, stride=1):
return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)
class BasicBlock(FitModule):
expansion = 1
def __init__(self, in_planes, planes, stride=1):
super(BasicBlock, self).__init__()
self.conv1 = conv3x3(in_planes, planes, stride)
self.bn1 = nn.BatchNorm2d(planes)
self.conv2 = conv3x3(planes, planes)
self.bn2 = nn.BatchNorm2d(planes)
self.shortcut = nn.Sequential()
if stride != 1 or in_planes != self.expansion * planes:
self.shortcut = nn.Sequential(
nn.Conv2d(in_planes, self.expansion * planes,
kernel_size=1, stride=stride, bias=False),
nn.BatchNorm2d(self.expansion * planes)
)
def forward(self, x):
out = F.relu(self.bn1(self.conv1(x)))
out = self.bn2(self.conv2(out))
out += self.shortcut(x)
out = F.relu(out)
return out
class ResNet(FitModule):
def __init__(self, block, num_blocks, num_classes=10):
super(ResNet, self).__init__()
self.in_planes = 64
self.conv1 = conv3x3(3, 64)
self.bn1 = nn.BatchNorm2d(64)
self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
self.linear = nn.Linear(512 * block.expansion, num_classes)
def _make_layer(self, block, planes, num_blocks, stride):
strides = [stride] + [1] * (num_blocks - 1)
layers = []
for stride in strides:
layers.append(block(self.in_planes, planes, stride))
self.in_planes = planes * block.expansion
return nn.Sequential(*layers)
def forward(self, x): # add additional layers here?
x = x.float()
out = F.relu(self.bn1(self.conv1(x).float()).float())
out = self.layer1(out)
out = self.layer2(out)
out = self.layer3(out)
out = self.layer4(out)
out = F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)
out = self.linear(out)
return out
def ResNet34():
return ResNet(BasicBlock, [3, 4, 6, 3])
Thanks plenty!
Regards,
Fabian
If you change your avg_pool operation to 'AdaptiveAvgPool2d' your model will work for any image size.
However with your current setup, your 320x320 images would be 40x40 going into the pooling stage, which is a large feature map to pool over. Consider adding more conv layers.
Related
I’m trying to implement very very simple UNET from this code.
class unet(nn.Module):
def __init__(self, ngf=64, norm_layer=nn.BatchNorm1d):
super(unet, self).__init__()
# construct unet structure
unet_block = skipconnection_block(ngf*2, ngf, submodule=None, norm_layer=norm_layer, inner=True)
unet_block = skipconnection_block(ngf, 1, submodule=unet_block, norm_layer=norm_layer, outer=True)
self.model = unet_block
def forward(self, x):
self.unet = nn.Sequential(self.model)
x = self.unet(x)
return x
class skipconnection_block(nn.Module):
def __init__(self, inner_nc, outer_nc, submodule=None, outer=False, inner=False, norm_layer=nn.BatchNorm1d):
super(skipconnection_block, self).__init__()
self.outer = outer
downrelu = nn.LeakyReLU(0.2, True)
uprelu = nn.ReLU(True)
if inner:
downconv_0 = nn.Conv1d(in_channels=outer_nc, out_channels=inner_nc, kernel_size=4, stride=2, padding=0)
upconv_0 = nn.ConvTranspose1d(in_channels=inner_nc, out_channels=outer_nc, kernel_size=4, stride=2, padding=0)
down = [downrelu, downconv_0]
up = [uprelu, upconv_0, norm_layer(outer_nc)]
model = down + up
elif outer:
downconv_1 = nn.Conv1d(in_channels=outer_nc, out_channels=inner_nc, kernel_size=4, stride=2, padding=0)
upconv_1 = nn.ConvTranspose1d(in_channels=inner_nc, out_channels=outer_nc, kernel_size=4, stride=2, padding=0)
down = [downrelu, downconv_1, norm_layer(inner_nc)]
up = [uprelu, upconv_1, norm_layer(outer_nc)]
model = down + [submodule] + up
self.model = nn.Sequential(*model)
def forward(self, x):
if self.outer:
return self.model(x)
else:
return torch.cat([x, self.model(x)], 1)
and when i tried like this for checking summary architecture of unet,
unet = load_skip_model()
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
unet.to(device)
print(torchsummary.summary(unet, (1, 150)))
i got this result below.
RuntimeError: Given transposed=1, weight of size [64, 1, 4], expected input[2, 128, 74] to have 64 channels, but got 128 channels instead
I dont understand why i got this.
can anyone please please give some help…?? thank uu
As a rule of thumb, when you define a neural network like so, first check your layer in/out dims one by one.
Here for example, your first conv1d does not receive the expected dimension as input which cause you error.
I have a resnet18 pretrained model, now I want to change as feature backbone into MobileNet using pytorch , please suggest any optimal way is available or not to implement this.
In the below code I want to use backbone mobilenet instead of resnet as feature extraction
#=====
import torch
from model.backbone import resnet
import numpy as np
class conv_bn_relu(torch.nn.Module):
def init(self,in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1,bias=False):
super(conv_bn_relu,self).init()
self.conv = torch.nn.Conv2d(in_channels,out_channels, kernel_size,
stride = stride, padding = padding, dilation = dilation,bias = bias)
self.bn = torch.nn.BatchNorm2d(out_channels)
self.relu = torch.nn.ReLU()
def forward(self,x):
x = self.conv(x)
x = self.bn(x)
x = self.relu(x)
return x
class parsingNet(torch.nn.Module):
def init(self, size=(288, 800), pretrained=True, backbone='50', cls_dim=(37, 10, 4), use_aux=False):
super(parsingNet, self).init()
self.size = size
self.w = size[0]
self.h = size[1]
self.cls_dim = cls_dim # (num_gridding, num_cls_per_lane, num_of_lanes)
# num_cls_per_lane is the number of row anchors
self.use_aux = use_aux
self.total_dim = np.prod(cls_dim)
# input : nchw,
# output: (w+1) * sample_rows * 4
self.model = resnet(backbone, pretrained=pretrained)
if self.use_aux:
self.aux_header2 = torch.nn.Sequential(
conv_bn_relu(128, 128, kernel_size=3, stride=1, padding=1) if backbone in ['34','18'] else conv_bn_relu(512, 128, kernel_size=3, stride=1, padding=1),
conv_bn_relu(128,128,3,padding=1),
conv_bn_relu(128,128,3,padding=1),
conv_bn_relu(128,128,3,padding=1),
)
self.aux_header3 = torch.nn.Sequential(
conv_bn_relu(256, 128, kernel_size=3, stride=1, padding=1) if backbone in ['34','18'] else conv_bn_relu(1024, 128, kernel_size=3, stride=1, padding=1),
conv_bn_relu(128,128,3,padding=1),
conv_bn_relu(128,128,3,padding=1),
)
self.aux_header4 = torch.nn.Sequential(
conv_bn_relu(512, 128, kernel_size=3, stride=1, padding=1) if backbone in ['34','18'] else conv_bn_relu(2048, 128, kernel_size=3, stride=1, padding=1),
conv_bn_relu(128,128,3,padding=1),
)
self.aux_combine = torch.nn.Sequential(
conv_bn_relu(384, 256, 3,padding=2,dilation=2),
conv_bn_relu(256, 128, 3,padding=2,dilation=2),
conv_bn_relu(128, 128, 3,padding=2,dilation=2),
conv_bn_relu(128, 128, 3,padding=4,dilation=4),
torch.nn.Conv2d(128, cls_dim[-1] + 1,1)
# output : n, num_of_lanes+1, h, w
)
initialize_weights(self.aux_header2,self.aux_header3,self.aux_header4,self.aux_combine)
self.cls = torch.nn.Sequential(
torch.nn.Linear(1800, 2048),
torch.nn.ReLU(),
torch.nn.Linear(2048, self.total_dim),
)
self.pool = torch.nn.Conv2d(512,8,1) if backbone in ['34','18'] else torch.nn.Conv2d(2048,8,1)
# 1/32,2048 channel
# 288,800 -> 9,40,2048
# (w+1) * sample_rows * 4
# 37 * 10 * 4
initialize_weights(self.cls)
def forward(self, x):
# n c h w - > n 2048 sh sw
# -> n 2048
x2,x3,fea = self.model(x)
if self.use_aux:
x2 = self.aux_header2(x2)
x3 = self.aux_header3(x3)
x3 = torch.nn.functional.interpolate(x3,scale_factor = 2,mode='bilinear')
x4 = self.aux_header4(fea)
x4 = torch.nn.functional.interpolate(x4,scale_factor = 4,mode='bilinear')
aux_seg = torch.cat([x2,x3,x4],dim=1)
aux_seg = self.aux_combine(aux_seg)
else:
aux_seg = None
fea = self.pool(fea).view(-1, 1800)
group_cls = self.cls(fea).view(-1, *self.cls_dim)
if self.use_aux:
return group_cls, aux_seg
return group_cls
def initialize_weights(*models):
for model in models:
real_init_weights(model)
def real_init_weights(m):
if isinstance(m, list):
for mini_m in m:
real_init_weights(mini_m)
else:
if isinstance(m, torch.nn.Conv2d):
torch.nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
if m.bias is not None:
torch.nn.init.constant_(m.bias, 0)
elif isinstance(m, torch.nn.Linear):
m.weight.data.normal_(0.0, std=0.01)
elif isinstance(m, torch.nn.BatchNorm2d):
torch.nn.init.constant_(m.weight, 1)
torch.nn.init.constant_(m.bias, 0)
elif isinstance(m,torch.nn.Module):
for mini_m in m.children():
real_init_weights(mini_m)
else:
print('unkonwn module', m)
#======
For speaker verification from speech, I want to impellent bellow figure. I have some question please guide me.
First I am defining a class for first 4 layers (Code model #1). Then I am using ECAPA-TDNN code from github (code model #2 – link: https://github.com/TaoRuijie/ECAPA-TDNN ). I don’t know how can I connect between two models 1 and 2 or in the other hand, how can I impelement bellow figure?
Model #1 : The first 4 layer code:
import torch.nn as nn
class NeuralNetwork(nn.Module):
def __init__(self, num_class):
super(NeuralNetwork, self).__init__()
self.conv1 = nn.Sequential(nn.Conv2d(1, 80, T),
BatchNorm2d(4),
ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=2))
self.conv2 = nn.Sequential(nn.Conv2d(128, 40, T),
BatchNorm2d(4),
ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=1))
self.conv3 = nn.Sequential(nn.Conv2d(128, 40, T),
BatchNorm2d(4),
ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=1))
self.conv4 = nn.Sequential(nn.Conv2d(128, 40, T),
BatchNorm2d(4),
ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=1))
self.conv5 = nn.Sequential(nn.Conv2d(128, 20, T = flatten),
BatchNorm2d(4),
ReLU(inplace=True),
MaxPool2d(kernel_size=2, stride=2))
Model #2 : ECAPA-TDNN form git-hub:
import math, torch, torchaudio
import torch.nn as nn
import torch.nn.functional as F
class ECAPA_TDNN(nn.Module):
def __init__(self, C):
super(ECAPA_TDNN, self).__init__()
self.torchfbank = torch.nn.Sequential(
PreEmphasis(),
torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400, hop_length=160, \
f_min = 20, f_max = 7600, window_fn=torch.hamming_window, n_mels=80),
)
self.specaug = FbankAug() # Spec augmentation
self.conv1 = nn.Conv1d(80, C, kernel_size=5, stride=1, padding=2)
self.relu = nn.ReLU()
self.bn1 = nn.BatchNorm1d(C)
self.layer1 = Bottle2neck(C, C, kernel_size=3, dilation=2, scale=8)
self.layer2 = Bottle2neck(C, C, kernel_size=3, dilation=3, scale=8)
self.layer3 = Bottle2neck(C, C, kernel_size=3, dilation=4, scale=8)
# I fixed the shape of the output from MFA layer, that is close to the setting from ECAPA paper.
self.layer4 = nn.Conv1d(3*C, 1536, kernel_size=1)
self.attention = nn.Sequential(
nn.Conv1d(4608, 256, kernel_size=1),
nn.ReLU(),
nn.BatchNorm1d(256),
nn.Tanh(), # I add this layer
nn.Conv1d(256, 1536, kernel_size=1),
nn.Softmax(dim=2),
)
self.bn5 = nn.BatchNorm1d(3072)
self.fc6 = nn.Linear(3072, 192)
self.bn6 = nn.BatchNorm1d(192)
def forward(self, x, aug):
with torch.no_grad():
x = self.torchfbank(x)+1e-6
x = x.log()
x = x - torch.mean(x, dim=-1, keepdim=True)
if aug == True:
x = self.specaug(x)
x = self.conv1(x)
x = self.relu(x)
x = self.bn1(x)
x1 = self.layer1(x)
x2 = self.layer2(x+x1)
x3 = self.layer3(x+x1+x2)
x = self.layer4(torch.cat((x1,x2,x3),dim=1))
x = self.relu(x)
t = x.size()[-1]
global_x = torch.cat((x,torch.mean(x,dim=2,keepdim=True).repeat(1,1,t), torch.sqrt(torch.var(x,dim=2,keepdim=True).clamp(min=1e-4)).repeat(1,1,t)), dim=1)
w = self.attention(global_x)
mu = torch.sum(x * w, dim=2)
sg = torch.sqrt( ( torch.sum((x**2) * w, dim=2) - mu**2 ).clamp(min=1e-4) )
x = torch.cat((mu,sg),1)
x = self.bn5(x)
x = self.fc6(x)
x = self.bn6(x)
return x
2. I am using log Mel filterbank energy for feature extraction. My dimension isn’t 80. How can I define 80 for dimension?(by batch-size? How?)
3. C is constant (128) but how can I calculate T? for one speech signal I used bellow code But for all speech in my dataset, second code didn’t work. Or T is another thing?
# Number of Channel
ch = file_info.channels('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print (ch)
# Number of Frame in One Audio File
frame_one_wav = audiosegment.from_file('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print (frame_one_wav)
# All Frame
all_frame = []
for i in range (number_of_files_train):
wavs_info = rate_train, sig_train = wavfile.read(wavs_train[i])
frame_wav = audiosegment.from_file(wavs_info)
all_frame.append(frame_wav())
For a pytorch module, I suppose I could use .named_children, .named_modules, etc. to obtain a list of the submodules. However, I suppose the list is not given in order, right? An example:
In [19]: import transformers
In [20]: model = transformers.DistilBertForSequenceClassification.from_pretrained('distilb
...: ert-base-cased')
In [21]: [name for name, _ in model.named_children()]
Out[21]: ['distilbert', 'pre_classifier', 'classifier', 'dropout']
The order of .named_children() in the above model is given as distilbert, pre_classifier, classifier, and dropout. However, if you examine the code, it is evident that dropout happens before classifier. So how do I get the order of these submodules?
In Pytorch, the results of print(model) or .named_children(), etc are listed based on the order they are declared in __init__ of the model's class e.g.
Case 1
class Model(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.fc1 = nn.Linear(320, 50)
self.fc2 = nn.Linear(50, 10)
self.conv2_drop = nn.Dropout2d()
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
x = F.dropout(x, p=0.6)
x = self.fc2(x)
return F.log_softmax(x, dim=1)
model = Model()
print(model)
[name for name, _ in model.named_children()]
# output
['conv1', 'conv2', 'fc1', 'fc2', 'conv2_drop']
Case 2
Changed order of fc1 and fc2 layers in constructor.
class Model(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
self.fc2 = nn.Linear(50, 10)
self.fc1 = nn.Linear(320, 50)
self.conv2_drop = nn.Dropout2d()
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
x = F.dropout(x, p=0.6)
x = self.fc2(x)
return F.log_softmax(x, dim=1)
model = Model()
print(model)
[name for name, _ in model.named_children()]
# output
['conv1', 'conv2', 'fc2', 'fc1', 'conv2_drop']
That's why classifier is printed before dropout as it's declared so in constructor:
class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
...
self.distilbert = DistilBertModel(config)
self.pre_classifier = nn.Linear(config.dim, config.dim)
self.classifier = nn.Linear(config.dim, config.num_labels)
self.dropout = nn.Dropout(config.seq_classif_dropout)
Nevertheless, you can play with model's submodules using .modules(), etc. but they'll be listed only in the order they are declared in __init__. If you only want to print structure based on forward method, you may try using pytorch-summary.
I am trying to build a custom convolution using the method shown in pytorch unfold function
The custom convolution function is given below:
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import math
from torch.nn.modules.utils import _pair
class customConv(nn.Module):
def __init__(self, n_channels, out_channels, kernel_size, dilation=1, padding=0, stride=1, bias=True):
super(customConv, self).__init__()
self.kernel_size = _pair(kernel_size)
self.out_channels = out_channels
self.dilation = _pair(dilation)
self.padding = _pair(padding)
self.stride = _pair(stride)
self.n_channels = n_channels
self.weight = Parameter(torch.Tensor(self.out_channels, self.n_channels, self.kernel_size[0], self.kernel_size[1]))
if bias:
self.bias = Parameter(torch.Tensor(out_channels))
else:
self.register_parameter('bias', None)
self.reset_parameters()
def reset_parameters(self):
n = self.n_channels
for k in self.kernel_size:
n *= k
stdv = 1. / math.sqrt(n)
self.weight.data.uniform_(-stdv, stdv)
if self.bias is not None:
self.bias.data.uniform_(-stdv, stdv)
def forward(self, input_):
hout = ((input_.shape[2] + 2 * self.padding[0] - self.dilation[0] * (self.kernel_size[0]-1)-1)//self.stride[0])+1
wout = ((input_.shape[3] + 2 * self.padding[1] - self.dilation[1] * (self.kernel_size[1]-1)-1)//self.stride[1])+1
inputUnfolded = F.unfold(input_, kernel_size=self.kernel_size, padding=self.padding, dilation=self.dilation, stride=self.stride)
if self.bias:
convolvedOutput = (inputUnfolded.transpose(1, 2).matmul(
self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2)) + self.bias.view(-1, 1)
else:
convolvedOutput = (inputUnfolded.transpose(1, 2).matmul(self.weight.view(self.weight.size(0), -1).t()).transpose(1, 2))
convolutionReconstruction = convolvedOutput.view(input_.shape[0], self.out_channels, hout, wout)
return convolutionReconstruction
But when I try comparing it with the pytorch implementation, I do not get the exact value. The code to check for difference is provided below
import torch
from torch import nn
from customConvolve import customConv
torch.manual_seed(1)
input = torch.randn (10,3,64,64)
conv1 = nn.Conv2d(input.shape[1],5, kernel_size=3, dilation=1, padding=1, stride=1 ,bias = False)
conv1_output = conv1(input)
conv2 = customConv(n_channels=input.shape[1], out_channels=5, kernel_size=3, dilation=1, stride =1, padding = 1, bias = False)
conv2_output = conv2(input)
print(torch.equal(conv1.weight.data, conv2.weight.data))
print(torch.equal(conv1_output, conv2_output))
I would like to know why the variation exists and how to solve this?
Thank you.