How can I use this CNN for multi-class classification? - conv-neural-network

I found an encoder-decoder-based CNN on GitHub that I want to use for multi-class classification and prediction. My expected input size is 224 x 224. How can I proceed with the following CNN? Can anyone give me an idea, please? I want to use cross-entropy as the loss function. The original code is for segmentation, with its own helper functions, but I want to customize it for my use, so I may need to work with it differently.
This CNN applies max-pooling and unpooling. Why? Then the fuse part concatenates the corresponding encoder and decoder features, upsamples them, and reduces them to a single channel with a 3x3 conv. What should I do if I want to use it like the VGG or ResNet models?
from torch import nn
import torch
import torch.nn.functional as F


def Conv3X3(in_, out):
    return torch.nn.Conv2d(in_, out, 3, padding=1)


class ConvRelu(nn.Module):
    def __init__(self, in_, out):
        super().__init__()
        self.conv = Conv3X3(in_, out)
        self.activation = torch.nn.ReLU(inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.activation(x)
        return x


class Down(nn.Module):
    def __init__(self, nn):
        super(Down, self).__init__()
        self.nn = nn
        self.maxpool_with_argmax = torch.nn.MaxPool2d(kernel_size=2, stride=2, return_indices=True)

    def forward(self, inputs):
        down = self.nn(inputs)
        unpooled_shape = down.size()
        outputs, indices = self.maxpool_with_argmax(down)
        return outputs, down, indices, unpooled_shape


class Up(nn.Module):
    def __init__(self, nn):
        super().__init__()
        self.nn = nn
        self.unpool = torch.nn.MaxUnpool2d(2, 2)

    def forward(self, inputs, indices, output_shape):
        outputs = self.unpool(inputs, indices=indices, output_size=output_shape)
        outputs = self.nn(outputs)
        return outputs


class Fuse(nn.Module):
    def __init__(self, nn, scale):
        super().__init__()
        self.nn = nn
        self.scale = scale
        self.conv = Conv3X3(64, 1)

    def forward(self, down_inp, up_inp):
        outputs = torch.cat([down_inp, up_inp], 1)
        outputs = F.interpolate(outputs, scale_factor=self.scale, mode='bilinear')
        outputs = self.nn(outputs)
        return self.conv(outputs)


class DeepCrack(nn.Module):
    def __init__(self, num_classes=1000):
        super(DeepCrack, self).__init__()
        self.down1 = Down(torch.nn.Sequential(
            ConvRelu(3, 64),
            ConvRelu(64, 64),
        ))
        self.down2 = Down(torch.nn.Sequential(
            ConvRelu(64, 128),
            ConvRelu(128, 128),
        ))
        self.down3 = Down(torch.nn.Sequential(
            ConvRelu(128, 256),
            ConvRelu(256, 256),
            ConvRelu(256, 256),
        ))
        self.down4 = Down(torch.nn.Sequential(
            ConvRelu(256, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))
        self.down5 = Down(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))
        self.up1 = Up(torch.nn.Sequential(
            ConvRelu(64, 64),
            ConvRelu(64, 64),
        ))
        self.up2 = Up(torch.nn.Sequential(
            ConvRelu(128, 128),
            ConvRelu(128, 64),
        ))
        self.up3 = Up(torch.nn.Sequential(
            ConvRelu(256, 256),
            ConvRelu(256, 256),
            ConvRelu(256, 128),
        ))
        self.up4 = Up(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 256),
        ))
        self.up5 = Up(torch.nn.Sequential(
            ConvRelu(512, 512),
            ConvRelu(512, 512),
            ConvRelu(512, 512),
        ))
        self.fuse5 = Fuse(ConvRelu(512 + 512, 64), scale=16)
        self.fuse4 = Fuse(ConvRelu(512 + 256, 64), scale=8)
        self.fuse3 = Fuse(ConvRelu(256 + 128, 64), scale=4)
        self.fuse2 = Fuse(ConvRelu(128 + 64, 64), scale=2)
        self.fuse1 = Fuse(ConvRelu(64 + 64, 64), scale=1)
        self.final = Conv3X3(5, 1)

    def forward(self, inputs):
        # encoder part
        out, down1, indices_1, unpool_shape1 = self.down1(inputs)
        out, down2, indices_2, unpool_shape2 = self.down2(out)
        out, down3, indices_3, unpool_shape3 = self.down3(out)
        out, down4, indices_4, unpool_shape4 = self.down4(out)
        out, down5, indices_5, unpool_shape5 = self.down5(out)
        # decoder part
        up5 = self.up5(out, indices=indices_5, output_shape=unpool_shape5)
        up4 = self.up4(up5, indices=indices_4, output_shape=unpool_shape4)
        up3 = self.up3(up4, indices=indices_3, output_shape=unpool_shape3)
        up2 = self.up2(up3, indices=indices_2, output_shape=unpool_shape2)
        up1 = self.up1(up2, indices=indices_1, output_shape=unpool_shape1)
        fuse5 = self.fuse5(down_inp=down5, up_inp=up5)
        fuse4 = self.fuse4(down_inp=down4, up_inp=up4)
        fuse3 = self.fuse3(down_inp=down3, up_inp=up3)
        fuse2 = self.fuse2(down_inp=down2, up_inp=up2)
        fuse1 = self.fuse1(down_inp=down1, up_inp=up1)
        output = self.final(torch.cat([fuse5, fuse4, fuse3, fuse2, fuse1], 1))
        return output, fuse5, fuse4, fuse3, fuse2, fuse1


if __name__ == '__main__':
    inp = torch.randn((1, 3, 512, 512))
    model = DeepCrack()
    out = model(inp)
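For context: the max-pool/unpool pairing stores the argmax indices on the way down so that MaxUnpool2d can place activations back at their original spatial positions on the way up, and the fuse blocks merge encoder and decoder features at each scale into single-channel maps. All of that serves dense per-pixel segmentation, not image-level classification. To use the network VGG/ResNet-style for multi-class classification, one minimal approach is to keep only the encoder and attach a global-pool plus linear head whose raw logits feed nn.CrossEntropyLoss. The sketch below is an adaptation under stated assumptions (num_classes=10 is a placeholder), not part of the original repo:

import torch
import torch.nn as nn

class DeepCrackClassifier(nn.Module):
    """Sketch: reuse the DeepCrack encoder as a feature extractor;
    the decoder, fuse and final layers are simply left unused."""
    def __init__(self, num_classes=10):
        super().__init__()
        self.backbone = DeepCrack()
        self.pool = nn.AdaptiveAvgPool2d(1)   # (B, 512, 7, 7) -> (B, 512, 1, 1) for 224x224 input
        self.fc = nn.Linear(512, num_classes)

    def forward(self, x):
        b = self.backbone
        out, _, _, _ = b.down1(x)   # each Down returns (pooled, pre-pool, indices, shape)
        out, _, _, _ = b.down2(out)
        out, _, _, _ = b.down3(out)
        out, _, _, _ = b.down4(out)
        out, _, _, _ = b.down5(out)
        return self.fc(self.pool(out).flatten(1))  # raw logits

model = DeepCrackClassifier(num_classes=10)
logits = model(torch.randn(2, 3, 224, 224))                 # shape (2, 10)
loss = nn.CrossEntropyLoss()(logits, torch.tensor([1, 7]))  # integer class targets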

Related

Mobilenet as feature backbone to use Resnet18 pretrained model using Pytorch

I have a ResNet18 pretrained model, and now I want to change the feature backbone to MobileNet using PyTorch. Please suggest whether there is an optimal way to implement this.
In the code below I want to use a MobileNet backbone instead of ResNet for feature extraction.
#=====
import torch
from model.backbone import resnet
import numpy as np


class conv_bn_relu(torch.nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, bias=False):
        super(conv_bn_relu, self).__init__()
        self.conv = torch.nn.Conv2d(in_channels, out_channels, kernel_size,
                                    stride=stride, padding=padding, dilation=dilation, bias=bias)
        self.bn = torch.nn.BatchNorm2d(out_channels)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        x = self.relu(x)
        return x


class parsingNet(torch.nn.Module):
    def __init__(self, size=(288, 800), pretrained=True, backbone='50', cls_dim=(37, 10, 4), use_aux=False):
        super(parsingNet, self).__init__()
        self.size = size
        self.w = size[0]
        self.h = size[1]
        self.cls_dim = cls_dim  # (num_gridding, num_cls_per_lane, num_of_lanes)
        # num_cls_per_lane is the number of row anchors
        self.use_aux = use_aux
        self.total_dim = np.prod(cls_dim)
        # input : nchw,
        # output: (w+1) * sample_rows * 4
        self.model = resnet(backbone, pretrained=pretrained)
        if self.use_aux:
            self.aux_header2 = torch.nn.Sequential(
                conv_bn_relu(128, 128, kernel_size=3, stride=1, padding=1) if backbone in ['34', '18'] else conv_bn_relu(512, 128, kernel_size=3, stride=1, padding=1),
                conv_bn_relu(128, 128, 3, padding=1),
                conv_bn_relu(128, 128, 3, padding=1),
                conv_bn_relu(128, 128, 3, padding=1),
            )
            self.aux_header3 = torch.nn.Sequential(
                conv_bn_relu(256, 128, kernel_size=3, stride=1, padding=1) if backbone in ['34', '18'] else conv_bn_relu(1024, 128, kernel_size=3, stride=1, padding=1),
                conv_bn_relu(128, 128, 3, padding=1),
                conv_bn_relu(128, 128, 3, padding=1),
            )
            self.aux_header4 = torch.nn.Sequential(
                conv_bn_relu(512, 128, kernel_size=3, stride=1, padding=1) if backbone in ['34', '18'] else conv_bn_relu(2048, 128, kernel_size=3, stride=1, padding=1),
                conv_bn_relu(128, 128, 3, padding=1),
            )
            self.aux_combine = torch.nn.Sequential(
                conv_bn_relu(384, 256, 3, padding=2, dilation=2),
                conv_bn_relu(256, 128, 3, padding=2, dilation=2),
                conv_bn_relu(128, 128, 3, padding=2, dilation=2),
                conv_bn_relu(128, 128, 3, padding=4, dilation=4),
                torch.nn.Conv2d(128, cls_dim[-1] + 1, 1)
                # output : n, num_of_lanes+1, h, w
            )
            initialize_weights(self.aux_header2, self.aux_header3, self.aux_header4, self.aux_combine)
        self.cls = torch.nn.Sequential(
            torch.nn.Linear(1800, 2048),
            torch.nn.ReLU(),
            torch.nn.Linear(2048, self.total_dim),
        )
        self.pool = torch.nn.Conv2d(512, 8, 1) if backbone in ['34', '18'] else torch.nn.Conv2d(2048, 8, 1)
        # 1/32 scale, 2048 channels
        # 288,800 -> 9,40,2048
        # (w+1) * sample_rows * 4
        # 37 * 10 * 4
        initialize_weights(self.cls)

    def forward(self, x):
        # n c h w -> n 2048 sh sw
        # -> n 2048
        x2, x3, fea = self.model(x)
        if self.use_aux:
            x2 = self.aux_header2(x2)
            x3 = self.aux_header3(x3)
            x3 = torch.nn.functional.interpolate(x3, scale_factor=2, mode='bilinear')
            x4 = self.aux_header4(fea)
            x4 = torch.nn.functional.interpolate(x4, scale_factor=4, mode='bilinear')
            aux_seg = torch.cat([x2, x3, x4], dim=1)
            aux_seg = self.aux_combine(aux_seg)
        else:
            aux_seg = None
        fea = self.pool(fea).view(-1, 1800)
        group_cls = self.cls(fea).view(-1, *self.cls_dim)
        if self.use_aux:
            return group_cls, aux_seg
        return group_cls


def initialize_weights(*models):
    for model in models:
        real_init_weights(model)


def real_init_weights(m):
    if isinstance(m, list):
        for mini_m in m:
            real_init_weights(mini_m)
    else:
        if isinstance(m, torch.nn.Conv2d):
            torch.nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
            if m.bias is not None:
                torch.nn.init.constant_(m.bias, 0)
        elif isinstance(m, torch.nn.Linear):
            m.weight.data.normal_(0.0, std=0.01)
        elif isinstance(m, torch.nn.BatchNorm2d):
            torch.nn.init.constant_(m.weight, 1)
            torch.nn.init.constant_(m.bias, 0)
        elif isinstance(m, torch.nn.Module):
            for mini_m in m.children():
                real_init_weights(mini_m)
        else:
            print('unknown module', m)
#======
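For reference, one way to swap in MobileNet is to wrap torchvision's MobileNetV2 so it exposes the same three feature maps (1/8, 1/16 and 1/32 scale) that the resnet backbone returns as (x2, x3, fea). The slicing points and channel counts below (32, 96 and 1280 for MobileNetV2) are assumptions to verify against your torchvision version; the aux headers and self.pool would need their input channels changed to match:

import torch
import torch.nn as nn
import torchvision


class MobileNetBackbone(nn.Module):
    """Sketch: MobileNetV2 features at 1/8, 1/16 and 1/32 scale,
    mimicking the (x2, x3, fea) interface of the resnet backbone."""
    def __init__(self, pretrained=True):
        super().__init__()
        feats = torchvision.models.mobilenet_v2(pretrained=pretrained).features
        self.stage2 = feats[:7]    # -> (n, 32, h/8, w/8)
        self.stage3 = feats[7:14]  # -> (n, 96, h/16, w/16)
        self.stage4 = feats[14:]   # -> (n, 1280, h/32, w/32)

    def forward(self, x):
        x2 = self.stage2(x)
        x3 = self.stage3(x2)
        fea = self.stage4(x3)
        return x2, x3, fea

# In parsingNet.__init__ you would then use something like:
#   self.model = MobileNetBackbone(pretrained=pretrained)
#   aux_header2: conv_bn_relu(32, 128, ...)    instead of 128/512
#   aux_header3: conv_bn_relu(96, 128, ...)    instead of 256/1024
#   aux_header4: conv_bn_relu(1280, 128, ...)  instead of 512/2048
#   self.pool = torch.nn.Conv2d(1280, 8, 1)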

pytorch Error: module 'torch.nn' has no attribute 'ReLu'

I am working in Google Colab, so I assume it's the current version of PyTorch.
I tried this:
class Fc(nn.Module):
    def __init__(self):
        super(Fc, self).__init__()
        self.flatt = nn.Flatten()
        self.seq = nn.Sequential(nn.Linear(28*28, 512),
                                 nn.ReLU(),
                                 nn.Linear(512, 512),
                                 nn.ReLu(),
                                 nn.Linear(512, 10), nn.ReLu())

    def forward(x):
        p = self.flatt(x)
        p = self.seq(p)
        return p

m1 = Fc()
and got:
<ipython-input-85-142a1e77b6b6> in <module>()
----> 1 m1 = Fc()
<ipython-input-84-09df3be0b613> in __init__(self)
4 self.flatt = nn.Flatten()
5 self.relu = torch.nn.modules.activation.ReLU()
----> 6 self.seq = nn.Sequential(nn.Linear(28*28, 1012), nn.ReLU(),
nn.Linear(1012, 512), nn.ReLu(), nn.Linear(512, 10), nn.ReLu())
AttributeError: module 'torch.nn' has no attribute 'ReLu'
What am I doing wrong here?
You have a typo in the casing: it's called ReLU, not ReLu.
import torch.nn as nn

class Fc(nn.Module):
    def __init__(self):
        super(Fc, self).__init__()
        self.flatt = nn.Flatten()
        self.seq = nn.Sequential(nn.Linear(28*28, 512),
                                 # TODO: Adjust here
                                 nn.ReLU(),
                                 nn.Linear(512, 512),
                                 nn.ReLU(),
                                 # TODO: Adjust here
                                 nn.Linear(512, 10), nn.ReLU())

    def forward(self, x):  # note: forward also needs self as its first argument
        p = self.flatt(x)
        p = self.seq(p)
        return p

m1 = Fc()
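A quick sanity check, assuming MNIST-sized 1x28x28 inputs (nn.Flatten collapses them to 784 features):

import torch
out = m1(torch.randn(4, 1, 28, 28))  # flattened internally to (4, 784)
print(out.shape)                     # torch.Size([4, 10])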

How can I connect between layers in two network by python and pytorch?

For speaker verification from speech, I want to implement the figure below. I have some questions; please guide me.
First, I define a class for the first 4 layers (code for model #1). Then I use the ECAPA-TDNN code from GitHub (code for model #2, link: https://github.com/TaoRuijie/ECAPA-TDNN). I don't know how to connect models 1 and 2, or, put differently, how to implement the figure below.
Model #1: the code for the first 4 layers:

import torch.nn as nn

class NeuralNetwork(nn.Module):
    def __init__(self, num_class):
        super(NeuralNetwork, self).__init__()
        # T and flatten below are unresolved placeholders taken from the figure
        self.conv1 = nn.Sequential(nn.Conv2d(1, 80, T),
                                   nn.BatchNorm2d(4),
                                   nn.ReLU(inplace=True),
                                   nn.MaxPool2d(kernel_size=2, stride=2))
        self.conv2 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   nn.BatchNorm2d(4),
                                   nn.ReLU(inplace=True),
                                   nn.MaxPool2d(kernel_size=2, stride=1))
        self.conv3 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   nn.BatchNorm2d(4),
                                   nn.ReLU(inplace=True),
                                   nn.MaxPool2d(kernel_size=2, stride=1))
        self.conv4 = nn.Sequential(nn.Conv2d(128, 40, T),
                                   nn.BatchNorm2d(4),
                                   nn.ReLU(inplace=True),
                                   nn.MaxPool2d(kernel_size=2, stride=1))
        self.conv5 = nn.Sequential(nn.Conv2d(128, 20, T=flatten),
                                   nn.BatchNorm2d(4),
                                   nn.ReLU(inplace=True),
                                   nn.MaxPool2d(kernel_size=2, stride=2))
Model #2: ECAPA-TDNN from GitHub:

import math, torch, torchaudio
import torch.nn as nn
import torch.nn.functional as F

class ECAPA_TDNN(nn.Module):
    # PreEmphasis, FbankAug and Bottle2neck are defined elsewhere in the linked repo
    def __init__(self, C):
        super(ECAPA_TDNN, self).__init__()
        self.torchfbank = torch.nn.Sequential(
            PreEmphasis(),
            torchaudio.transforms.MelSpectrogram(sample_rate=16000, n_fft=512, win_length=400, hop_length=160,
                                                 f_min=20, f_max=7600, window_fn=torch.hamming_window, n_mels=80),
        )
        self.specaug = FbankAug()  # spec augmentation
        self.conv1 = nn.Conv1d(80, C, kernel_size=5, stride=1, padding=2)
        self.relu = nn.ReLU()
        self.bn1 = nn.BatchNorm1d(C)
        self.layer1 = Bottle2neck(C, C, kernel_size=3, dilation=2, scale=8)
        self.layer2 = Bottle2neck(C, C, kernel_size=3, dilation=3, scale=8)
        self.layer3 = Bottle2neck(C, C, kernel_size=3, dilation=4, scale=8)
        # I fixed the shape of the output from the MFA layer, which is close to the setting from the ECAPA paper.
        self.layer4 = nn.Conv1d(3 * C, 1536, kernel_size=1)
        self.attention = nn.Sequential(
            nn.Conv1d(4608, 256, kernel_size=1),
            nn.ReLU(),
            nn.BatchNorm1d(256),
            nn.Tanh(),  # I added this layer
            nn.Conv1d(256, 1536, kernel_size=1),
            nn.Softmax(dim=2),
        )
        self.bn5 = nn.BatchNorm1d(3072)
        self.fc6 = nn.Linear(3072, 192)
        self.bn6 = nn.BatchNorm1d(192)

    def forward(self, x, aug):
        with torch.no_grad():
            x = self.torchfbank(x) + 1e-6
            x = x.log()
            x = x - torch.mean(x, dim=-1, keepdim=True)
            if aug == True:
                x = self.specaug(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.bn1(x)
        x1 = self.layer1(x)
        x2 = self.layer2(x + x1)
        x3 = self.layer3(x + x1 + x2)
        x = self.layer4(torch.cat((x1, x2, x3), dim=1))
        x = self.relu(x)
        t = x.size()[-1]
        global_x = torch.cat((x, torch.mean(x, dim=2, keepdim=True).repeat(1, 1, t),
                              torch.sqrt(torch.var(x, dim=2, keepdim=True).clamp(min=1e-4)).repeat(1, 1, t)), dim=1)
        w = self.attention(global_x)
        mu = torch.sum(x * w, dim=2)
        sg = torch.sqrt((torch.sum((x ** 2) * w, dim=2) - mu ** 2).clamp(min=1e-4))
        x = torch.cat((mu, sg), 1)
        x = self.bn5(x)
        x = self.fc6(x)
        x = self.bn6(x)
        return x
2. I am using log Mel filterbank energies for feature extraction, but my feature dimension isn't 80. How can I make the dimension 80 (via the batch size? how?)
3. C is a constant (128), but how can I calculate T? For one speech signal I used the code below, but for all the speech files in my dataset the second part didn't work. Or is T something else?
# number of channels
ch = file_info.channels('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print(ch)

# number of frames in one audio file
frame_one_wav = audiosegment.from_file('/home/narges/Project-SV-1400.11.20/S.V-Code/dataset/train/id10292/FXxcN18rX7c/00001.wav')
print(frame_one_wav)

# all frames
all_frame = []
for i in range(number_of_files_train):
    wavs_info = rate_train, sig_train = wavfile.read(wavs_train[i])
    frame_wav = audiosegment.from_file(wavs_info)
    all_frame.append(frame_wav())
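For reference, one way to chain the two models, assuming the figure means the 2-D front-end replaces ECAPA's built-in fbank extraction: make the front-end emit a (batch, 80, time) feature map (80 matching the n_mels that conv1 expects) and enter ECAPA at conv1, reusing its existing layers. This is hypothetical wiring, not the repo's own API:

import torch
import torch.nn as nn

class SpeakerNet(nn.Module):
    """Sketch: a CNN front-end feeding ECAPA-TDNN, skipping its
    fbank/log/CMN stages. Assumes front_end(x) -> (batch, 80, time)."""
    def __init__(self, front_end, ecapa):
        super().__init__()
        self.front_end = front_end
        self.ecapa = ecapa

    def forward(self, x):
        feats = self.front_end(x)  # (batch, 80, time)
        e = self.ecapa
        h = e.bn1(e.relu(e.conv1(feats)))
        h1 = e.layer1(h)
        h2 = e.layer2(h + h1)
        h3 = e.layer3(h + h1 + h2)
        h = e.relu(e.layer4(torch.cat((h1, h2, h3), dim=1)))
        t = h.size(-1)
        g = torch.cat((h,
                       h.mean(dim=2, keepdim=True).expand(-1, -1, t),
                       h.var(dim=2, keepdim=True).clamp(min=1e-4).sqrt().expand(-1, -1, t)), dim=1)
        w = e.attention(g)  # attentive statistics pooling, as in the repo's forward
        mu = (h * w).sum(dim=2)
        sg = (((h ** 2) * w).sum(dim=2) - mu ** 2).clamp(min=1e-4).sqrt()
        return e.bn6(e.fc6(e.bn5(torch.cat((mu, sg), dim=1))))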

TypeError: can't multiply sequence by non-int of type 'tuple' while using summary() for pytorch model

As I am designing a multimodal architecture with a 3D CNN and a 2D CNN, I want to view the model architecture like in Keras. The input sizes are as in the picture, and I keep getting this error when I try to print the structure, although there is no issue while running training and model evaluation.
class CombineModel(nn.Module):  # a multimodal network (3D and 2D CNN) with two outputs
    def __init__(self, num_classes):
        super(CombineModel, self).__init__()
        self.CNN3D = CNN3DModel()  # output size is (512, 1)
        self.Unet = UNet11()       # output size is (512, 1)
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, num_classes)
        self.fc3 = nn.Linear(512, 1)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.softmax = nn.Softmax(dim=1)  # dim added to avoid the implicit-dimension warning

    def forward(self, x1, x2):
        out1 = self.CNN3D(x1)
        out2 = self.Unet(x2)
        outa = torch.cat((out1, out2), dim=1)
        outa = self.fc1(outa)
        outa = self.relu(outa)
        outa = self.softmax(self.fc2(outa))
        outb = self.fc3(self.relu(out2))
        return outa, outb
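For what it's worth, this error usually comes from handing torchsummary's summary() the two input sizes in a form it cannot reduce internally; for a multi-input forward() it expects a list of per-input size tuples (batch dimension excluded). A sketch with placeholder shapes, to be replaced with the real 3-D and 2-D input sizes:

from torchsummary import summary

model = CombineModel(num_classes=5).to('cuda')
# one size tuple per forward() argument:
summary(model, [(1, 16, 112, 112), (3, 224, 224)])

If torchsummary still trips over the multi-output forward, the newer torchinfo package (whose summary takes input_size with explicit batch dimensions) tends to handle multi-input, multi-output models more robustly.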

Change input shape dimensions for ResNet model (pytorch)

I want to feed my 3x320x320 images into an existing ResNet model. The model actually expects input of size 3x32x32. As I am afraid of losing information, I don't simply want to resize my pictures.
What is the best way to preprocess my images so that they can run through the ResNet34?
Should I add additional layers in the forward method of ResNet? If yes, what would be a suitable combination in my case?
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorch_fitmodule import FitModule
from torch.autograd import Variable
import numpy as np


def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, padding=1, bias=False)


class BasicBlock(FitModule):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(in_planes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * planes,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * planes)
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        out = F.relu(out)
        return out


class ResNet(FitModule):
    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet, self).__init__()
        self.in_planes = 64
        self.conv1 = conv3x3(3, 64)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):  # add additional layers here?
        x = x.float()
        out = F.relu(self.bn1(self.conv1(x).float()).float())
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = F.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ResNet34():
    return ResNet(BasicBlock, [3, 4, 6, 3])

Thanks plenty!
Regards,
Fabian
If you change your avg_pool operation to AdaptiveAvgPool2d, your model will work for any image size.
However, with your current setup, your 320x320 images would be 40x40 going into the pooling stage, which is a large feature map to pool over. Consider adding more conv layers.
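A sketch of that change inside ResNet.forward (identical behavior for 32x32 inputs, where layer4's output is already 4x4, but size-agnostic otherwise):

out = F.adaptive_avg_pool2d(out, (1, 1))  # replaces F.avg_pool2d(out, 4)
out = out.view(out.size(0), -1)           # always (batch, 512 * block.expansion)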
