i have this model:
class model(nn.Module):
def __init__(self):
super().__init__()
self.conv1 = nn.Conv2d(in_channels=12,out_channels=64,kernel_size=3,stride= 1,padding=1)
# self.conv2 = nn.Conv2d(in_channels=64,out_channels=64,kernel_size=3,stride= 1,padding=1)
self.fc1 = nn.Linear(24576, 128)
self.bn = nn.BatchNorm1d(128)
self.dropout1 = nn.Dropout2d(0.5)
self.fc2 = nn.Linear(128, 10)
self.fc3 = nn.Linear(10, 3)
def forward(self, x):
x = F.relu(self.conv1(x))
# x = F.relu(self.conv2(x))
x = F.max_pool2d(x, (2,2))
# print(x.shape)
x = x.view(-1,24576)
x = self.bn(F.relu(self.fc1(x)))
x = self.dropout1(x)
embeding_stage = F.relu(self.fc2(x))
x = self.fc3(embeding_stage)
return x
and i want to save the embeding_stage layer like i save the model here:
model = model()
torch.save(model.state_dict(), 'C:\project\count_speakers\model_pytorch.h5')
thanks,
Ayal
I'm not sure I understand what you mean with "save the embedding_stage layer" but if you want to save fc2 or fc3 or something, then you can do that with torch.save().
Ex: to save fc3: torch.save(model.fc3),'C:\...\fc3.pt')
Edit:
Op wants to have the output of the embedding_stage.
You can do that in several ways:
load your model with model.load_state_dict(torch.load('C:\...\model_pytorch.h5'))
then model = nn.Sequential(*list(model.children())[:-1]). The output of model is the embeding_stage.
make a Model2(nn.Module), exactly the same as your first Model(), but replace return x in def forward(self, x): with return embeding_stage. Then load the state of your first model into your second model like this: model2.load_state_dict(torch.load('C:\...\model_pytorch.h5'))
Like this fc3 will be loaded, but not used. The output of model2(x) will be the embeding_stage.
Related
I need to save configuration file of the bert model with CNN. I will this file to visualize bert. so any suggestion to do that?
my model:
class MixModel(nn.Module):
def __init__(self,pre_trained='bert-base-uncased'):
super().__init__()
config = BertConfig.from_pretrained('bert-base-uncased', output_hidden_states=True)
self.bert = BertModel.from_pretrained('bert-base-uncased',config=config)
self.hidden_size = self.bert.config.hidden_size
self.conv = nn.Conv1d(in_channels=3072, out_channels=256, kernel_size=5, stride=1)
self.relu = nn.ReLU()
self.pool = nn.MaxPool1d(kernel_size= 64- 5 + 1)
self.dropout = nn.Dropout(0.3)
self.flat=nn.Flatten()
self.clf1 = nn.Linear(256,256)
self.clf2= nn.Linear(256,6)
def forward(self,inputs, mask , labels):
inputs=torch.tensor(inputs)
mask=torch.tensor(mask)
labels=torch.tensor(labels)
x = self.bert(input_ids=inputs,attention_mask=mask, return_dict= True)
x = self.conv(x)
x = self.relu(x)
x = self.pool(x)
x = self.dropout(x)
x = self.flat(x)
x = self.clf1(x)
x = self.clf2(x)
return x
Note That: save_pretrained() function can't work with fine-tuned bert model with CNN
i am building a federated learning model, i would need to json encode my model so that i can exchange it with new workers i add to the network.
could you help me?
is it possible to encode a model in a json file so that it can be sent?
below i put my net class i tried with the class methods but of course i get the error:
object not serialisable in json.
thanks for your help
class net(nn.Module):
def __init__(self):
super( net, self).__init__()
self.c1 = nn.Conv2d(3, 32, 3)
self.b1 = nn.BatchNorm2d(32)
self.c2 = nn.Conv2d(32, 64, 3)
self.p1 = nn.MaxPool2d(2, 2)
self.c3 = nn.Conv2d(64, 128, 3)
self.d1 = nn.Dropout(0.5)
self.c4 = nn.Conv2d(128, 256, 5)
self.p2 = nn.MaxPool2d(2, 2)
self.c5 = nn.Conv2d(256, 128, 3)
self.p3 = nn.MaxPool2d(2, 2)
self.d2 = nn.Dropout(0.2)
self.l1 = nn.Linear(29*29*128, 128)
self.l2 = nn.Linear(128, 32)
self.l3 = nn.Linear(32 , 10)
self.weights_initialization()
def forward(self, x):
x = F.relu(self.c1(x))
x = self.b1(x)
x = F.relu(self.c2(x))
x = self.p1(x)
x = F.relu(self.c3(x))
x = self.d1(x)
x = F.relu(self.c4(x))
x = self.p2(x)
x = F.relu(self.c5(x))
x = self.p3(x)
x = self.d2(x)
x = x.view(x.size(0), -1)
x = F.relu(self.l1(x))
x = self.d2(x)
x = F.relu(self.l2(x))
x = self.l3(x)
def forward(self, x):
out = F.relu(self.fc1(x))
out = F.relu(self.fc2(out))
return self.output(out)
def weights_initialization(self):
for m in self.modules():
if isinstance(m, nn.Linear):
nn.init.xavier_normal_(m.weight)
nn.init.constant_(m.bias, 0)
model = net()
I have tried the classical methods but have not succeeded in any way in doing this encoding. I currently have no idea how to do this, can you help me?
class MixModel(nn.Module):
def __init__(self,pre_trained='bert-base-uncased'):
super().__init__()
config = BertConfig.from_pretrained('bert-base-uncased', output_hidden_states=True)
self.bert = BertModel.from_pretrained('bert-base-uncased',config=config)
self.hidden_size = self.bert.config.hidden_size
self.conv = nn.Conv1d(in_channels=3072, out_channels=256, kernel_size=5, stride=1)
self.relu = nn.ReLU()
self.pool = nn.MaxPool1d(kernel_size= 64- 5 + 1)
self.dropout = nn.Dropout(0.3)
self.flat=nn.Flatten()
self.clf1 = nn.Linear(256,256)
self.clf2= nn.Linear(256,6)
def forward(self,inputs, mask , labels):
inputs=torch.tensor(inputs)
mask=torch.tensor(mask)
labels=torch.tensor(labels)
x = self.bert(input_ids=inputs,attention_mask=mask, return_dict= True)
x = self.conv(x)
x = self.relu(x)
x = self.pool(x)
x = self.dropout(x)
x = self.flat(x)
x = self.clf1(x)
x = self.clf2(x)
return x
I want to save model,weights and config file for my model after training. after searching I found that model.save_pretrained function is good solution for me but I got an error that model called mixmodel has no function called save_pretrained
so how can I save config file for my model mixmodel?
I think that "state_dict" is what you need.
There's good tutorial for PyTorch in the documentation
https://pytorch.org/tutorials/beginner/saving_loading_models.html
I created a GAN for text and using VanillaGAN training approach, but the main problem is the sequences that are created are good, but the main problem is that when I use nn.sigmoid to see the discriminator labels it shows [0] for data that is created which are completely real, and it is not correct.
Here is my Discriminator code:
class Classifier(nn.Module):
def __init__(self, hidden_size, hidden_size2, dropout):
super().__init__()
self.FC1 = nn.Sequential(
nn.Linear(512, hidden_size),
nn.LeakyReLU(0.1),
nn.Dropout(dropout))
self.FC2 = nn.Sequential(
nn.Linear(hidden_size, hidden_size2),
nn.LeakyReLU(0.1),
nn.Dropout(dropout)
)
self.FC3 = nn.Linear(hidden_size2, 1)
self.dropout = nn.Dropout(dropout)
self.bach = nn.BatchNorm1d(512)
self.bach2 = nn.BatchNorm1d(hidden_size)
self.bach3 = nn.BatchNorm1d(64)
def forward(self, x):
z = self.dropout(x)
z = self.bach(z)
z = self.FC1(z)
z = self.bach2(z)
z = self.FC2(z)
z = self.bach3(z)
out = self.FC3(z)
return out
As input to this Classifier, hidden states of real and fake sequences are feed into this network.
My loss function is BCEWithlogits and this is the Train class
# to create real labels (1s)
def label_real(size):
data = torch.ones(size, 1)
return data.to(device)
# to create fake labels (0s)
def label_fake(size):
data = torch.zeros(size, 1)
return data.to(device)
# function to train the discriminator network
def train_discriminator(optimizer, data_real, data_fake):
b_size = data_real.size(1)
real_label = label_real(b_size)
fake_label = label_fake(b_size)
optimizer.zero_grad()
output_real = discriminator(data_real)
loss_real = criterion(output_real, real_label)
output_fake = discriminator(data_fake)
loss_fake = criterion(output_fake, fake_label)
loss_real.backward()
loss_fake.backward()
optimizer.step()
return loss_real + loss_fake
I use nn.sigmoid after training and on testing the model. Please help me to know what is wrong with my neural network?
I would like to implement a GRU able to encode a sequence of vectors to one vector (many-to-one), and then another GRU able to decode a vector to a sequence of vector (one-to-many). The size of the vectors wouldn't be changed. I would like to have an opinion about what I implemented.
Here is the code:
class AEGRU(nn.Module):
def __init__(self, opt):
super(AEGRU, self).__init__()
self.length = 256
self.latent_space = 256
self.num_layers = 1
self.GRU_enc = nn.GRU(input_size=3, hidden_size=self.latent_space, num_layers=self.num_layers, batch_first=True)
self.fc_enc = nn.Linear(self.latent_space, self.latent_space)
self.GRU_dec = nn.GRU(input_size=self.latent_space, hidden_size=3, num_layers=self.num_layers, batch_first=True)
self.fc_dec = nn.Linear(3, 3)
def enc(self, x):
# x has shape: Batch_size x self.length x 3
h0 = torch.zeros(self.num_layers, x.shape[0], self.latent_space).cuda()
out, _ = self.GRU_enc(x, h0)
out = out[:, -1, :]
out = self.fc_enc(out)
return out
def dec(self, x):
# x has shape: Batch_size x self.latent_space
x = x[:, None, :]
h = torch.zeros(self.num_layers, x.shape[0], 3).cuda()
# method 1 ??
'''outputs = torch.zeros(x.shape[0], self.length, 3).cuda()
for i in range(self.length):
out, h = self.GRU_dec(x, h)
outputs[:, i, :] = out[:, 0, :]'''
# method 2 ??
x = x.repeat(1, self.length, 1)
outputs, _ = self.GRU_dec(x, h)
# linear layer
outputs = self.fc_dec(outputs)
return outputs
def forward(self, x):
self.indices = []
latent = self.enc(x)
output = self.dec(latent)
return output
I am not sure whether this is the good way to do a one-to-many GRU. Could I have some opinions about this?
Thanks for reading!