GRU combined with a CNN for a text generator

I'm trying to integrate a CNN with a GRU. My model passes each image through the CNN, and the CNN features are fed to the GRU frame by frame. The structure is shown in the picture.
Here is my example code implementing the structure above.
Encoder:
import torch
import torch.nn as nn
from torchvision import models

### input image size: [batch, seq, color_ch, height, width]
### expected output: [batch, seq, features]
class CNNencoder(nn.Module):
    def __init__(self, input_size, hidden_size, batch_size=5):
        super(CNNencoder, self).__init__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.hidden_size = hidden_size
        self.modelVGG = models.vgg11(pretrained=False)
        self.modelVGG = self.modelVGG.to(self.device)
        self.adaptor = nn.Linear(8192, self.hidden_size)
        self.adaptor = self.adaptor.to(self.device)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True, bidirectional=False)  # (input_size, hidden_size)
        self.gru = self.gru.to(self.device)
        self.batch_size = batch_size

    def forward(self, input, hidden):
        seqs = input.size()[1]
        for indexseq in range(0, seqs):
            inputImageBatch = input[:, indexseq, :, :, :].view(-1, 3, 128, 128)
            features = self.modelVGG.features(inputImageBatch)
            flat_features = features.view(features.size(0), 1, -1)  # flatten
            if indexseq == 0:
                output = flat_features
            else:
                output = torch.cat((output, flat_features), dim=1)
        # output shape: [batch, seq, features]
        outputAdaptor = self.adaptor(output)
        outputGru, hidden = self.gru(outputAdaptor, hidden)
        return outputGru, hidden

    def initHidden(self):
        return torch.zeros(1, self.batch_size, self.hidden_size, device=self.device)
I would like to know: how will the CNN parameters receive gradients through time if I create a model from this class and call loss.backward()? (The loss value comes from the decoder section.)
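For what it's worth, autograd records the VGG forward pass at every time step, so a single backward() call on the decoder loss accumulates gradients into the shared CNN weights across all steps. A minimal sketch for checking this, assuming the CNNencoder above and a dummy scalar loss standing in for the real decoder loss:

encoder = CNNencoder(input_size=8192, hidden_size=256, batch_size=2)
frames = torch.randn(2, 4, 3, 128, 128, device=encoder.device)  # [batch, seq, ch, h, w]
out, hidden = encoder(frames, encoder.initHidden())
loss = out.sum()  # stand-in for the decoder loss
loss.backward()
# The first VGG conv layer now holds gradients summed over all 4 time steps.
print(encoder.modelVGG.features[0].weight.grad.abs().sum())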


How to use pipeline for a custom token-classification model

Model description
I added a simple custom pytorch-crf layer on top of a TokenClassification model to make it more robust.
I trained the model successfully, but when I test it, the saved folder doesn't contain a config.json file, so the pipeline function fails with:
AttributeError: 'BERT_CRF' object has no attribute 'config'
Code:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchcrf import CRF
from transformers import AutoTokenizer, BertForTokenClassification, pipeline

class BERT_CRF(nn.Module):
    def __init__(self, bert_model, num_labels):
        super(BERT_CRF, self).__init__()
        self.bert = bert_model
        self.dropout = nn.Dropout(0.25)
        self.classifier = nn.Linear(768, num_labels)
        self.crf = CRF(num_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, labels=None, token_type_ids=None):
        outputs = self.bert(input_ids, attention_mask=attention_mask)
        # average the last four hidden states
        sequence_output = torch.stack(
            (outputs[1][-1], outputs[1][-2], outputs[1][-3], outputs[1][-4])
        ).mean(dim=0)
        sequence_output = self.dropout(sequence_output)
        emission = self.classifier(sequence_output)  # [32, 256, 17]
        if labels is not None:
            labels = labels.reshape(attention_mask.size()[0], attention_mask.size()[1])
            loss = -self.crf(F.log_softmax(emission, 2), labels,
                             mask=attention_mask.type(torch.uint8), reduction='mean')
            prediction = self.crf.decode(emission, mask=attention_mask.type(torch.uint8))
            return [loss, prediction]
        else:
            prediction = self.crf.decode(emission, mask=attention_mask.type(torch.uint8))
            return prediction

tokenizer = AutoTokenizer.from_pretrained("fine-tuned_model", model_max_length=256)
bert_model = BertForTokenClassification.from_pretrained('spanbert_base', id2label=id2label, label2id=label2id)
bert_model.config.output_hidden_states = True
model = BERT_CRF(bert_model, num_labels=21)
model.load_state_dict(torch.load("fine-tuned_model/pytorch_model.bin"))
model.eval()
token_classifier = pipeline("token-classification", model=model, aggregation_strategy="max",
                            tokenizer=tokenizer, grouped_entities=True)
which raises:
AttributeError: 'BERT_CRF' object has no attribute 'config'
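For context, pipeline() expects a transformers PreTrainedModel, and one of the first things it reads is model.config; a plain nn.Module wrapper has no such attribute. One way to get past this specific error, sketched under the assumption that the wrapper should expose the underlying BERT config (the pipeline may still fail later, since the CRF's decoded-list output is not the logits format the pipeline expects):

# Hypothetical workaround: give the wrapper the attribute the pipeline reads.
model.config = bert_model.config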

Simple RNN error: "Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu"

I'm working on a basic RNN NLP classifier in PyTorch and am trying to use CUDA for acceleration (on Google Colab), but I can't resolve this error.
The code is written as follows.
Error message:
Input and hidden tensors are not at the same device, found input tensor at cuda:0 and hidden tensor at cpu
RNN class
class RNN(nn.Module):
    def __init__(self, vocab_size, emb_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Embedding(vocab_size, emb_size)
        self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh', batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        self.batch_size = x.size()[0]
        hidden = self.init_hidden()
        emb = self.emb(x)
        out, hidden = self.rnn(emb, hidden)
        out = self.fc(out[:, -1, :])
        return out

    def init_hidden(self):
        hidden = torch.zeros(1, self.batch_size, self.hidden_size)
        return hidden
Device:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
Setting variables:
VOCAB_SIZE = len(word_id.keys()) + 1
EMB_SIZE = 300
OUTPUT_SIZE = 4
HIDDEN_SIZE = 50
model = RNN(VOCAB_SIZE, EMB_SIZE, HIDDEN_SIZE, OUTPUT_SIZE)
model = model.to(device)
Prediction:
for i in range(10):
    # take the input element at this index of the dataset
    X, y = dataset_train[i]
    X = X.to(device)
    print(torch.softmax(model(X.unsqueeze(0)), dim=1))
This code works on the CPU, but it fails on the GPU.
Following the error message, I tried some fixes, e.g. hidden.to(device), but I couldn't solve it.
Please, can someone tell me how to fix this? Thank you very much.
Doesn't doing something like the following work?
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class RNN(nn.Module):
    def __init__(self, vocab_size, emb_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.emb = nn.Embedding(vocab_size, emb_size)
        self.rnn = nn.RNN(emb_size, hidden_size, nonlinearity='tanh', batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)
        self.to(device)

    def forward(self, x):
        self.batch_size = x.size()[0]
        hidden = self.init_hidden()
        emb = self.emb(x)
        out, hidden = self.rnn(emb, hidden)
        out = self.fc(out[:, -1, :])
        return out

    def init_hidden(self):
        # create the hidden state on the same device as the model's parameters
        hidden = torch.zeros(1, self.batch_size, self.hidden_size).to(device)
        return hidden
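As a variation (a sketch of my own, not from the answer above): deriving the device from the input inside forward avoids relying on a module-level device variable, so the model keeps working wherever model.to(...) later moves it:

def forward(self, x):
    # allocate the initial hidden state on whatever device the input lives on
    hidden = torch.zeros(1, x.size(0), self.hidden_size, device=x.device)
    emb = self.emb(x)
    out, hidden = self.rnn(emb, hidden)
    return self.fc(out[:, -1, :])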

token_type_ids error in transformers.BertForTokenClassification (HuggingFace)

I'm facing an error with token_type_ids in my training function, which uses BertForTokenClassification from HuggingFace.
class BERTClassification(nn.Module):
    def __init__(self):
        super(BERTClassification, self).__init__()
        self.encoder = BertForTokenClassification.from_pretrained(
            config.pretrainDIR, local_files_only=True, num_labels=2)

    def forward(self, input_ids, labels, token_type_ids=None, attention_mask=None):
        output = self.encoder(input_ids=input_ids, labels=labels,
                              token_type_ids=token_type_ids, attention_mask=attention_mask)
        return output
class Trainer():
    def __init__(self, model, data_loader, optimizer, device, scheduler):
        self.model = model
        self.data_loader = data_loader
        self.optimizer = optimizer
        self.device = device
        self.scheduler = scheduler

    def train_func(self):
        self.model.to(self.device)
        self.model.train()
        for bi, d in tqdm(enumerate(self.data_loader), total=len(self.data_loader)):
            ids = d["input_ids"]
            targets = d["labels"]
            type_ids = d['type_ids']
            attention_mask = d['attention_mask']
            ids = ids.to(self.device)
            targets = targets.to(self.device)
            type_ids = type_ids.to(self.device)
            attention_mask = attention_mask.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(
                input_ids=ids,
                labels=targets,
                token_type_ids=type_ids,
                attention_mask=attention_mask
            )
Here self.model is an instance of the pretrained BERTClassification module above.
I get an error only when I pass token_type_ids to self.model; otherwise, the code runs just fine.
The error message I get on CUDA is:
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling `cublasCreate(handle)`
The error message I get on the CPU is:
index out of range in self
I'm confused, because the shape of token_type_ids matches the shapes of ids, targets, and attention_mask.
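For what it's worth, "index out of range in self" from an nn.Embedding usually means some input id falls outside the embedding table, and BERT's segment embedding only has config.type_vocab_size rows (2 for standard BERT), so token_type_ids must be 0 or 1; shape alone doesn't guarantee that. A minimal sanity check under that assumption, run on the batch before the forward call:

# Hypothetical check: segment ids must lie in [0, type_vocab_size)
type_vocab_size = self.model.encoder.config.type_vocab_size  # 2 for standard BERT
bad = (type_ids < 0) | (type_ids >= type_vocab_size)
if bad.any():
    print("out-of-range token_type_ids values:", type_ids[bad].unique())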

Different training results obtained from training a simple LSTM in Keras and PyTorch

I'm trying to port my LSTM model from Keras to PyTorch, but the results in PyTorch seem really bad at the moment. The network is really simple, as shown below.
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(10, input_length=shape[1], input_dim=shape[2]))
# output shape: (1, 1)
model.add(Dense(10,activation="tanh"))
model.add(Dense(10,activation="tanh"))
model.add(Dense(10,activation="tanh"))
model.add(Dense(10,activation="tanh"))
model.add(Dense(1,activation="linear"))
model.compile(loss="mse", optimizer="adam")
model.summary()
And here is my migration to the PyTorch framework:
class LSTM(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, bilstm=False):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.isBi = bilstm
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers,
                            batch_first=True, bidirectional=bilstm).double()
        # for name, param in self.lstm.named_parameters():
        #     if name.startswith("weight"):
        #         nn.init.orthogonal_(param)
        #     else:
        #         pass
        self.fc1 = nn.Sequential(nn.Linear(hidden_dim, 10).double(), nn.Tanh())
        self.final_layer1 = nn.Sequential(nn.Linear(10, 10).double(), nn.Tanh())
        self.final_layer2 = nn.Sequential(nn.Linear(10, 10).double(), nn.Tanh())
        self.final_layer3 = nn.Sequential(nn.Linear(10, 10).double(), nn.Tanh())
        self.final_layer4 = nn.Sequential(nn.Linear(10, output_dim).double())

    def forward(self, x):
        out, (hn, cn) = self.lstm(x)
        out = out[:, -1, :]
        out = self.fc1(out)
        out = self.final_layer1(out)
        out = self.final_layer2(out)
        out = self.final_layer3(out)
        out = self.final_layer4(out)
        return out
The results are really bad. I was wondering whether the initialization methods and activation functions used in Keras differ from the ones I used in PyTorch (Keras seems to use hard_sigmoid for the recurrent activation where PyTorch uses sigmoid?); see the initialization sketch below.
I would really appreciate it if somebody could help me with this problem!
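For comparison (my addition, not from the post): Keras's LSTM defaults to glorot_uniform for the input kernel, orthogonal for the recurrent kernel, and zero biases with the forget-gate bias set to 1, while PyTorch initializes all LSTM weights uniformly in [-1/sqrt(hidden), 1/sqrt(hidden)]. A sketch that re-initializes an nn.LSTM to match the Keras defaults, relying on PyTorch's (input, forget, cell, output) gate ordering:

import torch.nn as nn

def init_lstm_like_keras(lstm: nn.LSTM):
    for name, param in lstm.named_parameters():
        if name.startswith("weight_ih"):    # input-to-hidden: Glorot/Xavier uniform
            nn.init.xavier_uniform_(param)
        elif name.startswith("weight_hh"):  # hidden-to-hidden: orthogonal
            nn.init.orthogonal_(param)
        elif name.startswith("bias"):       # zeros, with forget-gate bias = 1
            nn.init.zeros_(param)
            n = param.size(0) // 4
            param.data[n:2 * n].fill_(1.0)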
UPDATE
My training code in PyTorch:
criterion = nn.MSELoss()
model = LSTM(input_dim, hidden_dim, num_layers, output_dim, bilstm)
model = model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.001)

for epoch in range(1, epoch_number + 1):
    model.train()
    iteration = 0
    for i, data in enumerate(train_loader):
        dat, label = data
        dat = dat.double()
        label = label.double()
        if torch.cuda.is_available():
            dat = dat.cuda()
            label = label.cuda()
        else:
            dat = Variable(dat)
            label = Variable(label)
        out = model(dat)
        optimizer.zero_grad()
        loss = criterion(out, label)
        loss.backward()
        optimizer.step()

PyTorch LSTMCell Teacher Forcing

I'm fairly new to PyTorch, and I'm trying to design an 18-node LSTM using LSTMCell with teacher forcing. I'm having quite a few difficulties.
Here's my model:
class tryLSTM(nn.moduleList):
    def __init__(self, input_size, hidden_size, batch_size):
        super(tryLSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.lstm0 = nn.LSTMCell(input_size, hidden_size, bias=True)
        self.lstm1 = nn.LSTMCell(input_size, hidden_size, bias=True)
        self.lstm2 = nn.LSTMCell(input_size, hidden_size, bias=True)
        # .........
        self.lstm17 = nn.LSTMCell(input_size, hidden_size, bias=True)

    def init_hidden(self):
        # initialize the hidden state and the cell state to zeros
        hidden = torch.zeros(self.batch_size, self.hidden_size)
        cell = torch.zeros(self.batch_size, self.hidden_size)
        return hidden, cell

    def forward(self, x, hc):
        out = []
        h_0, c_0 = hc
        h_1, c_1 = self.lstm1(x[0], h_0, c_0)
        out[0] = h_1
        h_2, c_2 = self.lstm2(x[1], h_1, c_1)
        out[1] = h_2
        # ......
        h_17, c_17 = self.lstm17(x[16], h_16, c_16)
        out[16] = h_17

model = tryLSTM(input_size=128, hidden_size=128, batch_size=18)
if gpu:
    model.cuda()
optimizer = optim.Adam(model.parameters(), lr=0.0001)
criterion = nn.BCELoss(weight=None, reduction='mean')
Here's the training loop:
def train(epoch):
    model.train()
    # initialize hidden and cell state
    hc = model.init_hidden()
    for batch_idx, (data, target) in enumerate(train_loader):
        # zero out the gradients
        optimizer.zero_grad()
        target = data[1:]
        print(target.size())
        # put data on GPU
        if gpu:
            data = data.cuda()
            target = target.cuda()
        # get outputs of the LSTM
        output = model(data, hc)
        print(output.size())
        # calculate loss
        loss = criterion(output, target)
        # calculate gradients
        loss.backward()
        # update model parameters
        optimizer.step()
        train_loss.append(loss.item())
Q1: I'm getting the following error:
TypeError: forward() takes from 2 to 3 positional arguments but 4 were given
Please help. Thanks!
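A likely cause of Q1, based on the forward method above: nn.LSTMCell takes the hidden and cell states as a single tuple, cell(input, (h, c)), so passing h and c as two separate positional arguments produces exactly "forward() takes from 2 to 3 positional arguments but 4 were given". A minimal sketch of the corrected calls (the class should also subclass nn.Module rather than nn.moduleList, and out = [] cannot be indexed before appending):

h_1, c_1 = self.lstm1(x[0], (h_0, c_0))  # states passed as one tuple
out.append(h_1)
h_2, c_2 = self.lstm2(x[1], (h_1, c_1))
out.append(h_2)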
