Loading PyTorch Lightning Trained checkpoint - python-3.x

I am using PyTorch Lightning version 1.4.0 and have defined the following class for the dataset:
class CustomTrainDataset(Dataset):
'''
Custom PyTorch Dataset for training
Args:
data (pd.DataFrame) - DF containing product info (and maybe also ratings)
all_itemIds (list) - Python3 list containing all Item IDs
'''
def __init__(self, data, all_orderIds):
self.users, self.items, self.labels = self.get_dataset(data, all_orderIds)
def __len__(self):
return len(self.users)
def __getitem__(self, idx):
return self.users[idx], self.items[idx], self.labels[idx]
def get_dataset(self, data, all_orderIds):
users, items, labels = [], [], []
user_item_set = set(zip(train_ratings['CustomerID'], train_ratings['ItemCode']))
num_negatives = 7
for u, i in user_item_set:
users.append(u)
items.append(i)
labels.append(1)
for _ in range(num_negatives):
negative_item = np.random.choice(all_itemIds)
while (u, negative_item) in user_item_set:
negative_item = np.random.choice(all_itemIds)
users.append(u)
items.append(negative_item)
labels.append(0)
return torch.tensor(users), torch.tensor(items), torch.tensor(labels)
followed by the PL class:
class NCF(pl.LightningModule):
'''
Neural Collaborative Filtering (NCF)
Args:
num_users (int): Number of unique users
num_items (int): Number of unique items
data (pd.DataFrame): Dataframe containing the food ratings for training
all_orderIds (list): List containing all orderIds (train + test)
'''
def __init__(self, num_users, num_items, data, all_itemIds):
# def __init__(self, num_users, num_items, ratings, all_movieIds):
super().__init__()
self.user_embedding = nn.Embedding(num_embeddings = num_users, embedding_dim = 8)
# self.user_embedding = nn.Embedding(num_embeddings = num_users, embedding_dim = 10)
self.item_embedding = nn.Embedding(num_embeddings = num_items, embedding_dim = 8)
# self.item_embedding = nn.Embedding(num_embeddings = num_items, embedding_dim = 10)
self.fc1 = nn.Linear(in_features = 16, out_features = 64)
# self.fc1 = nn.Linear(in_features = 20, out_features = 64)
self.fc2 = nn.Linear(in_features = 64, out_features = 64)
self.fc3 = nn.Linear(in_features = 64, out_features = 32)
self.output = nn.Linear(in_features = 32, out_features = 1)
self.data = data
# self.ratings = ratings
# self.all_movieIds = all_movieIds
self.all_orderIds = all_orderIds
def forward(self, user_input, item_input):
# Pass through embedding layers
user_embedded = self.user_embedding(user_input)
item_embedded = self.item_embedding(item_input)
# Concat the two embedding layers
vector = torch.cat([user_embedded, item_embedded], dim = -1)
# Pass through dense layer
vector = nn.ReLU()(self.fc1(vector))
vector = nn.ReLU()(self.fc2(vector))
vector = nn.ReLU()(self.fc3(vector))
# Output layer
pred = nn.Sigmoid()(self.output(vector))
return pred
def training_step(self, batch, batch_idx):
user_input, item_input, labels = batch
predicted_labels = self(user_input, item_input)
loss = nn.BCELoss()(predicted_labels, labels.view(-1, 1).float())
return loss
def configure_optimizers(self):
return torch.optim.Adam(self.parameters())
def train_dataloader(self):
return DataLoader(
ChupsTrainDataset(
self.data, self.all_orderIds
),
batch_size = 32, num_workers = 2
# Google Colab's suggested max number of worker in current
# system is 2 and not 4.
)
print(f"num_users = {num_users}, num_items = {num_items} & all_itemIds = {len(all_itemIds)}")
# num_users = 12958, num_items = 511238 & all_itemIds = 9114
# Initialize NCF model-
model = NCF(num_users, num_items, train_ratings, all_itemIds)
trainer = pl.Trainer(
max_epochs = 75, gpus = 1,
# max_epochs = 5,
reload_dataloaders_every_n_epochs = True,
# reload_dataloaders_every_epoch = True, # deprecated!
progress_bar_refresh_rate = 50,
logger = False, checkpoint_callback = False)
trainer.fit(model)
# Save trained model as a checkpoint-
trainer.save_checkpoint("NCF_Trained.ckpt")
To load the saved checkpoint, I have tried:
trained_model = NCF.load_from_checkpoint(
"NCF_Trained.ckpt", num_users = num_users,
num_items = train_ratings, data = train_ratings,
all_itemIds = all_itemIds)
trained_model = NCF(num_users, num_items, train_ratings, all_orderIds).load_from_checkpoint(checkpoint_path = "NCF_Trained.ckpt")
But these don't seem to work. How do I load this saved checkpoint?
Thanks!

add a line in your init method:
self.save_hyperparameters(logger=False)
Then call
trained_model = NCF.load_from_checkpoint("NCF_Trained.ckpt")

As shown in here, load_from_checkpoint is a primary way to load weights in pytorch-lightning and it automatically load hyperparameter used in training. So you do not need to pass params except for overwriting existing ones. My suggestion is to try trained_model = NCF.load_from_checkpoint("NCF_Trained.ckpt")

In my case it was crucial to set the model into the evaluation mode via model.eval(). Otherwise it would produce wrong results.

Related

how can I predict only 5 days price in this LSTM model (by pytorch)

class StockDataset(Dataset):
#데이터 셋은 i번째 레코드 값을 주는 역할 수행
def __init__(self, symbol, x_frames, y_frames, start, end):
self.symbol = symbol
self.x_frames = x_frames
self.y_frames = y_frames
self.start = datetime.datetime(*start)
self.end = datetime.datetime(*end)
#위에서 지정했던 데이터, 날짜 값들 다 받아옴
self.data = pdr.DataReader(self.symbol, 'yahoo', self.start, self.end)
def __len__(self):
return len(self.data) - (self.x_frames + self.y_frames) + 1
def __getitem__(self, idx):
global data
#global data_set
#데이터셋 i번째 값 입력받았을때 그걸 출력해줌 데이터를 '리스트'화 하는 것
idx += self.x_frames
data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
data = data[['High', 'Low', 'Open', 'Close', 'Adj Close', 'Volume']]
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1)) #로그수익률 변환 한뒤, 혹시모를 결측값 위해 1더해줌
global x_ex
global y_ex
x_ex= data[:self.x_frames]
y_ex= data[self.x_frames:]
data = data.values #numpy array로 변환한거
X = data[:self.x_frames]
y = data[self.x_frames:]
return X, y
This one is dataset
class LSTM(nn.Module):
#50분 이후부터 모델설명
def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn):
super(LSTM, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.output_dim = output_dim
self.num_layers = num_layers
self.batch_size = batch_size
self.dropout = dropout
self.use_bn = use_bn
self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
self.hidden = self.init_hidden()
self.regressor = self.make_regressor()
def init_hidden(self):
return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
def make_regressor(self):
layers = []
if self.use_bn:
layers.append(nn.BatchNorm1d(self.hidden_dim))
layers.append(nn.Dropout(self.dropout))
layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
layers.append(nn.ReLU())
layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
regressor = nn.Sequential(*layers)
return regressor
def forward(self, x):
lstm_out, self.hidden = self.lstm(x, self.hidden)
y_pred = self.regressor(lstm_out[-1].view(self.batch_size, -1))
return y_pred
This one is model
def test(model, partition, args):
global y_true
global y_pred
global X
testloader = DataLoader(partition['test'],
batch_size=args.batch_size,
shuffle=False, drop_last=True)
model.eval()
test_acc = 0.0
with torch.no_grad():
for i, (X, y) in enumerate(testloader):
X = X.transpose(0, 1).float().to(args.device)
y_true = y[:, :, 3].float().to(args.device)
model.hidden = [hidden.to(args.device) for hidden in model.init_hidden()]
y_pred = model(X)
test_acc += metric(y_pred, y_true)[0]
test_acc = test_acc / len(testloader)
return test_acc
This is test data loader.
# ====== Random Seed Initialization ====== #
seed = 666
np.random.seed(seed)
torch.manual_seed(seed)
parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.exp_name = "exp1_lr"
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
# ====== Data Loading ====== #
args.symbol = '005930.KS' #원하는 종목
args.batch_size = 4 #배치사이즈
args.x_frames = 5 #수정x 이전 n일치 데이터 이게 너무 길면 1주일 예측 불가능
args.y_frames = 5 #수정y 이후 n일치 데이터 이게 너무 길면 1주일 예측 불가능
# ====== Model Capacity ===== #
args.input_dim = 6
args.hid_dim = 50
args.n_layers = 2 # (은닉층의 레이어 갯수) https://justkode.kr/deep-learning/pytorch-rnn링크 참고
# ====== Regularization ======= #
args.l2 = 0.0001
args.dropout = 0.3
args.use_bn = True
# ====== Optimizer & Training ====== #
args.optim = 'RMSprop' #'RMSprop' #SGD, RMSprop, ADAM...
args.lr = 0.001
args.epoch = 1
# ====== Experiment Variable ====== #
name_var1 = 'lr' # lr=러닝레이트
name_var2 = 'n_layers'#뉴럴 네트워크 몇개를 쌓을것인지?
list_var1 = [0.001, 0.0001, 0.00001]
list_var2 = [1,2,3]
#데이터셋 실제 형성
trainset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2012,1,1), (2021,1,1)) #학습기간
valset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2021,1,2), (2021,12,30)) #검증기간 최소 +6 월, +19 일 안하면 float division by zero 에러 발생 왜?? 21년 기준
testset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2022,1,10), (2022,1,14)) #테스트기간 최소 +6 월, + 25일 안하면 float division by zero 에러 발생. 22년기준
#기간이 일정 영업일 이상을 요구하는듯? <<146 영업일 이상 데이터 요구. 그만큼 안주면 오류남 왜??
partition = {'train': trainset, 'val':valset, 'test':testset}
for var1 in list_var1:
for var2 in list_var2:
setattr(args, name_var1, var1)
setattr(args, name_var2, var2)
print(args)
setting, result = experiment(partition, deepcopy(args))
save_exp_result(setting, result)
#꼭 디렉토리에 있는 파일들 지운다음에 그래프 그려야한다. 안그러면 결과값 전부 겹쳐서 나옴
This one is hyper parameter regulate.
I wonder how can I get result when I set testset length in 5days? (like (2022,1,10) (2022,1,14))
This cord didn't work when I set testset length at least 7month (maybe + 146 trade day)
error is float divided by zero. (when I use lower 146 days.)
if I set length +146 days, then codes work well.
I think this code cause error:
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))
log data was so small, so error occurred. (my opinion)
data is yahoo finance data. Thanx to read
When I # below code, then data got infinite.
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))

getting shape mismatch error between shape of labels and logits?

I'm trying to do attention mechanism while returning the tensor I'm getting the following error
ValueError: Shape mismatch: The shape of labels (received (64, 53)) should equal the shape of logits except for the last dimension (received (64, 1, 500)).
Please find the below code
Here is code for attention please correct me if it is wrong
class Attention(tf.keras.layers.Layer):
def __init__(self):
super().__init__()
def call(self,enc_op,hidden_state):
# print(enc_op.shape,hidden_state.shape)
query_with_time_axis = tf.expand_dims(hidden_state, 1)
context_vector = tf.matmul(enc_op,tf.transpose(query_with_time_axis,perm=[0,2,1]))
context_vector = tf.nn.softmax(context_vector,axis=1)
context_vector = context_vector * enc_op
context_vector = tf.reduce_sum(context_vector, axis=1)
return context_vector
Here is decoder part I'm calling the attention from here
class Decoder(tf.keras.layers.Layer):
def init(self,vocab_size,embedding_dim,input_length,dec_units):
super().init()
self.vocab_size = vocab_size
self.embedding_dim = embedding_dim
self.dec_units = dec_units
self.input_length = input_length
self.attention = Attention()
def build(self,input_shape):
self.embedding = Embedding(input_dim=self.vocab_size,output_dim = self.embedding_dim,input_shape = input_shape,
mask_zero = True, name = "embedding_layer_decoder")
self.lstm = LSTM(self.dec_units,return_sequences=True,return_state=True,name = "Decoder_LSTM")
def call(self,target_sentances,enc_op,hidden_state,cell_state):
target_embed = self.embedding(target_sentances)
for i in range(target_embed.shape[1]):
context_vector = self.attention(enc_op,hidden_state)
y = tf.concat([context_vector, target_embed[:,i,:]], axis=-1)
y = tf.expand_dims(y, 1)
lstm_output,hidden_state,_ = self.lstm(y,initial_state = [hidden_state,cell_state])
return lstm_output
class Mymodel(Model):
def __init__(self,encoder_inputs_length,decoder_inputs_length,output_vocab_size):
super().__init__()
self.encoder = Encoder(vocab_size = 500, embedding_dim = 50, input_length = encoder_inputs_length, enc_units=64)
self.decoder = Decoder(vocab_size = 500, embedding_dim = 50, input_length = decoder_inputs_length, dec_units=64)
self.dense = Dense(output_vocab_size,activation = "softmax")
def call(self,data):
input,output = data[0],data[1]
print(input.shape,output.shape)
encoder_output,encoder_h,encoder_c = self.encoder(input)
print("="*20, "ENCODER", "="*20)
print("-"*35)
print(encoder_output)
print("ENCODER ==> OUTPUT SHAPE",encoder_output.shape)
print("ENCODER ==> HIDDEN STATE SHAPE",encoder_h.shape)
print("ENCODER ==> CELL STATE SHAPE", encoder_c.shape)
print("="*20,"Decoder","="*20)
decoder_output = self.decoder(output,encoder_output,encoder_h,encoder_c)
output1 = self.dense(decoder_output)
print("-"*35)
print("Final output shape",output.shape)
print("="*50)
return output1
model = Mymodel(encoder_inputs_length=30,decoder_inputs_length=20,output_vocab_size=500)
ENCODER_SEQ_LEN = 30
DECODER_SEQ_LEN = 20
optimizer = tf.keras.optimizers.Adam()
model.compile(optimizer=optimizer,loss=tf.keras.losses.SparseCategoricalCrossentropy())
for (batch, (inp, targ)) in enumerate(dataset.take(steps_per_epoch)):
model.fit([inp, targ], targ, steps_per_epoch=1)
The shape of my input and target is
(64, 55) (64, 53)
64 is batch size

Some parameters are not getting saved when saving a model in pytorch

I have built an encoder-decoder model with attention for morph inflection generation. I am able to train the model and predict on test data but I am getting wrong predicting after loading a saved model
I am not getting any error during saving or loading but
When I load a saved model its predictions are completely wrong. It looks like some parameters are not getting saved.
I have tried to load and save the model using both techniques
using state_dict() eg. torch.save(encoder.state_dict(),'path')
saving complete model eg.torch.save(encoder,'path')
I have tried to save different classes one by one and also making a superclass that initiates all those class and then saving just superclass
but nothing seems to be working
Encoder class
class Encoder(nn.Module):
def __init__(self,vocab_size,embedding_size, encoder_hid_dem,decoder_hid_dem,bidirectional,dropout):
super().__init__()
self.encoder_hid_dem = encoder_hid_dem
self.encoder_n_direction=1;
self.bias = False
self.dropout=dropout
if(bidirectional==True):
self.encoder_n_direction=2;
self.embedding_layer = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_size, padding_idx=0)
self.GRU_layer = nn.GRU(input_size=embedding_size, hidden_size=encoder_hid_dem, batch_first=True, bidirectional=bidirectional)
self.fc = nn.Linear(encoder_hid_dem*self.encoder_n_direction,decoder_hid_dem)
self.dropout = nn.Dropout(dropout)
def forward(self, input_word):
# print(input_word.size())
#[batch_size src_sent_lent]
embed_out = self.embedding_layer(input_word)
#[BATCH_SIZE src_sent_lent embedding_dim]
embed_out = F.relu(embed_out)
embed_out = self.dropout(embed_out)
self.batch = embed_out.size()[0]
# hidden = self.init_hidden()
GRU_out,hidden = self.GRU_layer(embed_out)
# print(GRU_out.size())
# print(hidd.size())
#[BATCH_SIZE sec_sent_len n_direction*hid_dem]
#[n_layer*n_direction batch_size hid_dem]
#where the first hid_dim elements in the third axis are the hidden states from the top layer forward RNN, and the last hid_dim elements are hidden states from the top layer backward RNN
#hidden is stacked [forward_1, backward_1, forward_2, backward_2, ...]
#hidden [-2, :, : ] is the last of the forwards RNN
#hidden [-1, :, : ] is the last of the backwards RNN
GRU_out = F.relu(GRU_out)
hidden = torch.tanh(self.fc(torch.cat((hidden[-2,:,:],hidden[-1,:,:]),dim=1)))
# print(GRU_out.size())
# print(hidden.size())
#outputs = [batch_size src sent len, encoder_hid_dim * n_direction]
#hidden = [batch size, dec hid dim]
return GRU_out,hidden
def init_hidden(self):
return (Variable(torch.eye(1, self.encoder_hid_dem)).unsqueeze(1).repeat(2, self.batch, 1).to(self.device))
Attention class
class Attention(nn.Module):
def __init__(self,encoder_hid_dem,decoder_hid_dem,bidirectional):
super().__init__()
self.enc_hid_dim = encoder_hid_dem
self.dec_hid_dim = decoder_hid_dem
self.encoder_n_direction=1;
if(bidirectional==True):
self.encoder_n_direction=2;
self.attn = nn.Linear((encoder_hid_dem * self.encoder_n_direction) + decoder_hid_dem, decoder_hid_dem)
self.v = nn.Parameter(torch.rand(decoder_hid_dem))
def forward(self, hidden, encoder_outputs):
#hidden = [batch size, dec hid dim]
#encoder_outputs = [batch_size ,src sent len, enc hid dim * encoder_n_direction]
batch_size = encoder_outputs.shape[0]
src_len = encoder_outputs.shape[1]
hidden = hidden.unsqueeze(1).repeat(1, src_len, 1)
#hidden = [batch size, src sent len, dec hid dim]
#encoder_outputs = [batch size, src sent len, enc hid dim * encoder_n_direction]
energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
#energy = [batch size, src sent len, dec hid dim]
energy = energy.permute(0, 2, 1)
#energy = [batch size, dec hid dim, src sent len]
#v = [dec hid dim]
v = self.v.repeat(batch_size, 1).unsqueeze(1)
#v = [batch size, 1, dec hid dim]
attention = torch.bmm(v, energy).squeeze(1)
#attention= [batch size, src len]
return F.softmax(attention, dim=1)
Decoder class
class Decoder(nn.Module):
def __init__(self, decoder_hid_dem, encoder_hid_dem, vocab_size,embedding_dim,attention,decoder_input_size,linear_input_size,bidirectional,dropout):
super().__init__()
self.encoder_hid_dem=encoder_hid_dem
self.decoder_hid_dem=decoder_hid_dem
self.attention=attention
self.dropout = dropout
self.output_dim = vocab_size
self.decoder_n_direction=1;
if(bidirectional==True):
self.decoder_n_direction=2;
self.GRU_layer_out = nn.GRU(decoder_input_size,decoder_hid_dem)
self.out_layer = nn.Linear(in_features=linear_input_size, out_features=vocab_size)
self.dropout = nn.Dropout(dropout)
#self.GRU_layer_out.bias = torch.nn.Parameter(torch.zeros(decoder_input_size))
def forward(self, feature, hidden,actual_word,encoder_outputs):
feature = feature.unsqueeze(1)
# print('decoder')
# print(feature.size())
#[batch_size src_sent_lent=1 feat_size=6]
# print(hidden.size())
# [batch_size dec_hid_dim]
# print(actual_word.size())
# [batch_size src_sent_lent=1 embedding_dim]
# print(encoder_outputs.size())
# outputs = [batch_size src sent len, encoder_hid_dim * encoder_n_directional]
a = self.attention(hidden,encoder_outputs)
# print(a.size())
# [batch_size src_sent_len]
a = a.unsqueeze(1)
#a = [batch size, 1, src len]
weighted = torch.bmm(a,encoder_outputs)
# print(weighted.size())
# weighted = [batch size, 1, enc_hid_dim * encoder_n_direction]
# if len(actual_word.size()) != 0:
input_char = torch.cat((actual_word,feature,weighted),2)
# else:
# input_char = torch.cat((feature,weighted),2)
input_char=input_char.permute(1,0,2)
# print(input_char.size())
# [1 BATCH_SIZE decoder_input_size]
hidden = hidden.unsqueeze(0)
# print(hidden.size())
#[1 batch_size decoder_hid_dem]
output, hidden = self.GRU_layer_out(input_char, hidden)
# print(output.size())
# [sent_len=1 batch_size decoder_n_direction*decoder_hid_dem]
# print(hidden.size())
# [n_layer*n_direction BATCH_SIZE hid_dem]
output = F.leaky_relu(output)
output = self.dropout(output)
output = torch.cat((output.squeeze(0),weighted.squeeze(1),actual_word.squeeze(1)),dim=1)
pre_out = self.out_layer(output)
predicted_output = F.log_softmax(pre_out, dim=1)
# print(predicted_output.size())
# [ batch_size vacab_size ]
return predicted_output, hidden.squeeze(0)
def init_hidden(self, batch):
return (Variable(torch.eye(1, self.decoder_hid_dem)).unsqueeze(1).repeat(1, batch, 1).to(self.device),Variable(torch.eye(1, self.decoder_hid_dem)).unsqueeze(1).repeat(1, batch, 1).to(self.device))
seq2seq class
class Seq2Seq(nn.Module):
def __init__(self,encoder,decoder,device):
super().__init__()
self.encoder = encoder
self.decoder = decoder
self.device = device
def forward(self,input_word,output_word,features_word,teaching_forcing_ratio,limit):
#print(input_word)
#print(input_word.size())
input_word = input_word.to(self.device)
output_word = output_word.to(self.device)
features_word = features_word.to(self.device)
batch_size= input_word.size()[0]
if(limit==0):
max_len = input_word.size()[1]
else:
max_len = limit
vocabsize = self.decoder.output_dim
actual_word = self.encoder.embedding_layer(torch.tensor(char_to_index['<sos>']).view(1, -1).to(self.device)).repeat(batch_size, 1, 1)
encoder_outputs,hidden = self.encoder(input_word)
features=features_word[:,:]
predicted_word = torch.zeros(max_len,batch_size,vocabsize).to(self.device)
for t in range(1,max_len):
output,hidden=self.decoder(features, hidden,actual_word,encoder_outputs)
#print(output.size())
predicted_word[t] = output
topv, topi = output.topk(1)
bs = topi.size()[0]
temp2 = torch.zeros(0,1,300).to(self.device)
for row in range(bs):
index = topi[row][0].item()
temp = self.encoder.embedding_layer(torch.tensor(index).view(1, -1).to(self.device))
temp2 = torch.cat((temp2,temp))
teacher_force = random.random() < teaching_forcing_ratio
if teacher_force == 1:
actual_word = self.encoder.embedding_layer(output_word[:,t]).unsqueeze(1)
else:
actual_word = temp2
return predicted_word
and this code is used to save and load model
torch.save(model.state_dict(), 'model.pt')
model.load_state_dict(torch.load('model.pt'))
I want that when I run my model on pre-trained weights, it should predict correctly acc to those weights
Your provided code for saving/loading parameters is wrong. The loading and saving model parameters are pretty straight-forward. In your case, it should be:
# loading
saved_params = torch.load(
filename, map_location=lambda storage, loc: storage
)
s2s.load_state_dict(saved_params)
# saving
params = s2s.state_dict()
torch.save(params, filename)
[Update]
You need to make the Seq2Seq class a derived class of PyTorch's nn.Module just like your encoder/decoder classes. Otherwise, you can't use the state_dict() method. You can assume Seq2Seq class is like a container that contains your whole network, although it does not have any learnable weights itself.

Tensorflow : ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib
URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'
def fetch_data(url = URL, path = PATH):
if not os.path.isdir(path):
os.makedirs(path)
file_path = os.path.join(oath, "aclImdb_v1.tar.gz")
urllib.request.urlretrieve(url, file_path)
file_gz = tarfile.open(file_path)
file_gz.extractall(path = path)
file_gz.close()
import pyprind # for progress visualisation
import pandas as pd
PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0} # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000) # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over 'train' & 'test' subdir
for s in ('test', 'train'):
for l in ('pos', 'neg'): # and read text files from 'pos' and 'neg' subdir
path = os.path.join(PATH, s, l)
for file in os.listdir(path):
# append to the df pandas DataFrame with an int class (post = 1, neg = 0)
with open(os.path.join(path, file), 'r', encoding = 'utf-8') as infile:
txt = infile.read()
df = df.append([[txt, labels[l]]], ignore_index = True)
pbar.update()
df.columns = ['review', 'sentiment']
import numpy as np
np. random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index = False, encoding = 'utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
df.head(3)
# Separate words and count each word's occurence
import pyprind # for progress visualisation
from collections import Counter
from string import punctuation
import re
counts = Counter() # collects the counts of occurence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
title = 'Counting word occurences...') # progress bar
for i, review in enumerate(df['review']):
text = ''.join([c if c not in punctuation else ' '+c+' '
for c in review]).lower()
df.loc[i, 'review'] = text
pbar.update()
counts.update(text.split())
# Mapping each unique word to an int
word_counts = sorted(counts, key = counts.get, reverse = True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}
mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
title = 'Map movie reviews to integers...')
# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype = int)
for i, row in enumerate(mapped_reviews):
review_arr = np.array(row)
sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values
# Define the mini-batches generator
np.random.seed(123)
def batch_gen(x, y = None, batch_size = 64):
n_batches = len(x) // batch_size
x = x[:n_batches * batch_size]
if y is not None:
y = y[:n_batches * batch_size]
for ii in range(0, len(x), batch_size):
if y is not None:
yield x[ii : ii + batch_size], y[ii : ii + batch_size]
else:
yield x[ii : ii + batch_size]
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ## suppress the 3.5 warning if using TF 1.4
class SentimentRNN(object):
# Define __init__
def __init__(self,
n_words,
seq_len = 200,
lstm_size = 256,
num_layers = 1,
batch_size = 64,
learning_rate = 0.0001,
embed_size = 200):
self.n_words = n_words
self.seq_len = seq_len
self.lstm_size = lstm_size # no. of hidden units
self.num_layers = num_layers
self.batch_size = batch_size
self.learning_rate = learning_rate
self.embed_size = embed_size
self.g = tf.Graph()
with self.g.as_default():
tf.set_random_seed(123)
self.build()
self.saver = tf.train.Saver()
self.init_op = tf.global_variables_initializer()
# Define the build method
def build(self):
# Define the placeholders
tf_x = tf.placeholder(tf.int32,
shape = (self.batch_size, self.seq_len),
name = 'tf_x')
tf_y = tf.placeholder(tf.float32,
shape = (self.batch_size),
name = 'tf_y')
tf_keepprob = tf.placeholder(tf.float32,
name = 'tf_keepprob')
# Create the embedding layer
embedding = tf.Variable(
tf.random_uniform(
shape = (self.n_words, self.embed_size),
minval = -1,
maxval = 1),
name = 'embedding')
embed_x = tf.nn.embedding_lookup(embedding,
tf_x,
name = 'embed_x')
# Define LSTM cells and stack them
cells = tf.contrib.rnn.MultiRNNCell(
[tf.contrib.rnn.DropoutWrapper(
tf.contrib.rnn.BasicLSTMCell(num_units = self.lstm_size),
output_keep_prob = tf_keepprob)
for i in range(self.num_layers)])
# Define the initial state:
self.initial_state = cells.zero_state(
self.batch_size, tf.float32)
print(' << initial state >> ', self.initial_state)
# Put together components with tf.nn.dynamic_rnn
lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
cell = cells,
inputs = embed_x,
initial_state = self.initial_state)
## lstm_outputs shape: [batch_size, max_time, cells.output_size]
print('\n << lstm_output >> ', lstm_outputs)
print('\n << final state >> ', self.final_state)
# Apply a full-connected layer on the RNN output
logits = tf.layers.dense(
inputs = lstm_outputs[:, -1],
units = 1, # dimensionality of the output space
activation = None,
name = 'logits')
# Remove dimensions of size 1 from the tensor shape
logits = tf.squeeze(input = logits,
name = 'logits_squeezed')
print ('\n << logits >> ', logits)
# If you want prob's
y_proba = tf.nn.sigmoid(logits, name = 'probabilities')
predictions = {'probabilities' : y_proba,
'labels' : tf.cast(tf.round(y_proba),
tf.int32,
name = 'labels')}
print('\n << predictions >> ', predictions)
# Define the cost function
cost = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(
labels = tf_y,
logits = logits),
name = 'cost')
# Define the optimiser
optimizer = tf.train.AdamOptimizer(self.learning_rate)
train_op = optimizer.minimize(cost, name = 'train_op')
# Define the train method
def train(self, X_train, y_train, num_epochs):
with tf.Session(graph = self.g) as sess:
sess.run(self.init_op)
iteration = 1
for epoch in range(num_epochs):
state = sess.run(self.initial_state)
for batch_x, batch_y in batch_gen(
X_train,
y_train,
batch_size = self.batch_size):
feed = {'tf_x:0' : batch_x,
'tf_y:0' : batch_y,
'tf_keepprob:0' : 0.5,
self.initial_state : state}
loss, _, state = sess.run(
['cost:0',
'train_op',
self.final_state],
feed_dict=feed)
if iteration % 20 == 0:
print("Epoch: %d/%d Iteration: %d "
"| Train loss: %.5f" % (
epoch + 1,
num_epochs,
iteration,
loss))
iteration += 1
if (epoch + 1) % 10 == 0:
self.saver.save(
sess,
"model/sentiment-%d.ckpt" % epoch)
# Define the predict method
def predict(self, X_data, return_proba=False):
preds = []
with tf.Session(graph = self.g) as sess:
self.saver.restore(
sess,
tf.train.latest_checkpoint('model/'))
test_state = sess.run(self.initial_state)
for ii, batch_x in enumerate(batch_gen(
x = X_data,
y = None,
batch_size = self.batch_size), 1):
feed = {'tf_x:0' : batch_x,
'tf_keepprob:0' : 1.0,
self.initial_state : test_state}
if return_proba:
pred, test_state = sess.run(
['probabilities:0', self.final_state],
feed_dict=feed)
else:
pred, test_state = sess.run(
['labels:0', self.final_state],
feed_dict=feed)
preds.append(pred)
return np.concatenate(preds)
for review in df['review']:
mapped_reviews.append([word_to_int[word] for word in review.split()])
pbar.update()
rnn = SentimentRNN(n_words = n_words,
seq_len = sequence_length,
embed_size = 256,
lstm_size = 128,
num_layers = 1,
batch_size = 100,
learning_rate = 0.001)
preds = rnn.predict(X_test)
y_true = y_test\[:len(preds)\]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))][1]
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, the number of layers = 1 may generalise better
enter image description here
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything. It returns None, then the Saver complains because it was passed None. So there's no checkpoint in that directory.

tensorflow variables inside class differing from the one outside

I am trying to solve a ANN model using Tensorflow. At the moment, I am able to run the program as a long string of text. Now however, I would like to convert my code to something that is easier to use. So I converted my code to a class. Here is what I did. (basically copied the entire set of code to a class.
import os
import tensorflow as tf
class NNmodel:
def __init__(self,
layers, inpShape, outShape,
features,
learning_rate=0.1, nSteps = 100,
saveFolder='models'):
self.layers = layers
self.features = features
self.learning_rate = learning_rate
self.saveFolder = saveFolder
self.nSteps = 100
self.d = tf.placeholder(shape = inpShape, dtype = tf.float32, name='d') # input layer
self.dOut = tf.placeholder(shape = outShape, dtype = tf.float32, name='dOut') # output layer
self.weights = []
self.biases = []
self.compute = [self.d]
layerSizes = [self.features] + [l['size'] for l in self.layers]
for i, (v1, v2) in enumerate(zip(layerSizes, layerSizes[1:])):
self.weights.append(
tf.Variable(np.random.randn(v1, v2)*0.1, dtype = tf.float32, name='W{}'.format(i)))
self.biases.append(
tf.Variable(np.zeros((1,1)), dtype = tf.float32, name='b{}'.format(i)) )
self.compute.append( tf.matmul(
self.compute[-1], self.weights[i]) + self.biases[i] )
if self.layers[i]['activation'] == 'tanh':
self.compute.append( tf.tanh( self.compute[-1] ) )
if self.layers[i]['activation'] == 'relu':
self.compute.append( tf.nn.relu( self.compute[-1] ) )
if self.layers[i]['activation'] == 'sigmoid':
self.compute.append( tf.sigmoid ( self.compute[-1] ) )
self.result = self.compute[-1]
self.delta = self.dOut - self.result
self.cost = tf.reduce_mean(self.delta**2)
self.optimizer = tf.train.AdamOptimizer(
learning_rate = self.learning_rate).minimize(self.cost)
return
def findVal(self, func, inpDict, restorePt=None):
saver = tf.train.Saver()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
if restorePt is not None:
try:
saver.restore(sess, tf.train.latest_checkpoint(restorePt) )
print('Session restored')
except Exception as e:
print('Unable to restore the session ...')
return None
else:
print('Warning, no restore point selected ...')
result = sess.run(func, feed_dict = inpDict)
sess.close()
return result
def optTF(self, inpDict, printSteps=50, modelFile=None):
cost = []
saver = tf.train.Saver()
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
print('x'*100)
for i in range(self.nSteps):
# First run the optimizer ...
sess.run(self.optimizer, feed_dict = inpDict)
# Save all the data you want to save
c = sess.run( self.cost, feed_dict = inpDict)
cost.append(c)
if (i%printSteps) == 0:
print('{:5d}'.format(i))
result = self.run(self.result, feed_dict = inpDict)
if modelFile is not None:
path = saver.save(sess, os.path.join(
self.saveFolder, modelFile))
print('Model saved in: {}'.format(path))
else:
print('Warning! model not saved')
sess.close()
return cost, result
When I use this model, I see that there seems to be a problem:
N = 500
features = 2
nSteps = 1000
X = [ (np.random.random(N))*np.random.randint(1000, 2000) for i in range(features)]
X = np.array([np.random.random(N), np.random.random(N)])
data = [X.T, X[0].reshape(-1, 1)]
layers = [
{'name':'6', 'size': 10, 'activation':'tanh'},
{'name':'7', 'size': 1, 'activation':'linear'},
]
m1 = NNmodel(layers, inpShape=np.shape(data[0]), outShape = np.shape(data[1]),
features=features,
learning_rate=0.1, nSteps = 100,
saveFolder='models1')
d = tf.placeholder(shape = np.shape(data[0]), dtype = tf.float32, name='d_4')
dOut = tf.placeholder(shape = np.shape(data[1]), dtype = tf.float32, name='dOut')
m1.findVal(m1.result, {d: data[0], dOut:data[1]})
Now it appears that there is a mismatch between the placeholders that I am using d and dOut that I provide form outside, and the ones that are already present within the model self.d and self.dOut. How do I solve this problem?
Why not to just use the placeholders declared within the model?
m1.findVal(m1.result, {m1.d: data[0], m1.dOut:data[1]})

Resources