I have a question regarding the validation Data.
I have this neural network and I divided my data into train_generator, val_generator, test_generator.
I made a custom model with a custom fit.
class MyModel(tf.keras.Model):
def __init__(self):
def __call__(.....)
def train_step(....)
then I have:
train_generator = DataGenerator(....)
val_generator = DataGenerator(....)
test_generator = DataGenerator(....)
then :
model = MyModel()
metrics=["accuracy"]), validation_data = val_generator, epochs=40)
ok and the program gives me no errors
But my question is : how can I know what happens with my validation_data ?
Is it processed the same way as the train_data ( train_generator ) in the train_step function ?
Or do I need to specify how to process the validation data ?
If it helps, I will also leave the MyModel class here:
class MyModel(tf.keras.Model):
    """Keras model that wraps a single ``Decoder2`` and customises training.

    Only ``train_step`` is overridden; no ``test_step`` is defined in this
    class.
    """

    def __init__(self):
        # The original passed ``MyModel2`` to ``super``, which is not this
        # class; the zero-argument form cannot get out of sync with the name.
        super().__init__()
        self.dec2 = Decoder2()

    # NOTE(review): Keras models normally implement ``call`` and leave
    # ``__call__`` to the base class -- confirm that overriding ``__call__``
    # directly is intended.
    def __call__(self, y_hat, **kwargs):
        """Decode ``y_hat`` with ``self.dec2``; extra keyword args are ignored."""
        z_hat = self.dec2(y_hat)
        return z_hat

    def train_step(self, dataset):
        """Run one optimisation step on a single batch.

        Args:
            dataset: Indexable batch; element 0 is the model input and
                element 1 is the target.

        Returns:
            Dict mapping each metric's name to its current value.
        """
        y_hat = dataset[0]
        z_true = dataset[1]

        with tf.GradientTape() as tape:
            z_pred = self(y_hat, training=True)
            # Mean absolute error, with both operands cast to float64.
            loss = tf.reduce_mean(
                tf.abs(tf.cast(z_pred, tf.float64) - tf.cast(z_true, tf.float64))
            )
        print("loss: ", loss)

        trainable_vars = self.trainable_variables
        gradients = tape.gradient(loss, trainable_vars)
        # Update weights
        self.optimizer.apply_gradients(zip(gradients, trainable_vars))
        # Update the compiled metrics with this batch
        self.compiled_metrics.update_state(z_true, z_pred)
        # Return a dict mapping metric names to current value (the original
        # built a set of values, which loses the names).
        return {m.name: m.result() for m in self.metrics}

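You have to add a test_step(self, data) method to your MyModel class, as described in the Keras guide section "Providing your own evaluation step". Validation data is not passed through train_step: fit() evaluates it with test_step, so that method is where you define how validation batches are processed.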


Different training result obtained from training simple LSTM in Keras and Pytorch

I’m trying to implement my LSTM model from Keras to Pytorch, but the results in Pytorch seem really bad at the moment. The network is really simple as below.
model = Sequential()
model.add(LSTM(10, input_length=shape[1], input_dim=shape[2]))
# output shape: (1, 1)
model.compile(loss="mse", optimizer="adam")
And I migrate it to the Pytorch framework,
class LSTM(nn.Module):
    """LSTM followed by a stack of small tanh layers, all in float64.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        output_dim: Number of values predicted per sequence.
        bilstm: If True, the LSTM runs in both directions.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim, bilstm=False):
        super(LSTM, self).__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.isBi = bilstm
        self.lstm = nn.LSTM(
            input_dim,
            hidden_dim,
            num_layers,
            batch_first=True,
            bidirectional=bilstm,
        ).double()
        # Optional orthogonal initialisation of the LSTM weights (left disabled):
        # for name, param in self.lstm.named_parameters():
        #     if name.startswith("weight"):
        #         nn.init.orthogonal_(param)

        # A bidirectional LSTM concatenates the forward and backward outputs,
        # so the first linear layer must accept twice the hidden size.
        lstm_out_dim = hidden_dim * 2 if bilstm else hidden_dim
        self.fc1 = nn.Sequential(nn.Linear(lstm_out_dim, 10).double(), nn.Tanh())
        self.final_layer1 = nn.Sequential(nn.Linear(10, 10).double(), nn.Tanh())
        self.final_layer2 = nn.Sequential(nn.Linear(10, 10).double(), nn.Tanh())
        self.final_layer3 = nn.Sequential(nn.Linear(10, 10).double(), nn.Tanh())
        self.final_layer4 = nn.Sequential(nn.Linear(10, output_dim).double())

    def forward(self, x):
        """Map ``x`` of shape (batch, seq, input_dim) to (batch, output_dim)."""
        out, (hn, cn) = self.lstm(x)
        # Keep only the output at the last time step of each sequence.
        out = out[:, -1, :]
        out = self.fc1(out)
        out = self.final_layer1(out)
        out = self.final_layer2(out)
        out = self.final_layer3(out)
        out = self.final_layer4(out)
        return out
The result is really bad. I was wondering if the initializing methods/activation functions used in Keras are different from the one I used in Pytorch(Keras seems to be using hard_sigmoid where Pytorch uses sigmoid?).
Would really appreciate it if somebody could help me with this problem!
My training code in Pytorch.
criterion = nn.MSELoss()
model = LSTM(input_dim, hidden_dim, num_layers, output_dim, bilstm)
# Pick the device once so the model and every batch land in the same place.
# The original moved the model to CUDA unconditionally but the data only when
# CUDA was available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001)
for epoch in range(1, epoch_number + 1):
    iteration = 0
    for i, data in enumerate(train_loader):
        dat, label = data
        # The model's layers are float64, so inputs and targets are cast to match.
        dat = dat.double().to(device)
        label = label.double().to(device)
        # Plain tensors carry autograd state; no Variable wrapper is needed.
        optimizer.zero_grad()
        out = model(dat)
        loss = criterion(out, label)
        # Back-propagate and apply the update. Without these two calls the
        # loss is computed but the parameters never change.
        loss.backward()
        optimizer.step()

How to create a data preprocessing pipeline in pytorch outside the Dataloader class?

I am trying to build a model for data with 40 features, which have to be classified into 10 classes. I am new to PyTorch and this is my first project in it.
I am given a custom Dataset class (which I am not allowed to change) which is as follows:
class MyData(Dataset):
    """Dataset backed by a pickled dict with ``'x'`` (features) and ``'y'`` (labels)."""

    def __init__(self, mode):
        """Load ``<mode>.pkl`` and store features as float and labels as long.

        Args:
            mode: Path stem of the pickle file; ``'.pkl'`` is appended.
        """
        # NOTE: pickle.load can execute arbitrary code -- only open trusted files.
        with open(mode + '.pkl', 'rb') as handle:
            data = pickle.load(handle)
        self.X = data['x'].astype('float')
        self.y = data['y'].astype('long')

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair(s) selected by ``idx``."""
        # Tensor indices are converted to plain Python values before indexing.
        if torch.is_tensor(idx):
            idx = idx.tolist()
        sample = (self.X[idx], self.y[idx])
        return sample
I have done some preprocessing on the data like normalization and then trained and saved the model. As I wasn't allowed to change the dataset class, I made the changes outside of it and then used the DataLoader method. The preprocessing is as follows :
# Pull every sample out of the dataset in one go by indexing with a full slice.
features, labels = train_data[:]
# Round-trip through a DataFrame and take its underlying values.
df = pd.DataFrame(features)
x = df.values
# Min-max scale the features (`preprocessing` is presumably
# sklearn.preprocessing -- confirm). The scaler is fitted on all samples,
# i.e. before the train/validation split below.
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
input_array = x_scaled
output_array = labels
# Wrap the arrays as tensors; the labels are converted to a LongTensor.
inputs = torch.Tensor(input_array)
targets = torch.Tensor(output_array).type(torch.LongTensor)
dataset = TensorDataset(inputs, targets)
# Fixed-size random split: 3300 samples for training, 300 for validation.
train_ds, val_ds = random_split(dataset, [3300, 300])
batch_size = 300
n_epochs = 200
log_interval = 10
# Empty lists, presumably filled during training/evaluation (not shown here).
train_losses = []
train_counter = []
test_losses = []
# Only the training loader shuffles its samples.
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
# Multiples of the training-set size, one entry for each of 0..n_epochs.
test_counter = [i*len(train_loader.dataset) for i in range(n_epochs + 1)]
After this I define the training and testing functions (with the print statements removed, because the autograder will not be able to grade my assignment if I leave them in) as follows:
def train(epoch):
for batch_idx, (data, target) in enumerate(train_loader):
output = model(data.double())
loss = criterion(output, target)
if batch_idx % log_interval == 0:
(batch_idx*32) + ((epoch-1)*len(train_loader.dataset)))
def test():
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in val_loader:
output = model(data.double())
test_loss += criterion(output, target).item()
pred =, keepdim=True)[1]
correct += pred.eq(
test_loss /= len(val_loader.dataset)
for epoch in range(1, n_epochs + 1):
Even after doing that, the autograder is still not able to grade my code. I mainly think it's because maybe I am making an error with how I input the data to the model but I am not able to narrow down to what exactly is the problem and how do I correct it. As I'm new to pytorch, I was looking at how to do the preprocessing but all of them involved the Dataset Class so I'm not sure how to go about it.
My model is as follows:
class MyModel(nn.Module):
def __init__(self):
super(MyModel, self).__init__()
nn.Conv1d(in_channels=40, out_channels=256, kernel_size=1, stride=2), #applying batch norm
nn.BatchNorm1d(256, affine=True),
nn.Conv1d(in_channels=256, out_channels=128, kernel_size=1, stride=2), #applying batch norm
nn.BatchNorm1d(128, affine=True),
nn.Conv1d(in_channels=128, out_channels=64, kernel_size=1, stride=2), #applying batch norm
nn.BatchNorm1d(64, affine=True),
nn.Conv1d(in_channels=64, out_channels=32, kernel_size=1, stride=2), #applying batch norm
nn.BatchNorm1d(32, affine=True),
nn.Linear(32, 10),
def forward(self,x):
# result=self.net_stack(x[None])
result=self.net_stack(x[:, :, None]).double()
return result
One instruction I've got is that they've written:
# Please make sure we can load your model with:
# model = MyModel()
# This means you must give default values to all parameters you may wish to set, such as output size.
You can try to do it within the training loop
for batch_idx, (data, target) in enumerate(train_loader):
    # you can do something here to manipulate your input
    # Move to gpu, i noticed you didnt do it in your training loop
    data = transform(data).to('cuda')
    # Forward pass
    output = model(data)

I get something wrong when use model.train() and model.eval() on pytorch

I have prepared features and their labels as below. I want to build a model that consists of a transformer encoder followed by a linear layer that predicts a value, but I get an error when I use the model to predict after training it.
At first I run below code:
import torch
from torch import nn
# batch_size, channels and lenght are expected to be defined beforehand.
# The first argument was spelled `bach_size`, which does not match the
# `batch_size` used for the labels on the next line.
features = torch.rand(batch_size, channels, lenght)
labels = torch.rand(batch_size)
class TransformerModel(nn.Module):
    """Transformer encoder followed by one linear layer that outputs a scalar.

    The encoder is built with ``d_model=8`` and the linear layer takes 40
    flattened features per sample, so inputs are expected to have shape
    (batch, 5, 8).
    """

    def __init__(self):
        super(TransformerModel, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(d_model=8, nhead=8, dropout=0.5)
        # Stack of 6 identical encoder layers.
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, 6)
        self.decoder = nn.Linear(40, 1)

    def forward(self, src):
        """Return one prediction per sample, shape (batch, 1)."""
        # Swap the first two axes before the encoder and swap them back after.
        encoded = self.transformer_encoder(src.transpose(1, 0)).transpose(1, 0)
        # Flatten everything except the leading axis before the linear layer.
        pred = self.decoder(encoded.reshape(encoded.shape[0], -1))
        return pred
model = TransformerModel()
criterion = nn.MSELoss()
lr = 0.3 # learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
def train():
model.train() # Turn on the train mode
output = model(features)
loss = criterion(output.view(-1, 1), labels.view(-1, 1))
torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
return loss.item()
for _ in range(100):
After that, I predict features by the below codes:
output = model(features)
All the values in 'output' come out the same, whereas if I use 'model.train()' the 'output' seems OK. So what is the problem? Or was the model built incorrectly?

Pytorch Problem with Custom Dataset Class

First, I made a custom dataset to load in images from my dataframe (containing the image filepath and corresponding int label):
class Dataset(torch.utils.data.Dataset):
    """Image dataset backed by a dataframe of (file path, label) rows.

    Args:
        dataframe: Table whose column 0 holds the image path and column 1
            the label.
        transform: Optional callable applied to each sample dict.
    """

    def __init__(self, dataframe, transform=None):
        self.frame = dataframe
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.frame)

    def __getitem__(self, idx):
        """Return ``{'image': tensor, 'label': value}`` for row ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()
        filename = self.frame.iloc[idx, 0]
        # Move the last axis to the front (presumably H x W x C -> C x H x W)
        # and convert to a float tensor.
        image = torch.from_numpy(io.imread(filename).transpose((2, 0, 1))).float()
        label = self.frame.iloc[idx, 1]
        sample = {'image': image, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample
Then, I use pre-existing model architecture like so:
# Build a DenseNet-161 via `models` (presumably torchvision.models -- confirm).
model = models.densenet161()
# Replace the classifier head with a linear layer that keeps the same input
# width but has 10 outputs.
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 10) # where 10 is my number of classes
# Cross-entropy loss and an Adam optimiser over all model parameters.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
Finally, for training, I do the following:
model.train() # switch to train mode
for epoch in range(5):
for i, sample in enumerate(train_set): # where train_set is an instance of my Dataset class
image, label = sample['image'].unsqueeze(0), torch.Tensor(sample['label']).long()
output = model(image)
loss = criterion(output, label)
However, I am experiencing errors with loss = criterion(output, label). It tells me that ValueError: Expected input batch_size (1) to match target batch_size (2).. Can someone teach me how to properly use a custom dataset, especially with loading in batches of data? Also, why am I experiencing that ValueError? Thank you!
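Please check the following lines:
In the dataset definition, label = self.frame.iloc[idx, 1]: print this value to double-check whether it returns a single int or two.
In the training code, image, label = sample['image'].unsqueeze(0), torch.Tensor(sample['label']).long(): check the shape of the resulting label tensor. If the label is an integer n, torch.Tensor(n) allocates a tensor with n elements rather than a tensor holding the value n, so a label of 2 produces a target of length 2; torch.tensor([sample['label']]) produces a length-1 target that matches the batch size of 1.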

Keras Custom Layer Error (Operation IsVariableInitialized has been marked as not fetchable)

I'm trying to create a custom Keras layer on a toy dataset, and am having issues. At a high level, I want to create an "Input Gate" layer, which would have trainable weights to turn each column of input on or off. So I'm starting with just trying to multiply the inputs by a sigmoid'd version of the learned weights. My code is as follows:
### This is my custom layer
class InputGate(Layer):
def __init__(self, **kwargs):
super(InputGate, self).__init__(**kwargs)
def build(self, input_shape):
self.kernel = self.add_weight(name='input_gate',
super(InputGate, self).build(input_shape) # Be sure to call this somewhere!
def call(self, inputs):
gate_amount = K.sigmoid(self.kernel)
return inputs * gate_amount
def get_config(self):
config = {}
base_config = super(InputGate, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
def compute_output_shape(self, input_shape):
return input_shape
def create_linear_model(x, y, num_noise_vars = 0, reg_strength=0):
new_x = get_x_with_noise(x, num_noise_vars=num_noise_vars)
model = Sequential([
Dense(1, kernel_regularizer=l2(reg_strength))
model.compile(optimizer="rmsprop", loss="mse") = 0.001
return {"model": model, "new_x": new_x}
def get_x_with_noise(x, num_noise_vars):
noise_vars = []
for noise_var in range(num_noise_vars):
x_with_noise = noise_vars
new_x = np.array(list(zip(*x_with_noise)))
return new_x
x = np.random.random(500)
y = (x * 3) + 10
num_noise_vars = 5
info = create_linear_model(x, y, num_noise_vars=num_noise_vars)
model = info["model"]
new_x = info["new_x"]
results =, y, epochs=num_epochs, verbose=0)
And then I get the following error:
ValueError: Operation 'input_gate_14/IsVariableInitialized' has been marked as not fetchable.
This layer is mostly taken from the docs. I'm using Keras 2.0.9, with the TensorFlow backend, on a CPU (MacBook Air).
This layer seems as simple as can be, and googling the error leads me to discussions that don't seem relevant. Anyone have ideas of what's causing this?
Any help is much appreciated! Thanks!
