Got Very Different Scores After Translating Simple Test Model from Keras to PyTorch - keras

I'm trying to transition from Keras to PYTorch.
After reading tutorials and similar questions, I came up with the following simple models to test. However, the two models below gives me very different scores: Keras (0.9), PyTorch (0.03).
Could someone give me guidance?
Basically my dataset has 120 features and multilabels with 3 classes that look like below.
[
[1,1,1],
[0,1,1],
[1,0,0],
...
]
def score(true, pred):
lrl = label_ranking_loss(true, pred)
lrap = label_ranking_average_precision_score(true, pred)
print('LRL:', round(lrl), 'LRAP:', round(lrap))
#Keras:
model= Sequential()
model.add(Dense(60, activation="relu", input_shape=(120,)))
model.add(Dense(30, activation="relu"))
model.add(Dense(3, activation="sigmoid"))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=100)
pred = model.predict(x_test)
score(y_test, pred)
#PyTorch
model = torch.nn.Sequential(
torch.nn.Linear(120, 60),
torch.nn.ReLU(),
torch.nn.Linear(60, 30),
torch.nn.ReLU(),
torch.nn.Linear(30, 3),
torch.nn. Sigmoid())
loss_fn = torch.nn. BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
epochs = 100
batch_size = 32
n_batch = int(x_train.shape[0]/batch_size)
for epoch in range(epochs):
avg_cost = 0
for i in range(n_batch):
x_batch = x_train[i*batch_size:(i+1)*batch_size]
y_batch = y_train[i*batch_size:(i+1)*batch_size]
x, y = Variable(torch.from_numpy(x_batch).float()), Variable(torch.from_numpy(y_batch).float(), requires_grad=False)
pred = model(x)
loss = loss_fn(pred, y)
loss.backward()
optimizer.step()
avg_cost += loss.item()/n_batch
print(epoch, avg_cost)
x, y = Variable(torch.from_numpy(x_test).float()), Variable(torch.from_numpy(y_test).float(), requires_grad=False)
pred = model(x)
score(y_test, pred.data.numpy())

You need to call optimizer.zero_grad() at the start of each iteration, otherwise the gradients from different batches just keep getting accumulated.

Related

How to fix error validation split in MLP using Keras?

I'm newbie in Neural Network. I'm going to do a text classification research using MLP model with keras. Input layer consisting of 900 nodes, 2 hidden layers, and 2 outputs.
The code I use is as follows:
#Split data training & testing (90:10)
Train_X, Test_X, Train_Y, Test_Y = model_selection.train_test_split(dataset['review'],dataset['sentimen'],test_size=0.2, random_state=8)
Encoder = LabelEncoder()
Train_Y = Encoder.fit_transform(Train_Y)
Test_Y = Encoder.fit_transform(Test_Y)
Tfidf_vect = TfidfVectorizer(max_features=None)
Tfidf_vect.fit(dataset['review'])
Train_X_Tfidf = Tfidf_vect.transform(Train_X)
Test_X_Tfidf = Tfidf_vect.transform(Test_X)
#ANN Architecture
model = Sequential()
model.add(Dense(units = 100, activation = 'sigmoid', input_shape=(32, 900)))
model.add(Dense(units = 100, activation = 'sigmoid'))
model.add(Dense(units = 2, activation = 'sigmoid'))
opt = Adam (learning_rate=0.001)
model.compile(loss = 'binary_crossentropy', optimizer = opt,
metrics = ['accuracy'])
print(model.summary())
#Hyperparameter
epochs= 100
batch_size= 32
es = EarlyStopping(monitor="val_loss",mode='min',patience=10)
model_prediction = model.fit(Train_X_Tfidf, Train_Y, epochs=epochs,
batch_size=batch_size, verbose=1,
validation_split=0.1, callbacks =[es])
But getting Error:
/usr/local/lib/python3.8/dist-packages/keras/engine/data_adapter.py in train_validation_split(arrays, validation_split)
1478 unsplitable = [type(t) for t in flat_arrays if not _can_split(t)]
1479 if unsplitable:
-> 1480 raise ValueError(
1481 "`validation_split` is only supported for Tensors or NumPy "
1482 "arrays, found following types in the input: {}".format(unsplitable))
ValueError: `validation_split` is only supported for Tensors or NumPy arrays, found following types in the input: [<class 'scipy.sparse.csr.csr_matrix'>]
How to Fix it? Thank you so much.

LSTM: calculating MSELoss in for loop returns NAN when backward pass

I am new with LSTM and ran into a problem. I'm trying to predict a variable using 7 features in time steps of 4. I am working with PyTorch.
Data
From my initial data frame (traindf), I created tensors for every feature and the target (Y) by:
featureX_train = torch.tensor(traindf.featureX[:test].values).view(-1, 4, 1)
Y_train = torch.tensor(traindf.Y[:test].values).view(-1, 4, 1)
...
featureX_test = torch.tensor(traindf.featureX[test:].values).view(-1, 4, 1)
Y_test = torch.tensor(traindf.Y[test:].values).view(-1, 4, 1)
I concatenated all the feature tensors into one X_train and one X_test. All tensors are float32:
print(X_train.shape, Y_train.shape)
print(X_test.shape, Y_test.shape)
torch.Size([24436, 4, 7]) torch.Size([24436, 4, 1])
torch.Size([6109, 4, 7]) torch.Size([6109, 4, 1])
Eventually, I have a train and test data set:
train_dataset = TensorDataset(X_train, Y_train)
test_dataset = TensorDataset(X_test, Y_test)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False)
Preview of my data:
print(train_dataset[0])
print(test_dataset[0])
(tensor([[ 7909.0000, 8094.0000, 9119.0000, 8666.0000, 17599.0000, 13657.0000,
10158.0000],
[ 7909.0000, 8073.0000, 9119.0000, 8636.0000, 17609.0000, 13975.0000,
10109.0000],
[ 7939.5000, 8083.5000, 9166.5000, 8659.5000, 18124.5000, 13971.0000,
10142.0000],
[ 7951.0000, 8064.0000, 9201.0000, 8663.0000, 17985.0000, 13967.0000,
10076.0000]]), tensor([[41.],
[41.],
[41.],
[41.]]))
(tensor([[ 8411.0000, 8530.0000, 9439.0000, 9101.0000, 17368.0000, 14174.0000,
11111.0000],
[ 8460.0000, 8651.5000, 9579.5000, 9355.5000, 17402.0000, 14509.0000,
11474.5000],
[ 8436.0000, 8617.0000, 9579.0000, 9343.0000, 17318.0000, 14288.0000,
11404.0000],
[ 8519.0000, 8655.0000, 9580.0000, 9348.0000, 17566.0000, 14640.0000,
11404.0000]]), tensor([[59.],
[59.],
[59.],
[59.]]))
Applying LSTM model
My LSTM model:
class LSTMModel(nn.Module):
def __init__(self, input_size, hidden_size, output_size):
super().__init__()
self.lstm = nn.LSTM(input_size, hidden_size)
self.linear = nn.Linear(hidden_size, output_size)
def forward(self, x):
x, _ = self.lstm(x)
# x = self.linear(x[:, -1, :])
x = self.linear(x)
return x
model = LSTMModel(input_size=7, hidden_size=32, output_size=1)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters())
model.train()
When I try:
for X, Y in train_loader:
optimizer.zero_grad()
Y_pred = model(X)
loss = loss_fn(Y_pred, Y)
print(loss)
I get (correctly I assume) Loss: tensor(1318.9419, grad_fn=<MseLossBackward0>)
However, when I run:
for X, Y in train_loader:
optimizer.zero_grad()
Y_pred = model(X)
loss = loss_fn(Y_pred, Y)
# Now apply backward pass
loss.backward()
optimizer.step()
print(loss)
I get: tensor(nan, grad_fn=<MseLossBackward0>)
Tried normalizing
I have tried normalizing the data:
mean = X.mean()
std = X.std()
X_normalized = (X - mean) / std
Y_pred = model(X_normalized)
But it yields the same result. Why do I yield 'nan' after applying loss.backward() in such a loop? How can I fix this? Thanks in advance!
My X_train contained few nan values. By removing the matrices with nan values, I solved this issue:
mask = torch.isnan(X_train).any(dim=1).any(dim=1)
X_train = X_train[~mask]
# Do the same for Y_train as it needs to be the same size
Y_train = Y_train[~mask]
# Create the TensorDataset for the training set
train_dataset = TensorDataset(X_train, Y_train)

Predicting Future values with Keras LSTM

I have created an LSTM sales prediction model that works really well on the train and test sets. I would now like to predict beyond the dates in the entire dataset.
I have tried following this answer how to use the Keras model to forecast for future dates or events? but I really can't figure out how to adjust my code to do future predictions.
Also, I changed my code from
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
to
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 1:8]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 1:8]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
after trying the solution in Keras time series can I predict next 6 month in one time
Here is the code where training and modelling happens:
# changed to initial
for df in m:
train_set, test_set = m[df][0:-6].values, m[df][-6:].values
#apply Min Max Scaler
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler = scaler.fit(train_set)
# reshape training set
train_set = train_set.reshape(train_set.shape[0], train_set.shape[1])
train_set_scaled = scaler.transform(train_set)
# reshape test set
test_set = test_set.reshape(test_set.shape[0], test_set.shape[1])
test_set_scaled = scaler.transform(test_set)
#build the LSTM Model
X_train, y_train = train_set_scaled[:, 1:], train_set_scaled[:, 0:1]
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[1])
X_test, y_test = test_set_scaled[:, 1:], test_set_scaled[:, 0:1]
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[1])
print('Fitting model for: {}'.format(df))
#fit our LSTM Model
model = Sequential()
model.add(LSTM(4, batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, nb_epoch=500, batch_size=1, verbose=1, shuffle=False)
# model.save('lstm_model.h5')
print('Predictions for: {}'.format(df))
#check prediction
y_pred = model.predict(X_test,batch_size=1)
print('Inverse Transform for: {}'.format(df))
#inverse transformation to see actual sales
#reshape y_pred
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])
#rebuild test set for inverse transform
pred_test_set = []
for index in range(0,len(y_pred)):
print (np.concatenate([y_pred[index],X_test[index]],axis=1))
pred_test_set.append(np.concatenate([y_pred[index],X_test[index]],axis=1))
#reshape pred_test_set
pred_test_set = np.array(pred_test_set)
pred_test_set = pred_test_set.reshape(pred_test_set.shape[0], pred_test_set.shape[2])
#inverse transform
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)
I would like the predictions to go beyond the data in the dataset.
UPDATE: I trained the model and took its predictions on the test set. Use these as input for another LSTM model to fit and predict for 12 months. It worked for me. Also changed my last Dense layer (above) to predict 1 point at a time instead of 7 as I had before.
Below is the code:
from numpy import array
for df in d:
if df in list_df:
# df_ADIDAS DYN PUL DEO 150 FCA5421
#KEEP
result_list = []
sales_dates = list(d["{}".format(df)][-7:].Month)
act_sales = list(d["{}".format(df)][-7:].Sale)
for index in range(0,len(pred_test_set_inverted)):
result_dict = {}
result_dict['pred_value'] = int(pred_test_set_inverted[index][0] + act_sales[index]) #change to 0 ffrom act_sales[index]
result_dict['date'] = sales_dates[index] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>REVIEW
result_list.append(result_dict)
df_result = pd.DataFrame(result_list)
predictions = list(df_result['pred_value'])
forecasts = []
result_list
for i in range(len(result_list)):
forecasts.append(result_list[i]['pred_value'])
def split_sequence(sequence, n_steps):
X, y = list(), list()
for i in range(len(sequence)):
# find the end of this pattern
end_ix = i + n_steps
# check if we are beyond the sequence
if end_ix > len(sequence)-1:
break
# gather input and output parts of the pattern
seq_x, seq_y = sequence[i:end_ix], sequence[end_ix]
X.append(seq_x)
y.append(seq_y)
return array(X), array(y)
# choose a number of time steps
n_steps = 4
# split into samples
X, y = split_sequence(forecasts, n_steps)
# summarize the data
# for i in range(len(X)):
# print(X[i], y[i])
n_features = 1
X = X.reshape((X.shape[0], X.shape[1], n_features))
# define model
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(n_steps, n_features)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')
# fit model
model.fit(X, y, epochs=200, verbose=0)
# demonstrate prediction
x_input = array(predictions[-4:])
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
#print(yhat)
currentStep = yhat[:, -1:]
print('Twelve Month Prediction for {}'.format(df))
for i in range(12):
if i == 0:
x_input = x_input.reshape((1, n_steps, n_features))
yhat = model.predict(x_input, verbose=0)
print(yhat)
else:
x0_input = np.append(x_input, [currentStep[i-1]])
x0_input = x0_input.reshape((1, n_steps+1, n_features))
x_input = x0_input[:,1:]
yhat = model.predict(x_input)
currentStep = np.append(currentStep, yhat[:,-1:])
print(yhat)
Your last Dense layer says that you are predicting 7 points at a time. Save those predictions and feed them to the model again to predict next 7. That makes it 14 predictions simultaneously. And so on. Or change the number of nodes and shape of y from 7 to corresponding number and train again.

Why Does PyTorch Run Slower Than Keras For Me, and How To Make The Code Concise?

I read PyTorch is better and faster, so I'm trying to see if I want to transition to PyTorch from Keras.
I made the simple testing code below, but Keras consistently runs faster for me, and usually (not always) gets better score than PyTorch.
Also is there a better way to write code for PyTorch more concise like Keras?
#output:
Keras:
Total runtime = 18.451340198516846
LRL: 0.145 LRAP: 0.493
PyTorch:
Total runtime = 19.641956329345703
LRL: 0.092 LRAP: 0.491
def score(true, pred):
lrl = label_ranking_loss(true, pred)
lrap = label_ranking_average_precision_score(true, pred)
print('LRL:', round(lrl), 'LRAP:', round(lrap))
def main():
x,y = load()
x_train, x_test, y_train, y_test = train_test_split(x, y)
scaler = StandardScaler()
x_train= scaler.fit_transform(x_train)
x_test= scaler.transform(x_test)
epochs = 100
batch_size = 32
print("Keras:")
t_start = time.time()
model= Sequential()
model.add(Dense(60, activation="relu", input_shape=(120,)))
model.add(Dense(30, activation="relu"))
model.add(Dense(10, activation="sigmoid"))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs)
pred = model.predict(x_test)
t_finish = time.time()
total_time = t_finish-t_start
print('Total runtime = ', total_time)
score(y_test, pred)
print("PyTorch:")
t_start = time.time()
model = torch.nn.Sequential(
torch.nn.Linear(120, 60),
torch.nn.ReLU(),
torch.nn.Linear(60, 30),
torch.nn.ReLU(),
torch.nn.Linear(30, 10),
torch.nn. Sigmoid())
loss_fn = torch.nn. BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
n_batch = int(x_train.shape[0]/batch_size)
for epoch in range(epochs):
avg_cost = 0
for i in range(n_batch):
x_batch = x_train[i*batch_size:(i+1)*batch_size]
y_batch = y_train[i*batch_size:(i+1)*batch_size]
x, y = Variable(torch.from_numpy(x_batch).float()), Variable(torch.from_numpy(y_batch).float(), requires_grad=False)
pred = model(x)
loss = loss_fn(pred, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
avg_cost += loss.item()/n_batch
print(epoch, avg_cost)
x, y = Variable(torch.from_numpy(x_test).float()), Variable(torch.from_numpy(y_test).float(), requires_grad=False)
pred = model(x).data.numpy()
t_finish = time.time()
total_time = t_finish-t_start
print('Total runtime = ', total_time)
score(y_test, pred)
if __name__ == '__main__':
main()

Keras fit_generator() not working due to shape error

I am running MNIST prediction using Keras, with tensorflow backend.
I have code that runs with batches , using Keras fit() as
(X_train, y_train), (X_test, y_test) = mnist.load_data()
N1 = X_train.shape[0]
N2 = X_test.shape[0]
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
# reshape N1 samples to num_pixels
x_train = X_train.reshape(N1, num_pixels).astype('float32') # shape is now (60000,784)
x_test = X_test.reshape(N2, num_pixels).astype('float32') # shape is now (10000,784)
x_train = x_train / 255
x_test = x_test / 255
y_train = np_utils.to_categorical(y_train) #(60000,10)
y_test = np_utils.to_categorical(y_test) # (10000,10):
num_classes = y_test.shape[1]
def baseline_model():
# create model
model = Sequential()
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = baseline_model()
batch_size = 200
epochs = 20
max_batches = 2 * len(x_train) / batch_size # 2*60000/200
# reshape to be [samples][width][height][ channel] for ImageDataGenerator
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
datagen = ImageDataGenerator(rescale= 1./255)
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
for e in range(epochs):
batches = 0
for x_batch, y_batch in train_gen:
# x_batch is of size [batch_sz,w,h,ch]: resize to [bth_sz,pixel_sz]: (200,28,28,1)-> (200,784)
# for model.fit
x_batch = np.reshape(x_batch, [-1, num_pixels])
model.fit(x_batch, y_batch,validation_split=0.15,verbose=0)
batches += 1
print("Epoch %d/%d, Batch %d/%d" % (e+1, epochs, batches, max_batches))
if batches >= max_batches:
break
scores = model.evaluate(x_test, y_test, verbose=0)
However, when I try to implement similar code using fit_generator(), I get an error.
the code is as below:
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# separate data into train and validation
from sklearn.model_selection import train_test_split
# Split the data
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.15, shuffle= True)
# number of training samples
N1 = X_train.shape[0] # training size
N2 = X_test.shape[0] # test size
N3 = X_valid.shape[0] # valid size
h = X_train.shape[1]
w = X_train.shape[2]
num_pixels = h*w
y_train = np_utils.to_categorical(y_train)
y_valid = np_utils.to_categorical(y_valid)
y_test = np_utils.to_categorical(y_test)
num_classes = y_test.shape[1]
def baseline_model():
# create model
model = Sequential()
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model
model = baseline_model()
batch_size = 200
epochs = 20
steps_per_epoch_tr = int(N1/ batch_size) # 51000/200
steps_per_epoch_val = int(N3/batch_size)
# reshape to be [samples][width][height][ channel] for ImageData Gnerator->datagen.flow
x_t = X_train.reshape(N1, w, h, 1).astype('float32')
x_v = X_valid.reshape(N3, w, h, 1).astype('float32')
# define data preparation
datagen = ImageDataGenerator(rescale=1./255) # scales x_t/x_v
train_gen = datagen.flow(x_t, y_train, batch_size=batch_size)
valid_gen = datagen.flow(x_v,y_valid, batch_size=batch_size)
model.fit_generator(train_gen,steps_per_epoch = steps_per_epoch_tr,validation_data = valid_gen,
validation_steps = steps_per_epoch_val,epochs=epochs)
This gives an error:
This is due to expected image dimension error, but I am not sure where/how to fix this. any help is greatly appreciated.
Thanks
sedy
In the model.fit() case, this line flattened the input before feeding it for training.
x_batch = np.reshape(x_batch, [-1, num_pixels])
But in the generator case, there is nothing to flatten the input before feeding it to the Dense layer. The Dense layer cannot process 2D input (28 x 28). Adding, a Flatten() layer to the model should do the trick as shown below.
def baseline_model():
# create model
model = Sequential()
model.add(Flatten(input_shape=(28,28,1)))
model.add(Dense(num_pixels, input_dim=num_pixels, kernel_initializer='normal', activation='relu'))
model.add(Dense(num_classes, kernel_initializer='normal', activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
return model

Resources