LSTM Prediction with Low Accuracy [duplicate] - keras

This question already has answers here:
What function defines accuracy in Keras when the loss is mean squared error (MSE)?
(3 answers)
Closed 9 months ago.
I used an LSTM model for this prediction. But the accuracy is very low. How could I fix this issue?
from keras.layers import Dropout
from keras.layers import Bidirectional
# Stacked LSTM regressor: four LSTM layers of 50 units (the first three return
# full sequences, the last returns only its final output), followed by two
# Dense(50) layers, dropout, and a single-unit output.
model=Sequential()
model.add(LSTM(50,activation='relu',return_sequences=True,input_shape=(look_back,1)))
model.add(LSTM(50, activation='relu', return_sequences=True))
model.add(LSTM(50, activation='relu', return_sequences=True))
model.add(LSTM(50, activation='sigmoid', return_sequences=False))
model.add(Dense(50))
model.add(Dense(50))
model.add(Dropout(0.2))
model.add(Dense(1))
# NOTE(review): 'accuracy' is a classification metric; with a mean-squared-error
# loss on a single continuous output it is not a meaningful score. A regression
# metric such as 'mae' is presumably what is wanted here.
model.compile(optimizer='adam',loss='mean_squared_error',metrics=['accuracy'])
# The learning rate is overridden on the optimizer after compile().
model.optimizer.learning_rate = 0.0001
Test and Train Prediction Plot
Epochs

Your structure seems correct. Try my code.
from keras.models import Sequential
from keras.layers import LSTM, Dense,Dropout, Bidirectional
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
#from keras.utils import plot_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.layers.merge import Concatenate
import matplotlib.gridspec as gridspec
import random
import scikitplot as skplot
import datetime
from datetime import date
from pandas_datareader import data as pdr
def create_dataset(dataset, look_back=3):
    """Split a time-ordered series into sliding windows and next-step targets.

    Args:
        dataset: Sliceable sequence or array of observations ordered in time.
        look_back: Number of consecutive observations in each input window.

    Returns:
        A tuple ``(dataX, dataY)`` of NumPy arrays where ``dataX[i]`` is
        ``dataset[i:i + look_back]`` and ``dataY[i]`` is
        ``dataset[i + look_back]``. Both are empty when the series has no
        more than ``look_back`` observations.
    """
    # The last usable window starts at len(dataset) - look_back - 1, so the
    # range has to stop at len(dataset) - look_back. The previous extra "- 1"
    # silently discarded the final sample.
    starts = range(len(dataset) - look_back)
    dataX = [dataset[i:i + look_back] for i in starts]
    dataY = [dataset[i + look_back] for i in starts]
    return np.array(dataX), np.array(dataY)
COLUMNS=['your_data_column']
dataset=df[COLUMNS]
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(np.array(dataset).reshape(-1,1))
train_size = int(len(dataset) * 0.60)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size], dataset[train_size:len(dataset)]
look_back=3
trainX=[]
testX=[]
y_train=[]
n_future = 1
features=2
timeSteps=4
# Bidirectional LSTM followed by three stacked LSTM layers with dropout and a
# Dense head that emits `n_future` values per sample.
model = Sequential()
# Declare the input shape on the Bidirectional wrapper, which is the first
# layer of the model. Each sample is a window of `look_back` time steps with a
# single feature; the earlier version referenced `X_train`, which this snippet
# never defines.
model.add(Bidirectional(LSTM(units=50, return_sequences=True),
                        input_shape=(look_back, 1)))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dropout(0.2))
model.add(Dense(units=n_future))
# This is a regression model trained with MSE, so report mean absolute error.
# "acc" is a classification metric and is not meaningful on continuous targets.
model.compile(optimizer="adam", loss="mean_squared_error", metrics=["mae"])

Related

How can I get the history of the KerasRegressor?

I want to get KerasRegressor history but all the time I get (...) object has no attribute 'History'
'''
# Regression Example With Boston Dataset: Standardized and Wider
import numpy as np
from pandas import read_csv
from keras.models import Sequential
from keras.layers import Dense
#from keras.wrappers.scikit_learn import KerasRegressor
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import keras.backend as K
# load dataset
dataframe = read_csv("Data 1398-2.csv")
dataset = dataframe.values
# split into input (X) and output (Y) variables
X = dataset[:,0:10]
Y = dataset[:,10]
############
from sklearn import preprocessing
from sklearn.metrics import r2_score
min_max_scaler = preprocessing.MinMaxScaler()
X_scale = min_max_scaler.fit_transform(X)
from sklearn.model_selection import train_test_split
X_train, X_val_and_test, Y_train, Y_val_and_test = train_test_split(X_scale, Y, test_size=0.25)
X_val, X_test, Y_val, Y_test = train_test_split(X_val_and_test, Y_val_and_test, test_size=0.55)
##################
# define wider model
def wider_model():
    """Build and compile the wider fully connected regression network.

    Returns:
        A compiled ``Sequential`` model with 10 inputs, two hidden ReLU layers
        (40 and 20 units) and a single linear output, trained with
        mean squared error and reporting mean absolute error.
    """
    model = Sequential()
    model.add(Dense(40, input_dim=10, kernel_initializer='normal', activation='relu'))
    model.add(Dense(20, kernel_initializer='normal', activation='relu'))
    # No activation on the output layer: the network predicts a raw value.
    model.add(Dense(1, kernel_initializer='normal'))
    model.compile(loss='mean_squared_error', metrics=['mae'], optimizer='adam')
    return model
# evaluate model with standardized dataset
from keras.callbacks import History
estimators = []
estimators.append(('standardize', StandardScaler()))
estimators.append(('mlp',KerasRegressor(model=wider_model, epochs=100, batch_size=2, verbose=0) ))
pipeline = Pipeline(estimators)
kfold = KFold(n_splits=5)
results = cross_val_score(pipeline, X_train, Y_train, cv=kfold)
print("Wider: %.2f (%.2f) MSE" % (results.mean(), results.std()))
import matplotlib.pyplot as plt
#plt.plot(history.history['loss'])
#plt.plot(history.history['val_loss'])
#plt.title('Model loss')
#plt.ylabel('Loss')
#plt.xlabel('Epoch')
#plt.legend(['Train', 'Val'], loc='upper right')
#plt.show()
'''
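The Keras estimator is the step at index 1 of your pipeline (you can also look it up by name with `pipeline.named_steps['mlp']`). Note that `cross_val_score` fits clones of the pipeline, so fit the pipeline itself first with `pipeline.fit(X_train, Y_train)`; with the `scikeras` wrapper the training history is also exposed as `history_` on the fitted estimator. Now, to get the history object: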
pipeline.steps[1][1].model.history.history
If you are sure that Keras Model is always the last estimator, you can also use:
pipeline._final_estimator.model.history.history

Module object is not callable (kerastuner)

I am trying to optimize my Keras NN using kerastuner, but for some reason it's giving me a "'module' object is not callable" error on the `tuner = randomsearch(...)` line.
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot as plt
# RandomSearch is the tuner class. `keras_tuner.tuners.randomsearch` is the
# module that defines it, which is why calling it raised
# "'module' object is not callable".
from keras_tuner.tuners import RandomSearch
from keras_tuner.engine import hyperparameters
import time

# One tuner directory per run, named after the current Unix timestamp.
LOG_DIR = f"{int(time.time())}"


def build_model(hp):
    """Build a compiled dense network from a set of tuner hyperparameters.

    Args:
        hp: KerasTuner hyperparameter container; it supplies the width of the
            first layer, the number of extra layers (1-5) and each extra
            layer's width (1-105 units).

    Returns:
        The compiled ``Sequential`` model for this trial.
    """
    model = Sequential()
    model.add(Dense(hp.Int("input_units", min_value=1, max_value=105, step=1), input_dim=X_train.shape[1], activation='relu'))
    for i in range(hp.Int("n_layers", 1, 5)):
        model.add(Dense(hp.Int(f"conv_{i}_units", min_value=1, max_value=105, step=1), activation='relu'))
    # NOTE(review): there is no dedicated output layer, and 'accuracy' is a
    # classification metric paired with an MSE loss. If this is a regression
    # task, a final Dense(1) and a "val_loss" objective are presumably intended
    # - confirm against the data.
    model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
    return model


tuner = RandomSearch(build_model, objective="val_accuracy", max_trials=1, executions_per_trial=1, directory=LOG_DIR)
tuner.search(x=X_train, y=y_train, epochs=1, batch_size=146, validation_data=(X_test, y_test))
I figured it out: I imported the wrong `randomsearch` and `hyperparameters` — the lowercase names are modules, not the callable `RandomSearch` class.

Keras LSTM to Pytorch

I am using the following code to apply a sequential LSTM to time-series data with one value per time step. It works fine with Keras. How could I do the same using PyTorch?
import tensorflow
from tensorflow.keras import optimizers
from tensorflow.keras import losses
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Dropout, Embedding, LSTM
from tensorflow.keras.optimizers import RMSprop, Adam, Nadam
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import TensorBoard
# training_dataset.shape = (303, 24, 1)
time_steps = 24
metric = 'mean_absolute_error'
model = Sequential()
model.add(LSTM(units=32, activation='tanh', input_shape=(time_steps, 1), return_sequences=True))
model.add(Dense(1, activation='sigmoid'))
model.compile(optimizer='adam', loss='mean_absolute_error', metrics=[metric])
print(model.summary())
batch_size=32
epochs=20
model.fit(x=training_dataset, y=training_dataset,
batch_size=batch_size, epochs=epochs,
verbose=1, validation_data=(training_dataset, training_dataset),
callbacks=[TensorBoard(log_dir='../logs/{0}'.format(tensorlog))])
testing_pred = model.predict(x=testing_dataset)
You can check the pytorch documentation for that: https://pytorch.org/docs/master/generated/torch.nn.LSTM.html
the simplest code is the following:
import torch
import torch.nn as nn
from torch.optim import Adam  # Adam is a class, so it cannot be imported as a module


class LSTMRegressor(nn.Module):
    """PyTorch counterpart of Keras LSTM(32, return_sequences=True) + Dense(1, sigmoid).

    Args:
        input_size: Number of features per time step.
        hidden_size: Number of LSTM units.
    """

    def __init__(self, input_size=1, hidden_size=32):
        super().__init__()
        # batch_first=True makes the input (batch, time_steps, features),
        # the same layout Keras uses.
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        # nn.LSTM has no output_size argument; a Linear layer maps each
        # time step's hidden state to one value, like Keras' Dense(1).
        self.head = nn.Linear(hidden_size, 1)

    def forward(self, x):
        # nn.LSTM returns (outputs, (h_n, c_n)), which nn.Sequential cannot
        # pass to the next layer, so the tuple is unpacked here.
        outputs, _ = self.lstm(x)
        return torch.sigmoid(self.head(outputs))


model = LSTMRegressor()
opt = Adam(model.parameters())
# Use nn.L1Loss() instead to mirror the Keras model's mean_absolute_error loss.
loss_func = nn.MSELoss()
for x, y in dataloader:
    opt.zero_grad()
    pred = model(x)
    # PyTorch losses take (prediction, target) in that order.
    loss = loss_func(pred, y)
    loss.backward()
    opt.step()

Error when checking input: expected lstm_132_input to have 3 dimensions, but got array with shape (23, 1, 3, 1)

I have a data set that includes temperature, humidity and wind. I want to predict the temperature value for the next hour.
I used LSTM to predict future temperature value.
But when I run the model it showed up this error Error when checking input: expected lstm_132_input to have 3 dimensions, but got array with shape (23, 1, 3, 1)
Can anyone help me to solve this problem?
Here is my code:
import datetime
import time
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from keras.layers.core import Dense, Dropout, Activation
from keras.activations import linear
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
data = pd.read_csv('data6.csv' , sep=',')
data['date'] = pd.to_datetime(data['date'] + " " + data['time'], format='%m/%d/%Y %H:%M:%S')
data.set_index('time', inplace=True)
data = data.values
data = data.astype('float32')
# normalize the dataset
def create_data(train, X, n_out=1):
    """Slice a 2-D array into sliding input windows and the targets that follow.

    Args:
        train: 2-D array of shape (rows, columns) ordered in time.
        X: Number of consecutive rows in each input window.
        n_out: Number of rows after the window used as the target.

    Returns:
        A tuple ``(x, y)`` of NumPy arrays. ``x[i]`` is ``train[i:i + X]``
        (all columns) and ``y[i]`` holds column 0 of the ``n_out`` rows that
        immediately follow that window.
    """
    x, y = [], []
    # Iterate over the array that was passed in. The previous version read the
    # module-level `data`, so the train and test calls both windowed the full
    # data set.
    for start in range(len(train)):
        in_end = start + X
        out_end = in_end + n_out
        # out_end == len(train) is still a complete window; the former strict
        # "<" comparison dropped that last sample.
        if out_end > len(train):
            break
        x.append(train[start:in_end])
        y.append(train[in_end:out_end, 0])
    return np.array(x), np.array(y)
scaler = MinMaxScaler()
data = scaler.fit_transform(data)
# split into train and test sets
train = int(len(data) * 0.6)
test = len(data) - train
train, test = data[0:train,:], data[train:len(data),:]
# Window length: each input sample spans X consecutive rows.
X=1
x_train, y_train = create_data(train,X)
x_test, y_test = create_data(test,X)
# NOTE(review): create_data stacks 2-D slices, so x_train/x_test are already
# 3-D (samples, time steps, features). Appending another axis here makes them
# 4-D, which matches the reported shape (23, 1, 3, 1) - these two reshapes are
# the likely cause of the "expected ... 3 dimensions" error.
x_train=x_train.reshape(x_train.shape +(1,))
x_test=x_test.reshape(x_test.shape + (1,))
# NOTE(review): n_outputs is read from the time-step axis of x_train;
# presumably it should come from y_train.shape[1] - confirm.
n_timesteps, n_features, n_outputs = x_train.shape[1], x_train.shape[2], x_train.shape[1]
# LSTM(8) -> Dense(8, relu) -> Dense(n_outputs), trained with MSE and Adam.
model = Sequential()
model.add(LSTM(8, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Dense(8,activation='relu'))
model.add(Dense(n_outputs))
model.compile(loss='mse', optimizer='adam')
# fit network
model.fit(x_train,y_train, epochs=10,batch_size=1, verbose=0)
My csv file:
My csv file.
My error:
model summary :
you need to add activation to your last layer
model = Sequential()
model.add(LSTM(8, activation='relu', input_shape=(n_timesteps, n_features)))
model.add(Dense(8,activation='relu'))
# here
model.add(Dense(n_outputs,activation='relu'))
model.compile(loss='mse', optimizer='adam')
# fit network
model.fit(x_train,y_train, epochs=10,batch_size=1, verbose=0)

Keras Conv1D for Time Series

I am just a novice in area of deep learning.
I made my first basic attempt with Keras Conv1D. Not sure what I did and whether I did it right. My input data is simply total sales by every week (total of 313 weeks), for stores across US and with a time step of 1.
Here is my code:
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def create_dataset(dataset, look_back=1):
    """Pair each window of the first column with the value that follows it.

    Args:
        dataset: 2-D array ordered in time; only column 0 is read.
        look_back: Number of consecutive values in each input window.

    Returns:
        A tuple of NumPy arrays ``(windows, targets)``: ``windows[k]`` holds
        ``look_back`` consecutive values starting at row ``k`` and
        ``targets[k]`` is the value in the row right after that window.
    """
    starts = range(len(dataset) - look_back)
    windows = [dataset[s:s + look_back, 0] for s in starts]
    targets = [dataset[s + look_back, 0] for s in starts]
    return numpy.array(windows), numpy.array(targets)
seed = 7
numpy.random.seed(seed)
dataframe = read_csv('D:/MIS793/Dataset/Academic Dataset External 2/Python scripts/totalsale _byweek.csv', usecols=[1], engine='python')
plt.plot(dataframe)
plt.show()
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1).astype('float32')
testX = testX.reshape(testX.shape[0], testX.shape[1], 1).astype('float32')
model = Sequential()
model.add(Conv1D(filters=10, kernel_size=1, padding='same', strides=1, activation='relu',input_shape=(1,1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(250, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['mae'])
print(model.summary())
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=10, batch_size=100)
scores = model.evaluate(testX, testY, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
Not sure about few things here:
Reshaping of trainX and testX.
Value of kernel_size and input_shape
My idea here is it's just one vector of sales value. 10 filters, each of size 1 move from one value to another. Input shape is of the format time step, dimensions.
I only got accuracy of 10.91%! So my first question is whether I am feeding in the right parameters.
Thanks
ASC
With model.metrics_names you can get the labels of your scores variable.
In your case it will be ['loss', 'mean_absolute_error'].
So what you are printing is not the accuracy, but the mae, multiplied by 100.
I tried using accuracy instead of mae. However I got accuracy as 0%. Just wondering as this was about predicting numerical values, should I really use accuracy? Here is my latest code.
from pandas import read_csv
import matplotlib.pyplot as plt
import numpy
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
def create_dataset(dataset, look_back=1):
    """Build supervised samples (X = values at t, Y = value at t + 1).

    Args:
        dataset: 2-D array ordered in time; only column 0 is read.
        look_back: Number of consecutive values in each input window.

    Returns:
        A tuple of NumPy arrays ``(dataX, dataY)``: ``dataX[i]`` holds
        ``look_back`` consecutive values starting at row ``i`` and
        ``dataY[i]`` is the value in the row right after that window.
    """
    # The function body had lost its indentation and was not valid Python.
    dataX, dataY = [], []
    for i in range(len(dataset) - look_back):
        dataX.append(dataset[i:i + look_back, 0])
        dataY.append(dataset[i + look_back, 0])
    return numpy.array(dataX), numpy.array(dataY)
# fix random seed for reproducibility
seed = 7
numpy.random.seed(seed)
dataframe = read_csv('D:/MIS793/Dataset/Academic Dataset External 2/Python scripts/totalsale _byweek.csv', usecols=[1], engine='python')
plt.plot(dataframe)
plt.show()
dataset = dataframe.values
dataset = dataset.astype('float32')
# normalize the dataset
scaler = MinMaxScaler(feature_range=(0, 1))
dataset = scaler.fit_transform(dataset)
train_size = int(len(dataset) * 0.67)
test_size = len(dataset) - train_size
train, test = dataset[0:train_size,:], dataset[train_size:len(dataset),:]
# reshape into X=t and Y=t+1
look_back = 1
trainX, trainY = create_dataset(train, look_back)
testX, testY = create_dataset(test, look_back)
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1],1).astype('float32')
testX = testX.reshape(testX.shape[0], testX.shape[1],1).astype('float32')
model = Sequential()
model.add(Conv1D(filters=20, kernel_size=1, padding='same', strides=1, activation='relu',input_shape=(1,1)))
model.add(MaxPooling1D(pool_size=1))
model.add(Conv1D(filters=10, kernel_size=1, padding='same', strides=1, activation='relu'))
model.add(MaxPooling1D(pool_size=1))
model.add(Flatten())
model.add(Dense(4, activation='relu'))
model.add(Dense(1, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['accuracy'])
print(model.summary())
model.fit(trainX, trainY, validation_data=(testX, testY), epochs=10, batch_size=100)
scores = model.evaluate(testX, testY, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))
OR should I go with MAE?
If I go with MAE, my scores will look like below:
[0.12740663779013364, 0.31208728355111426]
The first one is the loss and the second one is the MAE. Isn't that a better metric in this case?
The final line will be like this:
# MAE is an absolute error in the (scaled) units of the target, not a
# percentage, so it is printed without a "%" sign.
print("MAE: %.4f" % scores[1])
Thanks
Anindya

Resources