Multivariate Encoder-Decoder Model approaches value

Multivariate Encoder-Decoder Model approaches value - keras

I am creating my first multivariate multistep encoder-decoder LSTM to forecast revenues.
As you can see, the values move towards a value and then stop at that value. The aim is to create a forecast for a longer period, but there is no deviation at all from this standard value after the first week.
What is wrong and what can I do? To me it doesn't look like it is working at all.
code:
class ModelTrainer:
def __init__(self, prediction_length=30, offset=1):
self.prediction_length = prediction_length
self.offset = offset
self._setup_values()
self.use_scaling = True
self.__prepare_data()
def _setup_values(self):
# Model configuration
self.additional_metrics = ['accuracy']
self.embedding_output_dims = 15
self.max_sequence_length = 300
self.num_distinct_words = 5000
self.verbosity_mode = 1
# DATA
self.WINDOW_LENGTH = 70 # ! SHOULD BE ADJUSTED TO THE AMOUNT OF FORECASTING DAYS
self.SAMPLING_RATE = 1
self.BATCH_SIZE = 128
# MODEL
self.DROPOUT = 0.3
self.NODES_PER_LAYER = 256
self.NUMBER_OF_LAYERS = 3
# TRAINING
self.LEARNING_RATE = 0.001
self.OPTIMIZER = Adam(learning_rate=self.LEARNING_RATE)
self.VALIDATION_SPLIT = 0.20
self.NUMBER_OF_EPOCHS = 10
self.TEST_SIZE = 0.1
self.RANDOM_STATE = 123
self.LOSS_FUNCTION = MeanSquaredError()
def __import_data(self):
self.series = DataOrganizer().df
def __prepare_data(self):
self.__import_data()
self.scaler = preprocessing.MinMaxScaler()
data_scaled = self.scaler.fit_transform(self.series)
self.features, self.target = self._create_feature_target_values_window(
data_scaled)
def _create_feature_target_values_window(self, data):
self.number_of_output_columns = 4
feature_data = data
target_data = data[:, :self.number_of_output_columns]
features, target = list(), list()
in_start = 0
for _ in range(len(data)):
in_end = in_start + self.WINDOW_LENGTH
out_end = in_end + self.prediction_length
if out_end <= len(data):
features.append(feature_data[in_start:in_end, :])
target.append(
target_data[in_end:out_end, 0:self.number_of_output_columns])
in_start += 1
return np.array(features), np.array(target)
def __create_LSTM_model(self):
num_feature_columns = self.features.shape[2]
num_output_columns = self.target.shape[2]
model = Sequential()
model.add(LSTM(self.NODES_PER_LAYER, input_shape=(
self.WINDOW_LENGTH, num_feature_columns)))
model.add(Dropout(self.DROPOUT))
model.add(RepeatVector(self.prediction_length))
model.add(LSTM(self.NODES_PER_LAYER, return_sequences=True))
model.add(Dropout(self.DROPOUT))
model.add(TimeDistributed(Dense(self.NODES_PER_LAYER)))
model.add(Dropout(self.DROPOUT))
model.add(TimeDistributed(Dense(num_output_columns)))
model.summary()
return model
def train_model(self, callbacks=[]):
model = self.__create_LSTM_model()
model.compile(loss=self.LOSS_FUNCTION,
optimizer=self.OPTIMIZER,
metrics=['accuracy', MeanAbsoluteError()]
)
model.fit(
x=self.features,
y=self.target,
epochs=self.NUMBER_OF_EPOCHS,
validation_split=self.TEST_SIZE,
shuffle=False,
callbacks=callbacks
)
self.model = model
def create_forecast(self):
prediction = self.model.predict(self.features[-1:])
# prediction = self.model.predict(self.features[-30:-29]) # Show forecast from a month old
test_X = self.features.copy()
test_X = test_X[:self.prediction_length,
:1, self.number_of_output_columns:]
test_X = test_X.reshape(
self.prediction_length, self.series.shape[1] - self.number_of_output_columns)
prediction = prediction.reshape(self.prediction_length,
self.number_of_output_columns)
inv_yhat = np.concatenate((prediction, test_X), axis=1)
inv_yhat = self.scaler.inverse_transform(inv_yhat)
prediction_df = pd.DataFrame(
inv_yhat, columns=self.scaler.feature_names_in_)
first_date = self.series.last_valid_index() + timedelta(days=1)
last_date = first_date + timedelta(days=self.prediction_length-1)
days = pd.date_range(first_date, last_date, freq='D')
prediction_df.set_index(days, inplace=True)
prediction_df = prediction_df[self.series.columns[0:4]]
Actual
Forecast:
(I know the x-axis description is incorrect. Don't worry about it)

Related

how can I predict only 5 days price in this LSTM model (by pytorch)

class StockDataset(Dataset):
#데이터 셋은 i번째 레코드 값을 주는 역할 수행
def __init__(self, symbol, x_frames, y_frames, start, end):
self.symbol = symbol
self.x_frames = x_frames
self.y_frames = y_frames
self.start = datetime.datetime(*start)
self.end = datetime.datetime(*end)
#위에서 지정했던 데이터, 날짜 값들 다 받아옴
self.data = pdr.DataReader(self.symbol, 'yahoo', self.start, self.end)
def __len__(self):
return len(self.data) - (self.x_frames + self.y_frames) + 1
def __getitem__(self, idx):
global data
#global data_set
#데이터셋 i번째 값 입력받았을때 그걸 출력해줌 데이터를 '리스트'화 하는 것
idx += self.x_frames
data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
data = data[['High', 'Low', 'Open', 'Close', 'Adj Close', 'Volume']]
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1)) #로그수익률 변환 한뒤, 혹시모를 결측값 위해 1더해줌
global x_ex
global y_ex
x_ex= data[:self.x_frames]
y_ex= data[self.x_frames:]
data = data.values #numpy array로 변환한거
X = data[:self.x_frames]
y = data[self.x_frames:]
return X, y
This one is dataset
class LSTM(nn.Module):
#50분 이후부터 모델설명
def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn):
super(LSTM, self).__init__()
self.input_dim = input_dim
self.hidden_dim = hidden_dim
self.output_dim = output_dim
self.num_layers = num_layers
self.batch_size = batch_size
self.dropout = dropout
self.use_bn = use_bn
self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
self.hidden = self.init_hidden()
self.regressor = self.make_regressor()
def init_hidden(self):
return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))
def make_regressor(self):
layers = []
if self.use_bn:
layers.append(nn.BatchNorm1d(self.hidden_dim))
layers.append(nn.Dropout(self.dropout))
layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
layers.append(nn.ReLU())
layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
regressor = nn.Sequential(*layers)
return regressor
def forward(self, x):
lstm_out, self.hidden = self.lstm(x, self.hidden)
y_pred = self.regressor(lstm_out[-1].view(self.batch_size, -1))
return y_pred
This one is model
def test(model, partition, args):
global y_true
global y_pred
global X
testloader = DataLoader(partition['test'],
batch_size=args.batch_size,
shuffle=False, drop_last=True)
model.eval()
test_acc = 0.0
with torch.no_grad():
for i, (X, y) in enumerate(testloader):
X = X.transpose(0, 1).float().to(args.device)
y_true = y[:, :, 3].float().to(args.device)
model.hidden = [hidden.to(args.device) for hidden in model.init_hidden()]
y_pred = model(X)
test_acc += metric(y_pred, y_true)[0]
test_acc = test_acc / len(testloader)
return test_acc
This is test data loader.
# ====== Random Seed Initialization ====== #
seed = 666
np.random.seed(seed)
torch.manual_seed(seed)
parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.exp_name = "exp1_lr"
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'
# ====== Data Loading ====== #
args.symbol = '005930.KS' #원하는 종목
args.batch_size = 4 #배치사이즈
args.x_frames = 5 #수정x 이전 n일치 데이터 이게 너무 길면 1주일 예측 불가능
args.y_frames = 5 #수정y 이후 n일치 데이터 이게 너무 길면 1주일 예측 불가능
# ====== Model Capacity ===== #
args.input_dim = 6
args.hid_dim = 50
args.n_layers = 2 # (은닉층의 레이어 갯수) https://justkode.kr/deep-learning/pytorch-rnn링크 참고
# ====== Regularization ======= #
args.l2 = 0.0001
args.dropout = 0.3
args.use_bn = True
# ====== Optimizer & Training ====== #
args.optim = 'RMSprop' #'RMSprop' #SGD, RMSprop, ADAM...
args.lr = 0.001
args.epoch = 1
# ====== Experiment Variable ====== #
name_var1 = 'lr' # lr=러닝레이트
name_var2 = 'n_layers'#뉴럴 네트워크 몇개를 쌓을것인지?
list_var1 = [0.001, 0.0001, 0.00001]
list_var2 = [1,2,3]
#데이터셋 실제 형성
trainset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2012,1,1), (2021,1,1)) #학습기간
valset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2021,1,2), (2021,12,30)) #검증기간 최소 +6 월, +19 일 안하면 float division by zero 에러 발생 왜?? 21년 기준
testset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2022,1,10), (2022,1,14)) #테스트기간 최소 +6 월, + 25일 안하면 float division by zero 에러 발생. 22년기준
#기간이 일정 영업일 이상을 요구하는듯? <<146 영업일 이상 데이터 요구. 그만큼 안주면 오류남 왜??
partition = {'train': trainset, 'val':valset, 'test':testset}
for var1 in list_var1:
for var2 in list_var2:
setattr(args, name_var1, var1)
setattr(args, name_var2, var2)
print(args)
setting, result = experiment(partition, deepcopy(args))
save_exp_result(setting, result)
#꼭 디렉토리에 있는 파일들 지운다음에 그래프 그려야한다. 안그러면 결과값 전부 겹쳐서 나옴
This one is hyper parameter regulate.
I wonder how can I get result when I set testset length in 5days? (like (2022,1,10) (2022,1,14))
This cord didn't work when I set testset length at least 7month (maybe + 146 trade day)
error is float divided by zero. (when I use lower 146 days.)
if I set length +146 days, then codes work well.
I think this code cause error:
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))
log data was so small, so error occurred. (my opinion)
data is yahoo finance data. Thanx to read
When I # below code, then data got infinite.
data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))

Must the input height of a 1D CNN be constant?

I'm currently doing my honours research project on online/dynamic signature verification. I am using the SVC 2004 dataset (Task 2). I have done the following data processing:
def load_dataset_normalized(path):
file_names = os.listdir(path)
num_of_persons = len(file_names)
initial_starting_point = np.zeros(np.shape([7]))
x_dataset = []
y_dataset = []
for infile in file_names:
full_file_name = os.path.join(path, infile)
file = open(full_file_name, "r")
file_lines = file.readlines()
num_of_points = int(file_lines[0])
x = []
y = []
time_stamp = []
button_status = []
azimuth_angles = []
altitude = []
pressure = []
for idx, line in enumerate(file_lines[1:]):
idx+=1
nums = line.split(' ')
if idx == 1:
nums[2] = 0
initial_starting_point = nums
x.append(int(nums[0]))
y.append(int(nums[1]))
time_stamp.append(0)
button_status.append(int(nums[3]))
azimuth_angles.append(int(nums[4]))
altitude.append(int(nums[5]))
pressure.append(int(nums[6]))
else:
x.append(int(nums[0]))
y.append(int(nums[1]))
time_stamp.append(10)
button_status.append(int(nums[3]))
azimuth_angles.append(int(nums[4]))
altitude.append(int(nums[5]))
pressure.append(int(nums[6]))
max_x = max(x)
max_y = max(y)
max_azimuth_angle = max(azimuth_angles)
max_altitude = max(altitude)
max_pressure = max(pressure)
min_x = min(x)
min_y = min(y)
min_azimuth_angle = min(azimuth_angles)
min_altitude = min(altitude)
min_pressure = min(pressure)
#Alignment normalization:
for i in range(num_of_points):
x[i] -= int(initial_starting_point[0])
y[i] -= int(initial_starting_point[1])
azimuth_angles[i] -= int(initial_starting_point[4])
altitude[i] -= int(initial_starting_point[5])
pressure[i] -= int(initial_starting_point[6])
#Size normalization
for i in range(num_of_points):
x[i] = ((x[i] - max_x) / (min_x - max_x))
y[i] = ((y[i] - max_y) / (min_y - max_y))
azimuth_angles[i] = ((azimuth_angles[i] - max_azimuth_angle) / (min_azimuth_angle - max_azimuth_angle))
altitude[i] = ((altitude[i] - max_altitude) / (min_altitude - max_altitude))
pressure[i] = ((pressure[i] - max_pressure) / (min_pressure - max_pressure))
#data points to dataset
x_line = []
for i in range (num_of_points):
x_line.append([x[i], y[i], time_stamp[i], button_status[i], azimuth_angles[i], altitude[i], pressure[i]])
if i == num_of_points-1:
x_dataset.append(x_line)
infile_without_extension = infile.replace('.TXT','')
index_of_s = infile_without_extension.find("S")
index_of_num = index_of_s + 1
sig_ID = int(infile_without_extension[index_of_num:])
if sig_ID < 21:
y_dataset.append([1,0])
else:
y_dataset.append([0,1])
x_dataset = np.asarray(x_dataset)
y_dataset = np.asarray(y_dataset)
return x_dataset, y_dataset
I also have another method that takes the values as they are in the text file and created an "original" dataset.
Now, the aim of my research is to create a CRNN (convolutional recurrent neural network) that can identify if a signature is authentic or forged. Here is the code for the model:
class crnn_model:
def __init__(self, trainX, trainy, testX, testy, optimizer_method):
self.trainX = trainX
self.trainy = trainy
self.testX = testX
self.testy = testy
self.evaluate_model(optimizer_method)
def evaluate_model(self, optimizer_method):
verbose, epochs, batch_size = 0, 40, 10
n_timesteps, n_features, n_outputs = len(self.trainX), 7, 2
print(n_timesteps)
model = keras.Sequential()
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features), use_bias=True))
model.add(keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(keras.layers.Dropout(0.5))
model.add(keras.layers.MaxPooling1D(pool_size=2))
model.add(keras.layers.Flatten())
model.add(keras.layers.LSTM(2, input_shape=[30592,1], return_sequences=True))
model.summary()
# Compile the model
model.compile(optimizer=optimizer_method, loss='categorical_crossentropy', metrics=['accuracy'])
#fit model
model.fit(self.trainX, self.trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
# evaluate model
_, accuracy = model.evaluate(self.testX, self.testy, batch_size=batch_size, verbose=0)
return accuracy
Here is the problem I am having: the number of points used to store each signature is different, hence making the input height of the input matrix vary from one signature to the next. Must I now force the dataset to some uniform/constant number of points?
Much appreciated for your time.

DDPG - Actor Critic Network does not converge with continous Actionspace

I'm currently a bit confused. I implemented an Actor-Critic Network and depending on the setup ist either begins to converge a little but produces values far from right. Or it produces the nearly the same loss values over and over again but right from the start produces values which are kind of correct.
I really have no clue how that is possible.
This is my current Model which produces values but does not converge:
def create_actor_model(self):
state_input = Input(shape=self.observation_space_shape)
h1 = Dense(18, activation='linear')(state_input)
h1l = LeakyReLU(alpha=0.01)(h1)
h3 = Dense(18, activation='tanh')(h1l)
h3n = BatchNormalization()(h3)
output = Dense(self.action_space_shape[0], activation='tanh')(h3n)
model = Model(input=state_input, output=output)
adam = Adam(lr=self.action_space_shape)
model.compile(loss="mse", optimizer=adam)
return state_input, model
def create_critic_model(self):
state_input = Input(shape=self.observation_space_shape)
state_h1 = Dense(18, activation='relu')(state_input)
state_h2 = Dense(36)(state_h1)
action_input = Input(shape=self.action_space_shape)
action_h1 = Dense(36)(action_input)
merged = Add()([state_h2, action_h1])
l_h1 = LeakyReLU(alpha=0.01)(merged)
merged_h1 = Dense(18, activation='tanh')(l_h1)
h1n = BatchNormalization()(merged_h1)
output = Dense(1, activation='tanh')(h1n)
model = Model(input=[state_input, action_input], output=output)
adam = Adam(lr=self.action_space_shape)
model.compile(loss="mse", optimizer=adam, metrics=['mae', 'mse', 'msle'])
return state_input, action_input, model
def _train_actor_batch(self, batch_size, s_batch, a_batch, r_batch, s2_batch):
predicted_action = self.actor_model.predict_on_batch(s_batch)
grads = self.sess.run(self.critic_grads, feed_dict={
self.critic_state_input: s_batch,
self.critic_action_input: predicted_action
})
self.sess.run(self.optimize, feed_dict={
self.actor_state_input: s_batch,
self.actor_critic_grad: grads[0]
})
def _train_critic_batch(self, batch_size, s_batch, a_batch, r_batch, s2_batch):
target_action = self.target_actor_model.predict_on_batch(s2_batch)
future_reward = self.target_critic_model.predict_on_batch([s2_batch, target_action])
rewards = []
for k in range(batch_size):
this_future_reward = future_reward[k] if batch_size > 1 else future_reward
rewards.append(r_batch[k] + self.gamma * this_future_reward)
return self.critic_model.train_on_batch([s_batch, a_batch], np.reshape(rewards, batch_size))
def replay(self, batch_size):
memory_length = len(self.memory)
if memory_length < batch_size:
samples = random.sample(self.memory, memory_length)
else:
samples = random.sample(self.memory, batch_size)
s_batch = np.array([cur_state[0] for cur_state, _, _, _ in samples])
a_batch = np.array([float(action[0]) for _, action, _, _ in samples])
r_batch = np.array([reward[0] for _, _, reward, _ in samples])
s2_batch = np.array([new_state[0] for _, _, _, new_state in samples])
critic_loss = self._train_critic_batch(len(s_batch), s_batch, a_batch, r_batch, s2_batch)
self._train_actor_batch(len(s_batch), s_batch, a_batch, r_batch, s2_batch)
self.update_target()
return critic_loss
def _update_actor_target(self):
actor_model_weights = self.actor_model.get_weights()
actor_target_weights = self.target_actor_model.get_weights()
for i in range(len(actor_target_weights)):
actor_target_weights[i] = actor_model_weights[i] * self.tau + actor_target_weights[i] * (1 - self.tau)
self.target_actor_model.set_weights(actor_target_weights)
def _update_critic_target(self):
critic_model_weights = self.critic_model.get_weights()
critic_target_weights = self.target_critic_model.get_weights()
for i in range(len(critic_target_weights)):
critic_target_weights[i] = critic_model_weights[i] * self.tau + critic_target_weights[i] * (1 - self.tau)
self.target_critic_model.set_weights(critic_target_weights)
def update_target(self):
self._update_actor_target()
self._update_critic_target()
def __init__(self):
self.memory = deque(maxlen=2000)
self.actor_state_input, self.actor_model = self.create_actor_model()
_, self.target_actor_model = self.create_actor_model()
self.actor_critic_grad = tf.placeholder(tf.float32, [None, self.action_space_shape[0]])
actor_model_weights = self.actor_model.trainable_weights
self.actor_grads = tf.gradients(self.actor_model.output,
actor_model_weights, -self.actor_critic_grad)
grads = zip(self.actor_grads, actor_model_weights)
self.optimize = tf.train.AdamOptimizer(self.learning_rate).apply_gradients(grads)
self.critic_state_input, self.critic_action_input, \
self.critic_model = self.create_critic_model()
_, _, self.target_critic_model = self.create_critic_model()
self.critic_grads = tf.gradients(self.critic_model.output,
self.critic_action_input)
self.sess.run(tf.initialize_all_variables())
And then i am training with the following method which is called for each epoch (at the end, the memory which is getting cleared is the experience-replay memory):
def train(self, states, epoch, env, is_new_epoch):
train_size = int(len(states) * 0.70)
train = dict(list(states.items())[0:train_size])
test = dict(list(states.items())[train_size:len(states)])
with warnings.catch_warnings():
warnings.simplefilter("ignore")
working_states = copy(train)
critic_eval = list()
rewards = dict()
for last_day, (last_state_vec, _, last_action) in working_states.items():
this_day = last_day + timedelta(days=1)
if this_day in working_states:
(new_state_vec, _, _) = working_states.get(this_day)
rewards[last_day] = env.get_reward_by_states(last_state_vec, new_state_vec)
amt = len(working_states)
i = 0
for last_day, (last_state_vec, _, last_action) in working_states.items():
i+= 1
this_day = last_day + timedelta(days=1)
if this_day in working_states:
(new_state_vec, _, _) = working_states.get(this_day)
reward = np.reshape(rewards[last_day], [1, ])
self.remember(last_state_vec, [last_action], reward, new_state_vec)
new_eval = self.replay(env.batch_size)
critic_eval.append(new_eval)
self.memory.clear()
These are the loss values i got over 15 epochs:
One Sample as it comes from the memory:
state
[8 79 48246 53607 29 34 37 Decimal('1.0000000000') 6]
action
0.85
reward
0.2703302
next state
[9 79 48074 57869 27 28 32 Decimal('1.0000000000') 0]

Tensorflow : ValueError: Can't load save_path when it is None

import os
import tarfile
from six.moves import urllib
URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'
def fetch_data(url = URL, path = PATH):
if not os.path.isdir(path):
os.makedirs(path)
file_path = os.path.join(oath, "aclImdb_v1.tar.gz")
urllib.request.urlretrieve(url, file_path)
file_gz = tarfile.open(file_path)
file_gz.extractall(path = path)
file_gz.close()
import pyprind # for progress visualisation
import pandas as pd
PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0} # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000) # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()
# use nested for loops to iterate over 'train' & 'test' subdir
for s in ('test', 'train'):
for l in ('pos', 'neg'): # and read text files from 'pos' and 'neg' subdir
path = os.path.join(PATH, s, l)
for file in os.listdir(path):
# append to the df pandas DataFrame with an int class (post = 1, neg = 0)
with open(os.path.join(path, file), 'r', encoding = 'utf-8') as infile:
txt = infile.read()
df = df.append([[txt, labels[l]]], ignore_index = True)
pbar.update()
df.columns = ['review', 'sentiment']
import numpy as np
np. random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index = False, encoding = 'utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
df.head(3)
# Separate words and count each word's occurence
import pyprind # for progress visualisation
from collections import Counter
from string import punctuation
import re
counts = Counter() # collects the counts of occurence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
title = 'Counting word occurences...') # progress bar
for i, review in enumerate(df['review']):
text = ''.join([c if c not in punctuation else ' '+c+' '
for c in review]).lower()
df.loc[i, 'review'] = text
pbar.update()
counts.update(text.split())
# Mapping each unique word to an int
word_counts = sorted(counts, key = counts.get, reverse = True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}
mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
title = 'Map movie reviews to integers...')
# Left-pad with zeros if the sequence length < 200
# Use 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype = int)
for i, row in enumerate(mapped_reviews):
review_arr = np.array(row)
sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values
# Define the mini-batches generator
np.random.seed(123)
def batch_gen(x, y = None, batch_size = 64):
n_batches = len(x) // batch_size
x = x[:n_batches * batch_size]
if y is not None:
y = y[:n_batches * batch_size]
for ii in range(0, len(x), batch_size):
if y is not None:
yield x[ii : ii + batch_size], y[ii : ii + batch_size]
else:
yield x[ii : ii + batch_size]
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' ## suppress the 3.5 warning if using TF 1.4
class SentimentRNN(object):
# Define __init__
def __init__(self,
n_words,
seq_len = 200,
lstm_size = 256,
num_layers = 1,
batch_size = 64,
learning_rate = 0.0001,
embed_size = 200):
self.n_words = n_words
self.seq_len = seq_len
self.lstm_size = lstm_size # no. of hidden units
self.num_layers = num_layers
self.batch_size = batch_size
self.learning_rate = learning_rate
self.embed_size = embed_size
self.g = tf.Graph()
with self.g.as_default():
tf.set_random_seed(123)
self.build()
self.saver = tf.train.Saver()
self.init_op = tf.global_variables_initializer()
# Define the build method
def build(self):
# Define the placeholders
tf_x = tf.placeholder(tf.int32,
shape = (self.batch_size, self.seq_len),
name = 'tf_x')
tf_y = tf.placeholder(tf.float32,
shape = (self.batch_size),
name = 'tf_y')
tf_keepprob = tf.placeholder(tf.float32,
name = 'tf_keepprob')
# Create the embedding layer
embedding = tf.Variable(
tf.random_uniform(
shape = (self.n_words, self.embed_size),
minval = -1,
maxval = 1),
name = 'embedding')
embed_x = tf.nn.embedding_lookup(embedding,
tf_x,
name = 'embed_x')
# Define LSTM cells and stack them
cells = tf.contrib.rnn.MultiRNNCell(
[tf.contrib.rnn.DropoutWrapper(
tf.contrib.rnn.BasicLSTMCell(num_units = self.lstm_size),
output_keep_prob = tf_keepprob)
for i in range(self.num_layers)])
# Define the initial state:
self.initial_state = cells.zero_state(
self.batch_size, tf.float32)
print(' << initial state >> ', self.initial_state)
# Put together components with tf.nn.dynamic_rnn
lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
cell = cells,
inputs = embed_x,
initial_state = self.initial_state)
## lstm_outputs shape: [batch_size, max_time, cells.output_size]
print('\n << lstm_output >> ', lstm_outputs)
print('\n << final state >> ', self.final_state)
# Apply a full-connected layer on the RNN output
logits = tf.layers.dense(
inputs = lstm_outputs[:, -1],
units = 1, # dimensionality of the output space
activation = None,
name = 'logits')
# Remove dimensions of size 1 from the tensor shape
logits = tf.squeeze(input = logits,
name = 'logits_squeezed')
print ('\n << logits >> ', logits)
# If you want prob's
y_proba = tf.nn.sigmoid(logits, name = 'probabilities')
predictions = {'probabilities' : y_proba,
'labels' : tf.cast(tf.round(y_proba),
tf.int32,
name = 'labels')}
print('\n << predictions >> ', predictions)
# Define the cost function
cost = tf.reduce_mean(
tf.nn.sigmoid_cross_entropy_with_logits(
labels = tf_y,
logits = logits),
name = 'cost')
# Define the optimiser
optimizer = tf.train.AdamOptimizer(self.learning_rate)
train_op = optimizer.minimize(cost, name = 'train_op')
# Define the train method
def train(self, X_train, y_train, num_epochs):
with tf.Session(graph = self.g) as sess:
sess.run(self.init_op)
iteration = 1
for epoch in range(num_epochs):
state = sess.run(self.initial_state)
for batch_x, batch_y in batch_gen(
X_train,
y_train,
batch_size = self.batch_size):
feed = {'tf_x:0' : batch_x,
'tf_y:0' : batch_y,
'tf_keepprob:0' : 0.5,
self.initial_state : state}
loss, _, state = sess.run(
['cost:0',
'train_op',
self.final_state],
feed_dict=feed)
if iteration % 20 == 0:
print("Epoch: %d/%d Iteration: %d "
"| Train loss: %.5f" % (
epoch + 1,
num_epochs,
iteration,
loss))
iteration += 1
if (epoch + 1) % 10 == 0:
self.saver.save(
sess,
"model/sentiment-%d.ckpt" % epoch)
# Define the predict method
def predict(self, X_data, return_proba=False):
preds = []
with tf.Session(graph = self.g) as sess:
self.saver.restore(
sess,
tf.train.latest_checkpoint('model/'))
test_state = sess.run(self.initial_state)
for ii, batch_x in enumerate(batch_gen(
x = X_data,
y = None,
batch_size = self.batch_size), 1):
feed = {'tf_x:0' : batch_x,
'tf_keepprob:0' : 1.0,
self.initial_state : test_state}
if return_proba:
pred, test_state = sess.run(
['probabilities:0', self.final_state],
feed_dict=feed)
else:
pred, test_state = sess.run(
['labels:0', self.final_state],
feed_dict=feed)
preds.append(pred)
return np.concatenate(preds)
for review in df['review']:
mapped_reviews.append([word_to_int[word] for word in review.split()])
pbar.update()
rnn = SentimentRNN(n_words = n_words,
seq_len = sequence_length,
embed_size = 256,
lstm_size = 128,
num_layers = 1,
batch_size = 100,
learning_rate = 0.001)
preds = rnn.predict(X_test)
y_true = y_test\[:len(preds)\]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))][1]
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, the number of layers = 1 may generalise better
enter image description here
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.

The error just means tf.train.latest_checkpoint didn't find anything. It returns None, then the Saver complains because it was passed None. So there's no checkpoint in that directory.

How do I predict the sentiment in the LSTM model using python3 in tensorflow?

wordsList = np.load('training_data/wordsList.npy')
wordsList = wordsList.tolist() #Originally loaded as numpy array
wordsList = [word.decode('UTF-8') for word in wordsList] #Encode words as UTF-8
wordVectors = np.load('training_data/wordVectors.npy')
loaded some positiveFiles and negativeFiles in the variable
with tf.device('/gpu:0'):
ids = np.zeros((numFiles, maxSeqLength), dtype='int32')
fileCounter = 0
for pf in positiveFiles:
with open(pf, "r") as f:
indexCounter = 0
line=f.readline()
cleanedLine = cleanSentences(line)
split = cleanedLine.split()
for word in split:
try:
ids[fileCounter][indexCounter] = wordsList.index(word)
except ValueError:
ids[fileCounter][indexCounter] = 399999 #Vector for unkown words
#print('value :' + str(ids))
indexCounter = indexCounter + 1
if indexCounter >= maxSeqLength:
break
fileCounter = fileCounter + 1
for nf in negativeFiles:
with open(nf, "r") as f:
indexCounter = 0
line=f.readline()
cleanedLine = cleanSentences(line)
split = cleanedLine.split()
for word in split:
try:
ids[fileCounter][indexCounter] = wordsList.index(word)
except ValueError:
ids[fileCounter][indexCounter] = 399999 #Vector for unkown words
# print('value :' + str(ids))
indexCounter = indexCounter + 1
if indexCounter >= maxSeqLength:
break
fileCounter = fileCounter + 1
#Pass into embedding function and see if it evaluates.
np.save('idsMatrix', ids)
batchSize = 24
Training and testing methods
def getTrainBatch():
labels = []
arr = np.zeros([batchSize, maxSeqLength])
for i in range(batchSize):
if (i % 2 == 0):
num = randint(1,11499)
labels.append([1,0])
else:
num = randint(13499,24999)
labels.append([0,1])
arr[i] = ids[num-1:num]
return arr, labels
def getTestBatch():
labels = []
arr = np.zeros([batchSize, maxSeqLength])
for i in range(batchSize):
num = randint(11499,13499)
if (num <= 12499):
labels.append([1,0])
else:
labels.append([0,1])
arr[i] = ids[num-1:num]
return arr, labels
with tf.device('/gpu:0'):
batchSize = 24
lstmUnits = 64
numClasses = 2
iterations = 100000
tf.reset_default_graph()
labels = tf.placeholder(tf.float32, [batchSize, numClasses])
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength])
data = tf.Variable(tf.zeros([batchSize, maxSeqLength, numDimensions]), dtype=tf.float32)
data = tf.nn.embedding_lookup(wordVectors, input_data)
lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.75)
value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)
with tf.device('/gpu:0'):
weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))
value = tf.transpose(value, [1, 0, 2])
last = tf.gather(value, int(value.get_shape()[0]) - 1)
prediction = (tf.matmul(last, weight) + bias)
correctPred = tf.equal(tf.argmax(prediction,1), tf.argmax(labels,1))
accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=labels))
optimizer = tf.train.AdamOptimizer().minimize(loss)
sess = tf.InteractiveSession()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
with tf.device('/gpu:0'):
for i in range(iterations):
nextBatch, nextBatchLabels = getTrainBatch();
sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels})
iterations = 10
for i in range(iterations):
nextBatch, nextBatchLabels = getTestBatch();
sess.run(accuracy, {input_data: nextBatch, labels: nextBatchLabels})
Here I am trying to predict the output in the form of 1 or 0 for a given sentence.
after loading this file from the checkpoint by this..How am I suppose to test the sentence is Positive(1) or Negative(0).
new_saver = tf.train.import_meta_graph('models/pretrained....')
new_saver.restore(sess, tf.train.latest_checkpoint('models/./'))
Please help.

Use naming for inputs and output, then retrieve the tensor from graph to do prediction; I have suggested few required changes and additional code to get prediction going
...
input_data = tf.placeholder(tf.int32, [batchSize, maxSeqLength], name='inputs')
...
prediction = (tf.matmul(last, weight) + bias)
# you may use softmax if you want probabilities for prediction, but not for calculating the loss
# prediction = tf.nn.softmax(prediction)
prediction = tf.identity(prediction, name='prediction')
...
with tf.device('/gpu:0'):
for i in range(iterations):
nextBatch, nextBatchLabels = getTrainBatch();
sess.run(optimizer, {input_data: nextBatch, labels: nextBatchLabels}
saver.save(sess, 'model')
code for restoring: here use the relative/absolute path to model.meta and model
new_saver = tf.train.import_meta_graph('/path/to/model.meta')
new_saver.restore(sess, '/path/to/model')
with tf.Session() as sess:
g = tf.get_default_graph()
inputs = g.get_tensor_by_name('inputs:0')
prediction = g.get_tensor_by_name('prediction:0')
prediction_ = sess.run(prediction, {inputs: your_inputs})

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Multivariate Encoder-Decoder Model approaches value - keras

Related

how can I predict only 5 days price in this LSTM model (by pytorch)

Must the input height of a 1D CNN be constant?

DDPG - Actor Critic Network does not converge with continous Actionspace

Tensorflow : ValueError: Can't load save_path when it is None

How do I predict the sentiment in the LSTM model using python3 in tensorflow?

Categories

Resources