I am trying to build an ANN model using TensorFlow. At the moment, I am able to run the program as one long script. Now, however, I would like to convert my code to something that is easier to use, so I converted it to a class. Here is what I did (basically copied the entire set of code into a class):
import os
import numpy as np
import tensorflow as tf

class NNmodel:

    def __init__(self,
                 layers, inpShape, outShape,
                 features,
                 learning_rate=0.1, nSteps=100,
                 saveFolder='models'):

        self.layers = layers
        self.features = features
        self.learning_rate = learning_rate
        self.saveFolder = saveFolder
        self.nSteps = nSteps  # was hard-coded to 100, ignoring the argument

        self.d = tf.placeholder(shape=inpShape, dtype=tf.float32, name='d')        # input layer
        self.dOut = tf.placeholder(shape=outShape, dtype=tf.float32, name='dOut')  # output layer

        self.weights = []
        self.biases = []
        self.compute = [self.d]

        layerSizes = [self.features] + [l['size'] for l in self.layers]
        for i, (v1, v2) in enumerate(zip(layerSizes, layerSizes[1:])):
            self.weights.append(
                tf.Variable(np.random.randn(v1, v2)*0.1, dtype=tf.float32, name='W{}'.format(i)))
            self.biases.append(
                tf.Variable(np.zeros((1, 1)), dtype=tf.float32, name='b{}'.format(i)))
            self.compute.append(tf.matmul(
                self.compute[-1], self.weights[i]) + self.biases[i])

            if self.layers[i]['activation'] == 'tanh':
                self.compute.append(tf.tanh(self.compute[-1]))
            if self.layers[i]['activation'] == 'relu':
                self.compute.append(tf.nn.relu(self.compute[-1]))
            if self.layers[i]['activation'] == 'sigmoid':
                self.compute.append(tf.sigmoid(self.compute[-1]))

        self.result = self.compute[-1]
        self.delta = self.dOut - self.result
        self.cost = tf.reduce_mean(self.delta**2)

        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate).minimize(self.cost)
        return
    def findVal(self, func, inpDict, restorePt=None):
        # note: a fresh session is created and all variables are
        # re-initialized on every call, unless a restore point is given
        saver = tf.train.Saver()
        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)

        if restorePt is not None:
            try:
                saver.restore(sess, tf.train.latest_checkpoint(restorePt))
                print('Session restored')
            except Exception as e:
                print('Unable to restore the session ...')
                return None
        else:
            print('Warning, no restore point selected ...')

        result = sess.run(func, feed_dict=inpDict)
        sess.close()
        return result
    def optTF(self, inpDict, printSteps=50, modelFile=None):
        cost = []
        saver = tf.train.Saver()
        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)

        print('x'*100)
        for i in range(self.nSteps):
            # First run the optimizer ...
            sess.run(self.optimizer, feed_dict=inpDict)

            # Save all the data you want to save
            c = sess.run(self.cost, feed_dict=inpDict)
            cost.append(c)
            if (i % printSteps) == 0:
                print('{:5d}'.format(i))

        result = sess.run(self.result, feed_dict=inpDict)  # was self.run(...), which does not exist

        if modelFile is not None:
            path = saver.save(sess, os.path.join(
                self.saveFolder, modelFile))
            print('Model saved in: {}'.format(path))
        else:
            print('Warning! model not saved')

        sess.close()
        return cost, result
When I use this model, I see that there seems to be a problem:
N = 500
features = 2
nSteps = 1000
X = [(np.random.random(N))*np.random.randint(1000, 2000) for i in range(features)]
X = np.array([np.random.random(N), np.random.random(N)])
data = [X.T, X[0].reshape(-1, 1)]
layers = [
    {'name':'6', 'size': 10, 'activation':'tanh'},
    {'name':'7', 'size': 1, 'activation':'linear'},
]
m1 = NNmodel(layers, inpShape=np.shape(data[0]), outShape = np.shape(data[1]),
             features=features,
             learning_rate=0.1, nSteps = 100,
             saveFolder='models1')
d = tf.placeholder(shape = np.shape(data[0]), dtype = tf.float32, name='d_4')
dOut = tf.placeholder(shape = np.shape(data[1]), dtype = tf.float32, name='dOut')
m1.findVal(m1.result, {d: data[0], dOut:data[1]})
Now it appears that there is a mismatch between the placeholders d and dOut that I provide from outside and the ones that are already present within the model, self.d and self.dOut. How do I solve this problem?
Why not just use the placeholders declared within the model?
m1.findVal(m1.result, {m1.d: data[0], m1.dOut:data[1]})
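The placeholders you create outside the class are brand-new nodes in the default graph; they never feed into self.result, so TensorFlow complains that self.d and self.dOut are left unfed. If you want to hide the placeholders from callers entirely, here is a minimal sketch of a convenience wrapper you could add to the class (the method name predict is hypothetical, not part of the original class):

    def predict(self, inputs, outputs, restorePt=None):
        # Hypothetical helper: always feed the model's own placeholders.
        return self.findVal(self.result, {self.d: inputs, self.dOut: outputs}, restorePt)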
Screenshots of all the error messages are in the linked picture.
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm  # progress bar
import dataloader4kg
from sklearn.metrics import precision_score, recall_score, accuracy_score

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

class KGCN( nn.Module ):

    def __init__( self, n_users, n_entitys, n_relations,
                  e_dim, adj_entity, adj_relation, n_neighbors,
                  aggregator_method = 'sum',
                  act_method = F.relu, drop_rate = 0.5 ):
        super( KGCN, self ).__init__()
        self.e_dim = e_dim  # embedding dimension
        self.aggregator_method = aggregator_method  # message aggregation method
        self.n_neighbors = n_neighbors  # number of neighbors

        self.user_embedding = nn.Embedding( n_users, e_dim, max_norm = 1 )
        self.entity_embedding = nn.Embedding( n_entitys, e_dim, max_norm = 1 )
        self.relation_embedding = nn.Embedding( n_relations, e_dim, max_norm = 1 )

        self.adj_entity = adj_entity      # adjacency list of entities
        self.adj_relation = adj_relation  # adjacency list of relations

        # linear layer
        self.linear_layer = nn.Linear(
            in_features = self.e_dim * 2 if self.aggregator_method == 'concat' else self.e_dim,
            out_features = self.e_dim,
            bias = True )

        self.act = act_method       # activation function
        self.drop_rate = drop_rate  # dropout rate

    def forward(self, users, items, is_evaluate = False):
        neighbor_entitys, neighbor_relations = self.get_neighbors( items )
        user_embeddings = self.user_embedding( users )
        item_embeddings = self.entity_embedding( items )
        # compute v-tilde
        neighbor_vectors = self.__get_neighbor_vectors( neighbor_entitys, neighbor_relations, user_embeddings )
        out_item_embeddings = self.aggregator( item_embeddings, neighbor_vectors, is_evaluate )
        # print(self.user_embedding)
        # print(self.entity_embedding)
        # print(self.relation_embedding)
        # print(out_item_embeddings)
        out = torch.sigmoid( torch.sum( user_embeddings * out_item_embeddings, axis = -1 ) )
        # print(out)
        return out

    def get_neighbors( self, items ):  # get neighbor entity embeddings and relation embeddings
        # [[1,2,3,4,5],[2,1,3,4,5]...[]] -- batch_size lists of n_neighbor ids in total
        entity_ids = [ self.adj_entity[item] for item in items ]
        relation_ids = [ self.adj_relation[item] for item in items ]
        neighbor_entities = [ torch.unsqueeze( self.entity_embedding( torch.LongTensor( one_ids ) ), 0 ) for one_ids in entity_ids ]
        neighbor_relations = [ torch.unsqueeze( self.relation_embedding( torch.LongTensor( one_ids ) ), 0 ) for one_ids in relation_ids ]
        # [batch_size, n_neighbor, dim]
        neighbor_entities = torch.cat( neighbor_entities, dim = 0 )
        neighbor_relations = torch.cat( neighbor_relations, dim = 0 )
        return neighbor_entities, neighbor_relations

    # compute v-tilde
    def __get_neighbor_vectors(self, neighbor_entitys, neighbor_relations, user_embeddings):
        # [batch_size, n_neighbor, dim]
        user_embeddings = torch.cat( [ torch.unsqueeze( user_embeddings, 1 ) for _ in range( self.n_neighbors ) ], dim = 1 )
        # [batch_size, n_neighbor]
        user_relation_scores = torch.sum( user_embeddings * neighbor_relations, axis = 2 )
        # [batch_size, n_neighbor]
        user_relation_scores_normalized = F.softmax( user_relation_scores, dim = -1 )
        # [batch_size, n_neighbor, 1]
        user_relation_scores_normalized = torch.unsqueeze( user_relation_scores_normalized, 2 )
        # [batch_size, dim]
        neighbor_vectors = torch.sum( user_relation_scores_normalized * neighbor_entitys, axis = 1 )
        return neighbor_vectors

    # further aggregation plus a linear layer to get v
    def aggregator(self, item_embeddings, neighbor_vectors, is_evaluate):
        # [batch_size, dim]
        if self.aggregator_method == 'sum':
            output = item_embeddings + neighbor_vectors
        elif self.aggregator_method == 'concat':
            # [batch_size, dim * 2]
            output = torch.cat( [ item_embeddings, neighbor_vectors ], axis = -1 )
        else:  # 'neighbor'
            output = neighbor_vectors
        if not is_evaluate:
            output = F.dropout( output, self.drop_rate )
        # [batch_size, dim]
        output = self.linear_layer( output )
        return self.act( output )

# evaluation
def do_evaluate( model, testSet ):
    testSet = torch.LongTensor( testSet )
    model.eval()
    with torch.no_grad():
        user_ids = testSet[:, 0]
        item_ids = testSet[:, 1]
        labels = testSet[:, 2]
        user_ids = user_ids.to( device )
        item_ids = item_ids.to( device )
        labels = labels.to( device )
        logits = model( user_ids, item_ids, True )
        predictions = [ 1 if i >= 0.5 else 0 for i in logits ]
        p = precision_score( y_true = labels, y_pred = predictions )
        r = recall_score( y_true = labels, y_pred = predictions )
        acc = accuracy_score( labels, y_pred = predictions )
        return p, r, acc

def train( epochs, batchSize, lr,
           n_users, n_entitys, n_relations,
           adj_entity, adj_relation,
           train_set, test_set,
           n_neighbors,
           aggregator_method = 'sum',
           act_method = F.relu, drop_rate = 0.5, weight_decay = 5e-4
           ):
    model = KGCN( n_users, n_entitys, n_relations,
                  10, adj_entity, adj_relation,
                  n_neighbors = n_neighbors,
                  aggregator_method = aggregator_method,
                  act_method = act_method,
                  drop_rate = drop_rate ).to(device)
    optimizer = torch.optim.Adam( model.parameters(), lr = lr, weight_decay = weight_decay )
    loss_fcn = nn.BCELoss()
    dataIter = dataloader4kg.DataIter()
    print( len( train_set ) // batchSize )

    for epoch in range( epochs ):
        total_loss = 0.0
        for datas in tqdm( dataIter.iter( train_set, batchSize = batchSize ) ):
            user_ids = datas[:, 0]
            item_ids = datas[:, 1]
            labels = datas[:, 2]
            user_ids = user_ids.to(device)
            item_ids = item_ids.to(device)
            labels = labels.to(device)
            logits = model.forward( user_ids, item_ids )
            loss = loss_fcn( logits, labels.float() )
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        p, r, acc = do_evaluate( model, test_set )
        print( "Epoch {} | Loss {:.4f} | Precision {:.4f} | Recall {:.4f} | Accuracy {:.4f} "
               .format( epoch, total_loss / ( len( train_set ) // batchSize ), p, r, acc ) )

if __name__ == '__main__':
    n_neighbors = 20
    users, items, train_set, test_set = dataloader4kg.readRecData( dataloader4kg.Ml_100K.RATING, dataloader4kg.Ml_100K.KG )
    entitys, relations, kgTriples = dataloader4kg.readKgData( dataloader4kg.Ml_100K.KG )
    adj_kg = dataloader4kg.construct_kg( kgTriples )
    adj_entity, adj_relation = dataloader4kg.construct_adj( n_neighbors, adj_kg, len( entitys ) )

    train( epochs = 40, batchSize = 1024, lr = 0.001,
           n_users = max( users ) + 1, n_entitys = max( entitys ) + 1,
           n_relations = max( relations ) + 1, adj_entity = adj_entity,
           adj_relation = adj_relation, train_set = train_set,
           test_set = test_set, n_neighbors = n_neighbors,
           aggregator_method = 'sum', act_method = F.relu, drop_rate = 0.5 )
This is the code of my model. I have already loaded the model onto CUDA, and the data is also moved onto CUDA during training. Why do I still get the error that the tensors are not on the same device, and how should I modify the code?
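For reference, one place worth checking (an educated guess from the posted code, not a confirmed diagnosis, since the screenshots are not included): get_neighbors builds its index tensors with torch.LongTensor(one_ids), which allocates on the CPU even when the model's embeddings live on the GPU, so the embedding lookup would mix devices. A minimal sketch of moving those indices over first, using the module-level device:

    # Sketch, assuming the device-mismatch error originates in get_neighbors:
    entity_ids = [ self.adj_entity[item] for item in items ]
    neighbor_entities = [
        torch.unsqueeze( self.entity_embedding( torch.LongTensor( one_ids ).to( device ) ), 0 )
        for one_ids in entity_ids
    ]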
class StockDataset(Dataset):
    # the dataset's job is to return the i-th record
    def __init__(self, symbol, x_frames, y_frames, start, end):
        self.symbol = symbol
        self.x_frames = x_frames
        self.y_frames = y_frames
        self.start = datetime.datetime(*start)
        self.end = datetime.datetime(*end)
        # fetch the data for the symbol and date range specified above
        self.data = pdr.DataReader(self.symbol, 'yahoo', self.start, self.end)

    def __len__(self):
        return len(self.data) - (self.x_frames + self.y_frames) + 1

    def __getitem__(self, idx):
        global data
        #global data_set
        # given index i, return that record; this turns the data into arrays
        idx += self.x_frames
        data = self.data.iloc[idx-self.x_frames:idx+self.y_frames]
        data = data[['High', 'Low', 'Open', 'Close', 'Adj Close', 'Volume']]
        data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))  # convert to log returns; the +1 guards against possible missing values
        global x_ex
        global y_ex
        x_ex = data[:self.x_frames]
        y_ex = data[self.x_frames:]
        data = data.values  # converted to a numpy array
        X = data[:self.x_frames]
        y = data[self.x_frames:]
        return X, y
This one is the dataset.
class LSTM(nn.Module):
    # the model is explained from the 50-minute mark of the lecture onward
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers, batch_size, dropout, use_bn):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.dropout = dropout
        self.use_bn = use_bn

        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)
        self.hidden = self.init_hidden()
        self.regressor = self.make_regressor()

    def init_hidden(self):
        return (torch.zeros(self.num_layers, self.batch_size, self.hidden_dim),
                torch.zeros(self.num_layers, self.batch_size, self.hidden_dim))

    def make_regressor(self):
        layers = []
        if self.use_bn:
            layers.append(nn.BatchNorm1d(self.hidden_dim))
        layers.append(nn.Dropout(self.dropout))
        layers.append(nn.Linear(self.hidden_dim, self.hidden_dim // 2))
        layers.append(nn.ReLU())
        layers.append(nn.Linear(self.hidden_dim // 2, self.output_dim))
        regressor = nn.Sequential(*layers)
        return regressor

    def forward(self, x):
        lstm_out, self.hidden = self.lstm(x, self.hidden)
        y_pred = self.regressor(lstm_out[-1].view(self.batch_size, -1))
        return y_pred
This one is the model.
def test(model, partition, args):
    global y_true
    global y_pred
    global X
    testloader = DataLoader(partition['test'],
                            batch_size=args.batch_size,
                            shuffle=False, drop_last=True)
    model.eval()
    test_acc = 0.0
    with torch.no_grad():
        for i, (X, y) in enumerate(testloader):
            X = X.transpose(0, 1).float().to(args.device)
            y_true = y[:, :, 3].float().to(args.device)
            model.hidden = [hidden.to(args.device) for hidden in model.init_hidden()]
            y_pred = model(X)
            test_acc += metric(y_pred, y_true)[0]
    test_acc = test_acc / len(testloader)
    return test_acc
This is the test data loader.
# ====== Random Seed Initialization ====== #
seed = 666
np.random.seed(seed)
torch.manual_seed(seed)

parser = argparse.ArgumentParser()
args = parser.parse_args("")
args.exp_name = "exp1_lr"
args.device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ====== Data Loading ====== #
args.symbol = '005930.KS'  # desired ticker
args.batch_size = 4        # batch size
args.x_frames = 5  # x: the previous n days of data; if this is too long, a one-week prediction becomes impossible
args.y_frames = 5  # y: the next n days of data; same caveat

# ====== Model Capacity ===== #
args.input_dim = 6
args.hid_dim = 50
args.n_layers = 2  # (number of hidden layers) see https://justkode.kr/deep-learning/pytorch-rnn

# ====== Regularization ======= #
args.l2 = 0.0001
args.dropout = 0.3
args.use_bn = True

# ====== Optimizer & Training ====== #
args.optim = 'RMSprop'  #'RMSprop' #SGD, RMSprop, ADAM...
args.lr = 0.001
args.epoch = 1

# ====== Experiment Variable ====== #
name_var1 = 'lr'        # lr = learning rate
name_var2 = 'n_layers'  # how many layers to stack
list_var1 = [0.001, 0.0001, 0.00001]
list_var2 = [1, 2, 3]

# actually build the datasets
trainset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2012,1,1), (2021,1,1))   # training period
valset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2021,1,2), (2021,12,30))   # validation period; needs at least +6 months +19 days, otherwise a 'float division by zero' error occurs. Why?? (as of 2021)
testset = StockDataset(args.symbol, args.x_frames, args.y_frames, (2022,1,10), (2022,1,14))  # test period; needs at least +6 months +25 days, otherwise a 'float division by zero' error occurs (as of 2022)
# the period seems to require a minimum number of trading days? << at least 146 trading days of data are required; giving fewer raises an error. Why??
partition = {'train': trainset, 'val': valset, 'test': testset}

for var1 in list_var1:
    for var2 in list_var2:
        setattr(args, name_var1, var1)
        setattr(args, name_var2, var2)
        print(args)

        setting, result = experiment(partition, deepcopy(args))
        save_exp_result(setting, result)
# be sure to delete the files in the directory before plotting, otherwise all the results overlap
This part regulates the hyperparameters.

I wonder how I can get a result when I set the test set length to 5 days (like (2022,1,10) to (2022,1,14))? This code didn't work unless I set the test set length to at least about 7 months (maybe +146 trading days); with fewer than 146 days the error is 'float division by zero'. If I set the length to +146 days, the code works fine.

I think this line causes the error:

data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))

The log values were very small, so the error occurred (my opinion). The data comes from Yahoo Finance. Thanks for reading.

When I comment out the line below, the data becomes infinite:

data = data.apply(lambda x: np.log(x+1) - np.log(x[self.x_frames-1]+1))
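For what it's worth, the only division in the posted code that can actually hit zero is at the end of the test loop, not the log-return line: test_acc = test_acc / len(testloader). With x_frames = 5, y_frames = 5, batch_size = 4 and drop_last=True, a 5-day test window gives the dataset a length of zero or less, so the loader produces no batches and 0.0 / 0 raises exactly "float division by zero". A minimal sketch of a guard inside test(), under that assumption:

    # Sketch: fail loudly instead of dividing by zero when the window is too short.
    n_batches = len(testloader)
    if n_batches == 0:
        raise ValueError(
            "Test window too short: need at least x_frames + y_frames trading days, "
            "plus enough records for one full batch (drop_last=True).")
    test_acc = test_acc / n_batches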
I am using PyTorch Lightning version 1.4.0 and have defined the following class for the dataset:
class CustomTrainDataset(Dataset):
    '''
    Custom PyTorch Dataset for training

    Args:
        data (pd.DataFrame) - DF containing product info (and maybe also ratings)
        all_itemIds (list) - Python3 list containing all Item IDs
    '''
    def __init__(self, data, all_orderIds):
        self.users, self.items, self.labels = self.get_dataset(data, all_orderIds)

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.labels[idx]

    def get_dataset(self, data, all_orderIds):
        users, items, labels = [], [], []
        # use the arguments rather than the globals train_ratings / all_itemIds
        user_item_set = set(zip(data['CustomerID'], data['ItemCode']))

        num_negatives = 7
        for u, i in user_item_set:
            users.append(u)
            items.append(i)
            labels.append(1)
            for _ in range(num_negatives):
                negative_item = np.random.choice(all_orderIds)
                while (u, negative_item) in user_item_set:
                    negative_item = np.random.choice(all_orderIds)
                users.append(u)
                items.append(negative_item)
                labels.append(0)

        return torch.tensor(users), torch.tensor(items), torch.tensor(labels)
followed by the PL class:
class NCF(pl.LightningModule):
    '''
    Neural Collaborative Filtering (NCF)

    Args:
        num_users (int): Number of unique users
        num_items (int): Number of unique items
        data (pd.DataFrame): Dataframe containing the food ratings for training
        all_orderIds (list): List containing all orderIds (train + test)
    '''
    def __init__(self, num_users, num_items, data, all_itemIds):
    # def __init__(self, num_users, num_items, ratings, all_movieIds):
        super().__init__()
        self.user_embedding = nn.Embedding(num_embeddings = num_users, embedding_dim = 8)
        # self.user_embedding = nn.Embedding(num_embeddings = num_users, embedding_dim = 10)
        self.item_embedding = nn.Embedding(num_embeddings = num_items, embedding_dim = 8)
        # self.item_embedding = nn.Embedding(num_embeddings = num_items, embedding_dim = 10)
        self.fc1 = nn.Linear(in_features = 16, out_features = 64)
        # self.fc1 = nn.Linear(in_features = 20, out_features = 64)
        self.fc2 = nn.Linear(in_features = 64, out_features = 64)
        self.fc3 = nn.Linear(in_features = 64, out_features = 32)
        self.output = nn.Linear(in_features = 32, out_features = 1)
        self.data = data
        # self.ratings = ratings
        # self.all_movieIds = all_movieIds
        self.all_orderIds = all_itemIds  # the constructor argument is named all_itemIds

    def forward(self, user_input, item_input):
        # Pass through embedding layers
        user_embedded = self.user_embedding(user_input)
        item_embedded = self.item_embedding(item_input)

        # Concat the two embedding layers
        vector = torch.cat([user_embedded, item_embedded], dim = -1)

        # Pass through dense layers
        vector = nn.ReLU()(self.fc1(vector))
        vector = nn.ReLU()(self.fc2(vector))
        vector = nn.ReLU()(self.fc3(vector))

        # Output layer
        pred = nn.Sigmoid()(self.output(vector))
        return pred

    def training_step(self, batch, batch_idx):
        user_input, item_input, labels = batch
        predicted_labels = self(user_input, item_input)
        loss = nn.BCELoss()(predicted_labels, labels.view(-1, 1).float())
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters())

    def train_dataloader(self):
        return DataLoader(
            CustomTrainDataset(  # was 'ChupsTrainDataset', an earlier name for the same class
                self.data, self.all_orderIds
            ),
            batch_size = 32, num_workers = 2
            # Google Colab's suggested max number of workers in the current
            # system is 2, not 4.
        )
print(f"num_users = {num_users}, num_items = {num_items} & all_itemIds = {len(all_itemIds)}")
# num_users = 12958, num_items = 511238 & all_itemIds = 9114
# Initialize NCF model-
model = NCF(num_users, num_items, train_ratings, all_itemIds)
trainer = pl.Trainer(
    max_epochs = 75, gpus = 1,
    # max_epochs = 5,
    reload_dataloaders_every_n_epochs = True,
    # reload_dataloaders_every_epoch = True, # deprecated!
    progress_bar_refresh_rate = 50,
    logger = False, checkpoint_callback = False)
trainer.fit(model)
# Save trained model as a checkpoint-
trainer.save_checkpoint("NCF_Trained.ckpt")
To load the saved checkpoint, I have tried:
trained_model = NCF.load_from_checkpoint(
    "NCF_Trained.ckpt", num_users = num_users,
    num_items = train_ratings, data = train_ratings,
    all_itemIds = all_itemIds)
trained_model = NCF(num_users, num_items, train_ratings, all_orderIds).load_from_checkpoint(checkpoint_path = "NCF_Trained.ckpt")
But these don't seem to work. How do I load this saved checkpoint?
Thanks!
Add a line in your __init__ method:

self.save_hyperparameters(logger=False)

Then call:

trained_model = NCF.load_from_checkpoint("NCF_Trained.ckpt")

As shown here, load_from_checkpoint is the primary way to load weights in pytorch-lightning, and it automatically loads the hyperparameters used in training, so you do not need to pass params except to overwrite existing ones. My suggestion is to try trained_model = NCF.load_from_checkpoint("NCF_Trained.ckpt").
In my case it was crucial to set the model into the evaluation mode via model.eval(). Otherwise it would produce wrong results.
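For concreteness, a minimal sketch of where that call sits in the posted class (only the top of __init__ shown; everything else is unchanged):

    def __init__(self, num_users, num_items, data, all_itemIds):
        super().__init__()
        # Record the constructor arguments inside the checkpoint so that
        # load_from_checkpoint can rebuild the model without re-passing them.
        self.save_hyperparameters(logger=False)
        self.user_embedding = nn.Embedding(num_embeddings = num_users, embedding_dim = 8)
        # ... remaining layers as before ...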
import os
import tarfile
from six.moves import urllib

URL = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz'
PATH = 'aclImdb'

def fetch_data(url = URL, path = PATH):
    if not os.path.isdir(path):
        os.makedirs(path)
    file_path = os.path.join(path, "aclImdb_v1.tar.gz")  # was os.path.join(oath, ...), a typo
    urllib.request.urlretrieve(url, file_path)
    file_gz = tarfile.open(file_path)
    file_gz.extractall(path = path)
    file_gz.close()
import pyprind  # for progress visualisation
import pandas as pd

PATH = 'aclImdb'
labels = {'pos': 1, 'neg': 0}  # int class labels for 'positive' and 'negative'
pbar = pyprind.ProgBar(50000)  # initialise a progress bar with 50k iterations = no. of docs
df = pd.DataFrame()

# use nested for loops to iterate over the 'train' & 'test' subdirs
for s in ('test', 'train'):
    for l in ('pos', 'neg'):  # and read text files from the 'pos' and 'neg' subdirs
        path = os.path.join(PATH, s, l)
        for file in os.listdir(path):
            # append to the df pandas DataFrame with an int class label (pos = 1, neg = 0)
            with open(os.path.join(path, file), 'r', encoding = 'utf-8') as infile:
                txt = infile.read()
            df = df.append([[txt, labels[l]]], ignore_index = True)
            pbar.update()
df.columns = ['review', 'sentiment']

import numpy as np
np.random.seed(0)
df = df.reindex(np.random.permutation(df.index))
df.to_csv('movie_data.csv', index = False, encoding = 'utf-8')
n_words = max(list(word_to_int.values())) + 1
df = pd.read_csv('movie_data.csv', encoding = 'utf-8')
df.head(3)

# Separate words and count each word's occurrence
import pyprind  # for progress visualisation
from collections import Counter
from string import punctuation
import re

counts = Counter()  # collects the counts of occurrence of each unique word
pbar = pyprind.ProgBar(len(df['review']),
                       title = 'Counting word occurrences...')  # progress bar
for i, review in enumerate(df['review']):
    text = ''.join([c if c not in punctuation else ' '+c+' '
                    for c in review]).lower()
    df.loc[i, 'review'] = text
    pbar.update()
    counts.update(text.split())

# Mapping each unique word to an int
word_counts = sorted(counts, key = counts.get, reverse = True)
print(word_counts[:5])
word_to_int = {word: ii for ii, word in enumerate(word_counts, 1)}

mapped_reviews = []
pbar = pyprind.ProgBar(len(df['review']),
                       title = 'Map movie reviews to integers...')

# Left-pad with zeros if the sequence length < 200
# Use the last 200 elements if the length > 200
sequence_length = 200
sequences = np.zeros((len(mapped_reviews), sequence_length), dtype = int)
for i, row in enumerate(mapped_reviews):
    review_arr = np.array(row)
    sequences[i, -len(row):] = review_arr[-sequence_length:]
# Split the dataset into training and test sets
X_train = sequences[:25000, :]
y_train = df.loc[:25000, 'sentiment'].values
X_test = sequences[25000:, :]
y_test = df.loc[25000:, 'sentiment'].values

# Define the mini-batches generator
np.random.seed(123)

def batch_gen(x, y = None, batch_size = 64):
    n_batches = len(x) // batch_size
    x = x[:n_batches * batch_size]
    if y is not None:
        y = y[:n_batches * batch_size]
    for ii in range(0, len(x), batch_size):
        if y is not None:
            yield x[ii : ii + batch_size], y[ii : ii + batch_size]
        else:
            yield x[ii : ii + batch_size]
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  ## suppress the 3.5 warning if using TF 1.4

class SentimentRNN(object):

    # Define __init__
    def __init__(self,
                 n_words,
                 seq_len = 200,
                 lstm_size = 256,
                 num_layers = 1,
                 batch_size = 64,
                 learning_rate = 0.0001,
                 embed_size = 200):
        self.n_words = n_words
        self.seq_len = seq_len
        self.lstm_size = lstm_size  # no. of hidden units
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.embed_size = embed_size

        self.g = tf.Graph()
        with self.g.as_default():
            tf.set_random_seed(123)
            self.build()
            self.saver = tf.train.Saver()
            self.init_op = tf.global_variables_initializer()

    # Define the build method
    def build(self):
        # Define the placeholders
        tf_x = tf.placeholder(tf.int32,
                              shape = (self.batch_size, self.seq_len),
                              name = 'tf_x')
        tf_y = tf.placeholder(tf.float32,
                              shape = (self.batch_size),
                              name = 'tf_y')
        tf_keepprob = tf.placeholder(tf.float32,
                                     name = 'tf_keepprob')

        # Create the embedding layer
        embedding = tf.Variable(
            tf.random_uniform(
                shape = (self.n_words, self.embed_size),
                minval = -1,
                maxval = 1),
            name = 'embedding')
        embed_x = tf.nn.embedding_lookup(embedding,
                                         tf_x,
                                         name = 'embed_x')

        # Define LSTM cells and stack them
        cells = tf.contrib.rnn.MultiRNNCell(
            [tf.contrib.rnn.DropoutWrapper(
                tf.contrib.rnn.BasicLSTMCell(num_units = self.lstm_size),
                output_keep_prob = tf_keepprob)
             for i in range(self.num_layers)])

        # Define the initial state:
        self.initial_state = cells.zero_state(
            self.batch_size, tf.float32)
        print(' << initial state >> ', self.initial_state)

        # Put together components with tf.nn.dynamic_rnn
        lstm_outputs, self.final_state = tf.nn.dynamic_rnn(
            cell = cells,
            inputs = embed_x,
            initial_state = self.initial_state)
        ## lstm_outputs shape: [batch_size, max_time, cells.output_size]
        print('\n << lstm_output >> ', lstm_outputs)
        print('\n << final state >> ', self.final_state)

        # Apply a fully-connected layer on the RNN output
        logits = tf.layers.dense(
            inputs = lstm_outputs[:, -1],
            units = 1,  # dimensionality of the output space
            activation = None,
            name = 'logits')
        # Remove dimensions of size 1 from the tensor shape
        logits = tf.squeeze(input = logits,
                            name = 'logits_squeezed')
        print('\n << logits >> ', logits)

        # If you want probabilities
        y_proba = tf.nn.sigmoid(logits, name = 'probabilities')
        predictions = {'probabilities': y_proba,
                       'labels': tf.cast(tf.round(y_proba),
                                         tf.int32,
                                         name = 'labels')}
        print('\n << predictions >> ', predictions)

        # Define the cost function
        cost = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                labels = tf_y,
                logits = logits),
            name = 'cost')

        # Define the optimiser
        optimizer = tf.train.AdamOptimizer(self.learning_rate)
        train_op = optimizer.minimize(cost, name = 'train_op')

    # Define the train method
    def train(self, X_train, y_train, num_epochs):
        with tf.Session(graph = self.g) as sess:
            sess.run(self.init_op)
            iteration = 1
            for epoch in range(num_epochs):
                state = sess.run(self.initial_state)
                for batch_x, batch_y in batch_gen(
                        X_train,
                        y_train,
                        batch_size = self.batch_size):
                    feed = {'tf_x:0': batch_x,
                            'tf_y:0': batch_y,
                            'tf_keepprob:0': 0.5,
                            self.initial_state: state}
                    loss, _, state = sess.run(
                        ['cost:0',
                         'train_op',
                         self.final_state],
                        feed_dict = feed)
                    if iteration % 20 == 0:
                        print("Epoch: %d/%d Iteration: %d "
                              "| Train loss: %.5f" % (
                                  epoch + 1,
                                  num_epochs,
                                  iteration,
                                  loss))
                    iteration += 1
                if (epoch + 1) % 10 == 0:
                    self.saver.save(
                        sess,
                        "model/sentiment-%d.ckpt" % epoch)

    # Define the predict method
    def predict(self, X_data, return_proba = False):
        preds = []
        with tf.Session(graph = self.g) as sess:
            self.saver.restore(
                sess,
                tf.train.latest_checkpoint('model/'))
            test_state = sess.run(self.initial_state)
            for ii, batch_x in enumerate(batch_gen(
                    x = X_data,
                    y = None,
                    batch_size = self.batch_size), 1):
                feed = {'tf_x:0': batch_x,
                        'tf_keepprob:0': 1.0,
                        self.initial_state: test_state}
                if return_proba:
                    pred, test_state = sess.run(
                        ['probabilities:0', self.final_state],
                        feed_dict = feed)
                else:
                    pred, test_state = sess.run(
                        ['labels:0', self.final_state],
                        feed_dict = feed)
                preds.append(pred)
        return np.concatenate(preds)
for review in df['review']:
    mapped_reviews.append([word_to_int[word] for word in review.split()])
    pbar.update()
rnn = SentimentRNN(n_words = n_words,
                   seq_len = sequence_length,
                   embed_size = 256,
                   lstm_size = 128,
                   num_layers = 1,
                   batch_size = 100,
                   learning_rate = 0.001)
preds = rnn.predict(X_test)
y_true = y_test[:len(preds)]
print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
Create an object of the SentimentRNN class with the following parameters:
n_words = n_words, seq_len = sequence_length, embed_size = 256, lstm_size = 128, num_layers = 1, batch_size = 100, learning_rate = 0.001.
Since we have a relatively small dataset, a single layer (num_layers = 1) may generalise better.
ValueError Traceback (most recent call last)
<ipython-input-23-a3cfe03a9a49> in <module>()
----> 1 preds = rnn.predict(X_test)
2 y_true = y_test[:len(preds)]
3 print('Test accuracy... %.3f' % (np.sum(preds == y_true) / len(y_true)))
<ipython-input-12-d83ee67c43b6> in predict(self, X_data, return_proba)
173 self.saver.restore(
174 sess,
--> 175 tf.train.latest_checkpoint('model/'))
176 test_state = sess.run(self.initial_state)
177
/usr/local/anaconda/lib/python3.6/site-packages/tensorflow/python/training/saver.py in restore(self, sess, save_path)
1680 return
1681 if save_path is None:
-> 1682 raise ValueError("Can't load save_path when it is None.")
1683 logging.info("Restoring parameters from %s", save_path)
1684 if context.in_graph_mode():
ValueError: Can't load save_path when it is None.
The error just means tf.train.latest_checkpoint didn't find anything: it returns None, and the Saver then complains because it was passed None. So there is no checkpoint in that directory.
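Note also that in the posted train method the model is only saved when (epoch + 1) % 10 == 0, so training for fewer than 10 epochs (or never calling train at all) leaves 'model/' empty. A minimal sketch of a friendlier restore inside predict, under that assumption:

    # Sketch: check for the checkpoint before handing it to the Saver.
    ckpt = tf.train.latest_checkpoint('model/')
    if ckpt is None:
        raise FileNotFoundError(
            "No checkpoint under 'model/': run rnn.train(X_train, y_train, num_epochs=10) "
            "first (the saver only writes every 10th epoch).")
    self.saver.restore(sess, ckpt)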
I copied a simple TensorFlow example from the book by Sam Abrahams. In chapter 4, when I test softmax.py on iris.data, the program raises no errors but also does not print any results. I have been debugging it for several days but don't know how to proceed. The code is as follows. This problem has puzzled me for almost a week, and I thank anyone who answers this question. Thank you very much!
import tensorflow as tf
import os

W = tf.Variable(tf.zeros([4, 3]), name="weights")
b = tf.Variable(tf.zeros([3]), name="bias")

def combine_inputs(X):
    return tf.matmul(X, W) + b

def inference(X):
    return tf.nn.softmax(combine_inputs(X))

def loss(X, Y):
    return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=combine_inputs(X), labels=Y))

def read_csv(batch_size, file_name, record_defaults):
    filename_queue = tf.train.string_input_producer([os.path.dirname(os.path.abspath(__file__)) + "/" + file_name])
    reader = tf.TextLineReader()
    key, value = reader.read(filename_queue)
    decoded = tf.decode_csv(value, record_defaults=record_defaults)
    return tf.train.shuffle_batch(decoded,
                                  batch_size=batch_size,
                                  capacity=batch_size * 50,
                                  min_after_dequeue=batch_size)

def inputs():
    sepal_length, sepal_width, petal_length, petal_width, label = \
        read_csv(100, "iris.data", [[0.0], [0.0], [0.0], [0.0], [""]])
    label_number = tf.to_int32(tf.argmax(tf.to_int32(tf.stack([
        tf.equal(label, ["Iris-setosa"]),
        tf.equal(label, ["Iris-versicolor"]),
        tf.equal(label, ["Iris-virginica"])
    ])), 0))
    features = tf.transpose(tf.stack([sepal_length, sepal_width, petal_length, petal_width]))
    return features, label_number

def train(total_loss):
    learning_rate = 0.01
    return tf.train.GradientDescentOptimizer(learning_rate).minimize(total_loss)

def evaluate(sess, X, Y):
    predicted = tf.cast(tf.argmax(inference(X), 1), tf.int32)
    print(sess.run(tf.reduce_mean(tf.cast(tf.equal(predicted, Y), tf.float32))))

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as sess:
    tf.global_variables_initializer().run()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    X, Y = inputs()
    total_loss = loss(X, Y)
    train_op = train(total_loss)

    training_steps = 1000
    for step in range(training_steps):
        sess.run([train_op])

    evaluate(sess, X, Y)
    coord.request_stop()
    coord.join(threads)
    sess.close()
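One likely cause worth checking (an observation on the posted ordering, not a verified fix): tf.train.start_queue_runners only starts the queue runners that exist at the moment it is called, but here it runs before inputs() builds the shuffle_batch queue, so that queue is never fed and sess.run([train_op]) blocks forever, which would look exactly like "no errors but no results". A sketch of the reordering, under that assumption:

with tf.Session(config=config) as sess:
    tf.global_variables_initializer().run()

    # Build the whole input pipeline first ...
    X, Y = inputs()
    total_loss = loss(X, Y)
    train_op = train(total_loss)

    # ... and only then start the queue runners, so the shuffle_batch
    # queue created inside inputs() is actually fed.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    for step in range(1000):
        sess.run([train_op])

    evaluate(sess, X, Y)
    coord.request_stop()
    coord.join(threads)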