I am studying the MNIST dataset with an MLP. However, I get an error that says "missing 1 required positional argument: 'test_y'" and I can't work out how to handle it. Please tell me how to deal with this error message.
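For context, here is a minimal, hypothetical example (made-up names, not taken from my code) of how this kind of TypeError arises: a function that declares four required positional parameters but is called with only three.

def demo(a, b, c, d):
    # four required positional parameters
    return a + b + c + d

demo(1, 2, 3)  # TypeError: demo() missing 1 required positional argument: 'd'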
This is the code I am running:
def homework(train_X, train_y, test_X, test_y):
    epoch = 10000
    batch_size = 20
    learning_rate = 1e-3
    input_size = 784
    hidden_size = 100
    output_size = 10
    data_num = train_X.shape[0]
    np.random.seed(0)
    W1 = np.random.randn(input_size, hidden_size)
    b1 = np.zeros(hidden_size)
    W2 = np.random.randn(hidden_size, output_size)
    b2 = np.zeros(output_size)

    def softmax(x):
        x -= np.max(x, axis=1).reshape((-1, 1))
        return np.exp(x) / np.sum(np.exp(x), axis=1).reshape((-1, 1))

    def cross_entropy(y, output):
        batch_size = y.shape[0]
        return -np.sum(np.log(output[np.arange(batch_size), y])) / batch_size

    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    def forward(x):
        fwd = {}
        fwd['h1'] = sigmoid(np.dot(x, W1) + b1)
        fwd['prob'] = softmax(np.dot(fwd['h1'], W2) + b2)
        return fwd

    for n in range(epoch):
        loss_sum = 0
        for i in range(0, data_num, batch_size):
            x = train_X[i:i+batch_size]
            y = train_y[i:i+batch_size]
            fwd = forward(x)
            loss_sum += cross_entropy(y, fwd['prob'])
            grad = network.gradient(x, y)
            for key in ('W1', 'b1', 'W2', 'b2'):
                network.params[key] -= learning_rate * grad[key]
            loss = network.loss(x, y)
            train_loss_list.append(loss)
        if np.mod(n, 1000) == 0:
            pred_y = np.argmax(forward(test_X)['prob'], axis=1)
            accuracy = f1_score(test_y, pred_y, average='macro')
            print("epoch: %5d, loss_sum: %.5f, accuracy: %.5f" % (n, loss_sum, accuracy))
    pred_y = np.argmax(forward(test_X)['prob'], axis=1)
    return pred_y
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
import numpy as np

def load_mnist():
    mnist = fetch_mldata('MNIST original')
    mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                               mnist.target.astype('int32'), random_state=42)
    mnist_X = mnist_X / 255.0
    return train_test_split(mnist_X, mnist_y,
                            test_size=0.2,
                            random_state=42)
def validate_homework():
    train_X, test_X, train_y, test_y = load_mnist()
    # validate for small dataset
    train_X_mini = train_X[:100]
    train_y_mini = train_y[:100]
    test_X_mini = test_X[:100]
    test_y_mini = test_y[:100]
    pred_y = homework(train_X_mini, train_y_mini, test_X_mini)
    print(f1_score(test_y_mini, pred_y, average='macro'))

def score_homework():
    train_X, test_X, train_y, test_y = load_mnist()
    pred_y = homework(train_X, train_y, test_X)
    print(f1_score(test_y, pred_y, average='macro'))

validate_homework()
# score_homework()
I want to train a Variational Auto-Encoder for a simple task: learn a 3-class dataset and generate new samples from its latent space. The problem is that, although it seems like an easy task, I can't make it work. I expect it to reproduce the same 3 clusters when generating new samples from the decoder (second image).
I played with different values for beta, but it doesn't make things better.
How can I fix it and get the desired output, or are there some restrictions? Thanks in advance.
The code is below; you can easily copy-paste the 3 code pieces into 3 notebook cells and run them.
Imports:
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import minimize
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.datasets import make_classification
Here's the dataset:
class TrainDataset(Dataset):
    def __init__(self, X):
        self.data = X.astype('float32')

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], 0

X, y = make_classification(n_samples=100, n_features=3, n_informative=3, n_redundant=0, n_classes=3, n_clusters_per_class=1, class_sep=2, random_state=16)

train_ratio = 0.7
batch_size = 8
train_loader = torch.utils.data.DataLoader(TrainDataset(X[:int(train_ratio * len(X))]), batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(TrainDataset(X[int(train_ratio * len(X)):]), batch_size=batch_size)

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')
ax.scatter(X[:, 0], X[:, 1], X[:, 2], c=y)
plt.show()
Now the VAE:
class VAEmini(nn.Module):
    def __init__(self, latent_d=2):
        super().__init__()
        self.fc1 = nn.Linear(3, 3)
        self.fc21 = nn.Linear(3, latent_d)
        self.fc22 = nn.Linear(3, latent_d)
        self.fc3 = nn.Linear(latent_d, 3)
        self.fc4 = nn.Linear(3, 3)

    def encode(self, x):
        h1 = F.relu(self.fc1(x))
        return self.fc21(h1), self.fc22(h1)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        h3 = F.relu(self.fc3(z))
        return self.fc4(h3)

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        return self.decode(z), mu, logvar


def loss_function(recon_x, x, mu, logvar, beta):
    BCE = F.mse_loss(recon_x, x, reduction='mean')
    KLD = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
    return BCE + beta * KLD
def train(epoch, beta):
    model_mini.train()
    train_loss = 0
    for batch_idx, (data, _) in enumerate(train_loader):
        data = data.to(device)
        optimizer.zero_grad()
        recon_batch, mu, logvar = model_mini(data)
        loss = loss_function(recon_batch, data, mu, logvar, beta)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader),
                loss.item() / len(data)))
    print('====> Epoch: {} Average loss: {:.4f}'.format(
        epoch, train_loss / len(train_loader.dataset)))


def test(epoch, beta):
    model_mini.eval()
    test_loss = 0
    with torch.no_grad():
        for i, (data, _) in enumerate(test_loader):
            data = data.to(device)
            recon_batch, mu, logvar = model_mini(data)
            test_loss += loss_function(recon_batch, data, mu, logvar, beta).item()
    print('====> Test set loss: {:.4f}'.format(test_loss / len(test_loader.dataset)))
    return test_loss / len(test_loader.dataset)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_mini = VAEmini(latent_d=2).to(device)

log_interval = 5
epochs = 100
optimizer = optim.Adam(model_mini.parameters(), lr=1e-2)
beta = 0.01

for epoch in range(1, epochs + 1):
    train(epoch, beta=beta)
    test(epoch, beta=beta)

with torch.no_grad():
    sample = torch.randn(1000, 2).to(device)
    sample = model_mini.decode(sample).cpu()

    fig = plt.figure(figsize=(20, 20))
    ax = fig.add_subplot(projection='3d')
    ax.scatter(sample[:, 0], sample[:, 1], sample[:, 2])
    plt.show()
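One way to see whether the problem sits in the encoder or in the decoder is to look at where the training points land in latent space. A minimal diagnostic sketch, assuming the variables from the cells above (model_mini, X, y, device) are already defined:

# Encode the training data and scatter the latent means, coloured by class.
# If the three classes do not separate here, sampling from N(0, I) cannot be
# expected to produce three clusters after decoding.
with torch.no_grad():
    inputs = torch.from_numpy(X.astype('float32')).to(device)
    mu, logvar = model_mini.encode(inputs)
    mu = mu.cpu().numpy()

plt.figure(figsize=(6, 6))
plt.scatter(mu[:, 0], mu[:, 1], c=y)
plt.title('Latent means of the training data')
plt.show()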
This is a learning-based RNA and disease prediction code using a CNN that I downloaded from GitHub. The output is accuracy and AUC values, but the results are very unstable (sometimes 0.3, sometimes 0.8).
I don't know what the reason is, but the split into training set and validation set in this code is done by a self-defined function, so I want to try 10-fold cross-validation instead. However, when I write the cross-validation code, the problem shown in the title appears.
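For reference, the usual scikit-learn pattern for K-fold cross-validation trains and evaluates once per fold inside the loop, splitting x and y with the same index arrays; a minimal sketch (train_and_evaluate is a hypothetical placeholder, not part of the code below):

import numpy as np
from sklearn.model_selection import KFold

def run_cross_validation(x, y, n_splits=10):
    # Each iteration yields train/dev index arrays; apply the same indices to x and y.
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=10)
    scores = []
    for train_idx, dev_idx in kf.split(x):
        x_train, x_dev = x[train_idx], x[dev_idx]
        y_train, y_dev = y[train_idx], y[dev_idx]
        scores.append(train_and_evaluate(x_train, y_train, x_dev, y_dev))  # hypothetical helper
    return np.mean(scores)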
This is the code that splits the data into training and validation sets in the source code.
def get_data(args):
    input_data, input_label = dh.get_samples(args)
    input_data = standard_scale(input_data)
    dev_sample_percentage = args.dev_percentage
    test_sample_percentage = args.test_percentage
    x = np.array(input_data)
    # Randomly shuffle data
    np.random.seed(10)
    shuffle_indices = np.random.permutation(np.arange(len(input_label)))
    input_data = [x[i] for i in shuffle_indices]
    input_label = [input_label[i] for i in shuffle_indices]
    dev_sample_index = -2 * int(dev_sample_percentage * float(len(input_label)))
    test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
    x_train, x_dev, test_data = input_data[:dev_sample_index], input_data[dev_sample_index:test_sample_index], input_data[test_sample_index:]
    y_train, y_dev, test_label = input_label[:dev_sample_index], input_label[dev_sample_index:test_sample_index], input_label[test_sample_index:]
    return x_train, x_dev, test_data, y_train, y_dev, test_label
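As a worked example of what this split produces (hypothetical sizes, not from the repository):

# Hypothetical: 1000 samples with dev_percentage = test_percentage = 0.1
n = 1000
dev_sample_index = -2 * int(0.1 * n)    # -200
test_sample_index = -1 * int(0.1 * n)   # -100
# train = samples[:-200], dev = samples[-200:-100], test = samples[-100:]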
This is my modified code.
def get_data(args):
    input_data, input_label = dh.get_samples(args)
    input_data = standard_scale(input_data)
    dev_sample_percentage = args.dev_percentage
    test_sample_percentage = args.test_percentage
    x = np.array(input_data)
    y = np.array(input_label)
    kf = KFold(n_splits=10)
    d = kf.split(x)
    for train_idx, test_idx in d:
        x_train = x[train_idx]
        x_dev = x[test_idx]
    l = kf.split(y)
    for train_idx, test_idx in l:
        y_train = y[train_idx]
        y_dev = y[test_idx]
    test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
    test_data = input_data[test_sample_index:]
    test_lable = input_label[test_sample_index:]
    return x_train, x_dev, y_train, y_dev, test_data, test_lable
This is a screenshot of the error.
This is the complete code of this part.
#! /usr/bin/env python
import tensorflow as tf
import numpy as np
import os
import argparse
import data_helpers as dh
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import precision_recall_curve
from sklearn import metrics
from tensorflow.contrib import learn
import matplotlib.pyplot as plt
import sklearn.preprocessing as prep
from sklearn.metrics import average_precision_score
from sklearn.model_selection import KFold
def parse_args():
    parser = argparse.ArgumentParser(description="Run CNN.")
    ## the input files:
    ## disease-gene relationships and miRNA-gene relationships
    parser.add_argument('--input_disease_miRNA', nargs='?', default='..\..\data\CNN\disease-miro-1024-sigmoid.csv',
                        help='Input disease_gene_relationship file')
    parser.add_argument('--input_label', nargs='?', default='..\..\data\CNN\label.csv',
                        help='sample label')
    parser.add_argument('--batch_size', nargs='?', default=64,
                        help='number of samples in one batch')
    parser.add_argument('--training_epochs', nargs='?', default=1,
                        help='number of epochs in SGD')
    parser.add_argument('--display_step', nargs='?', default=10)
    parser.add_argument('--test_percentage', nargs='?', default=0.1,
                        help='percentage of test samples')
    parser.add_argument('--dev_percentage', nargs='?', default=0.1,
                        help='percentage of validation samples')
    parser.add_argument('--L2_norm', nargs='?', default=0.001,
                        help='L2 regularization strength')
    parser.add_argument('--keep_prob', nargs='?', default=0.5,
                        help='keep_prob when using dropout option')
    parser.add_argument('--optimizer', nargs='?', default=tf.train.AdamOptimizer,
                        help='optimizer for learning weights')
    parser.add_argument('--learning_rate', nargs='?', default=1e-3,
                        help='learning rate for the SGD')
    return parser.parse_args()
def standard_scale(X_train):
    preprocessor = prep.StandardScaler().fit(X_train)
    X_train = preprocessor.transform(X_train)
    return X_train

def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    weights = tf.Variable(initial)
    return weights

def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding="VALID")

def max_pool_2(x, W):
    return tf.nn.max_pool(x, ksize=W, strides=[1, 10, 1, 1], padding="VALID")
def get_data(args):
    input_data, input_label = dh.get_samples(args)
    input_data = standard_scale(input_data)
    dev_sample_percentage = args.dev_percentage
    test_sample_percentage = args.test_percentage
    x = np.array(input_data)
    y = np.array(input_label)
    kf = KFold(n_splits=10)
    d = kf.split(x)
    for train_idx, test_idx in d:
        x_train = x[train_idx]
        x_dev = x[test_idx]
    l = kf.split(y)
    for train_idx, test_idx in l:
        y_train = y[train_idx]
        y_dev = y[test_idx]
    test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
    test_data = input_data[test_sample_index:]
    test_lable = input_label[test_sample_index:]
    return x_train, x_dev, y_train, y_dev, test_data, test_lable

# # Randomly shuffle data
# np.random.seed(10)
# shuffle_indices = np.random.permutation(np.arange(len(input_label)))
# input_data = [x[i] for i in shuffle_indices]
# input_label = [input_label[i] for i in shuffle_indices]
# dev_sample_index = -2 * int(dev_sample_percentage * float(len(input_label)))
# test_sample_index = -1 * int(test_sample_percentage * float(len(input_label)))
# x_train, x_dev, test_data = input_data[:dev_sample_index], input_data[dev_sample_index:test_sample_index], input_data[test_sample_index:]
# y_train, y_dev, test_label = input_label[:dev_sample_index], input_label[dev_sample_index:test_sample_index], input_label[test_sample_index:]
#
# return x_train, x_dev, test_data, y_train, y_dev, test_label
def deepnn(x, keep_prob, args):
    with tf.name_scope('reshape'):
        x = tf.reshape(x, [-1, 1024, 1, 1])
    with tf.name_scope('conv_pool'):
        filter_shape = [4, 1, 1, 4]
        W_conv = weight_variable(filter_shape)
        b_conv = bias_variable([4])
        h_conv = tf.nn.relu(conv2d(x, W_conv) + b_conv)
        h_pool = tf.nn.max_pool(h_conv, ksize=[1, 4, 1, 1], strides=[1, 4, 1, 1], padding="VALID")
        # filter_shape2 = [4,1,4,4]
        # W_conv2 = weight_variable(filter_shape2)
        # b_conv2 = bias_variable([4])
        # h_conv2 = tf.nn.relu(conv2d(h_pool, W_conv2) + b_conv2)
        # h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1,4,1,1], strides=[1,4,1,1], padding="VALID")
    regula = tf.contrib.layers.l2_regularizer(args.L2_norm)
    h_input1 = tf.reshape(h_pool, [-1, 255 * 4])
    W_fc1 = weight_variable([255 * 4, 50])
    b_fc1 = bias_variable([50])
    h_input2 = tf.nn.relu(tf.matmul(h_input1, W_fc1) + b_fc1)
    h_keep = tf.nn.dropout(h_input2, keep_prob)
    W_fc2 = weight_variable([50, 2])
    b_fc2 = bias_variable([2])
    h_output = tf.matmul(h_keep, W_fc2) + b_fc2
    regularizer = regula(W_fc1) + regula(W_fc2)
    return h_output, regularizer
def main(args):
    with tf.device('/cpu:0'):
        x_train, x_dev, test_data, y_train, y_dev, test_label = get_data(args)
    input_data = tf.placeholder(tf.float32, [None, 1024])
    input_label = tf.placeholder(tf.float32, [None, 2])
    keep_prob = tf.placeholder(tf.float32)
    y_conv, losses = deepnn(input_data, keep_prob, args)
    y_res = tf.nn.softmax(y_conv)
    with tf.name_scope('loss'):
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=input_label)
        cross_entropy = tf.reduce_mean(cross_entropy)
        los = cross_entropy + losses
    with tf.name_scope('optimizer'):
        optimizer = args.optimizer
        learning_rate = args.learning_rate
        train_step = optimizer(learning_rate).minimize(los)
        # optimizer = tf.train.MomentumOptimizer(learning_rate= 0.02, momentum=)
        # train_step = optimizer.minimize(cross_entropy)
    with tf.name_scope('accuracy'):
        predictions = tf.argmax(y_conv, 1)
        correct_predictions = tf.equal(predictions, tf.argmax(input_label, 1))
        correct_predictions = tf.cast(correct_predictions, tf.float32)
        accuracy = tf.reduce_mean(correct_predictions)
    batch_size = args.batch_size
    num_epochs = args.training_epochs
    display_step = args.display_step
    k_p = args.keep_prob
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        batches = dh.batch_iter(list(zip(x_train, y_train)), batch_size, num_epochs)
        for i, batch in enumerate(batches):
            x_batch, y_batch = zip(*batch)  # pull the data in one batch at a time
            train_step.run(feed_dict={input_data: x_batch, input_label: y_batch, keep_prob: k_p})
            if i % display_step == 0:
                loss = sess.run(los, feed_dict={input_data: x_train, input_label: y_train, keep_prob: 1.0})
                # print('after training loss = %f' % loss)
                y_predict = sess.run(y_res, feed_dict={input_data: x_dev, input_label: y_dev, keep_prob: 1.0})[:, 1]
                loss = sess.run(los, feed_dict={input_data: x_dev, input_label: y_dev, keep_prob: 1.0})
                # print('test loss = %f' % loss)
                false_positive_rate1, true_positive_rate1, thresholds1 = roc_curve(np.array(y_dev)[:, 1], y_predict)
                roc_auc1 = auc(false_positive_rate1, true_positive_rate1)
                # print(roc_auc1)
        ### print(accuracy.eval(feed_dict={input_data: x_train, input_label: y_train, keep_prob: 1.0}))
        print('accuracy=', accuracy.eval(feed_dict={input_data: test_data, input_label: test_label, keep_prob: 1.0}))
        y_predict = sess.run(y_res, feed_dict={input_data: test_data, input_label: test_label, keep_prob: 1.0})[:, 1]
        false_positive_rate1, true_positive_rate1, thresholds1 = roc_curve(np.array(test_label)[:, 1], y_predict)
        roc_auc1 = auc(false_positive_rate1, true_positive_rate1)
        print('roc_auc1=', roc_auc1)
        # plt.figure()
        # lw = 2
        # plt.title("ROC curve of %s (AUC = %.4f)")
        # plt.xlabel("False Positive Rate")
        # plt.ylabel("True Positive Rate")
        # plt.plot(false_positive_rate1, true_positive_rate1)  # use pylab to plot x and y
        # plt.show()  # show the plot on the screen
        #
        # plt.show()
        # np.savetxt("result_fp_tp_md_aver.txt", roc_curve(np.array(test_label)[:, 1], y_predict))
        # precision, recall, _ = precision_recall_curve(np.array(test_label)[:, 1], y_predict)
        # #
        # average_precision = average_precision_score(np.array(test_label)[:, 1], y_predict)
        # #
        # print('Average precision-recall score: {0:0.2f}'.format(average_precision))
        # y_predict[y_predict >= 0.5] = 1
        # y_predict[y_predict < 0.5] = 0
        # print(y_predict)
        # print(metrics.f1_score(np.array(test_label)[:, 1], y_predict))
        # np.savetxt("precision_aver.txt", precision)
        # np.savetxt("recall_aver.txt", recall)

if __name__ == '__main__':
    args = parse_args()
    main(args)
Please help me! Thanks a lot!
I followed Aladdin Persson's YouTube video to code up just the encoder portion of the transformer model in PyTorch, except that I used PyTorch's built-in multi-head attention layer. The model seems to produce data of the correct shape. However, during training, the training loss does not drop and the resulting model always predicts the same output of 0.4761. The dataset used for training is the Sarcasm Detection Dataset from Kaggle. I would appreciate any help you can give on errors that I have made.
import pandas as pd
from transformers import BertTokenizer
import torch.nn as nn
import torch
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math
df = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
encoded_input = tokenizer(df['headline'].tolist(), return_tensors='pt',padding=True)
X = encoded_input['input_ids']
y = torch.tensor(df['is_sarcastic'].values).float()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify = y)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.empty_cache()
class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout, expansion_ratio):
        super(TransformerBlock, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_dim, expansion_ratio*embed_dim),
            nn.ReLU(),
            nn.Linear(expansion_ratio*embed_dim, embed_dim)
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query):
        attention, _ = self.attention(value, key, query)
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out


class Encoder(nn.Module):
    # the vocab size is one more than the max value in the X matrix.
    def __init__(self, vocab_size=30109, embed_dim=128, num_layers=1, num_heads=4, device="cpu", expansion_ratio=4, dropout=0.1, max_length=193):
        super(Encoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(vocab_size, embed_dim)
        self.position_embedding = nn.Embedding(max_length, embed_dim)
        self.layers = nn.ModuleList(
            [
                TransformerBlock(embed_dim, num_heads, dropout, expansion_ratio) for _ in range(num_layers)
            ]
        )
        self.dropout = nn.Dropout(dropout)
        self.classifier1 = nn.Linear(embed_dim, embed_dim)
        self.classifier2 = nn.Linear(embed_dim, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            # print(out.shape)
            out = layer(out, out, out)
        # Get the first output for classification.
        # Pooled output from Hugging Face is: last-layer hidden state of the first token of the
        # sequence (classification token), further processed by a Linear layer and a Tanh activation.
        # Pooled output from Hugging Face will be different from out[:, 0, :], which is the output
        # at the CLS token position.
        out = self.relu(self.classifier1(out[:, 0, :]))
        out = self.classifier2(out)
        return out
torch.cuda.empty_cache()

net = Encoder(device=device)
net.to(device)

batch_size = 32
num_train_samples = X_train.shape[0]
num_val_samples = X_test.shape[0]

criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5)

val_loss_hist = []
loss_hist = []
epoch = 0
min_val_loss = math.inf
print("Training Started")
patience = 0

for _ in range(100):
    epoch += 1
    net.train()
    epoch_loss = 0
    permutation = torch.randperm(X_train.size()[0])
    for i in range(0, X_train.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        features = X_train[indices].to(device)
        labels = y_train[indices].reshape(-1, 1).to(device)
        output = net.forward(features)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss = epoch_loss / num_train_samples * num_val_samples
    loss_hist.append(epoch_loss)

    # print("Eval")
    net.eval()
    epoch_val_loss = 0
    permutation = torch.randperm(X_test.size()[0])
    for i in range(0, X_test.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        features = X_test[indices].to(device)
        labels = y_test[indices].reshape(-1, 1).to(device)
        output = net.forward(features)
        loss = criterion(output, labels)
        epoch_val_loss += loss.item()
    val_loss_hist.append(epoch_val_loss)
    scheduler.step(epoch_val_loss)

    # if epoch % 5 == 0:
    print("Epoch: " + str(epoch) + " Train Loss: " + format(epoch_loss, ".4f") + ". Val Loss: " + format(epoch_val_loss, ".4f") + " LR: " + str(optimizer.param_groups[0]['lr']))

    if epoch_val_loss < min_val_loss:
        min_val_loss = epoch_val_loss
        torch.save(net.state_dict(), "torchmodel/weights_best.pth")
        print('\033[93m' + "Model Saved" + '\033[0m')
        patience = 0
    else:
        patience += 1
    if (patience == 10):
        break

print("Training Ended")
This is an amateur problem, but I cannot solve this issue on my own.
I was trying to make a neural network for the churn-modelling dataset on bank data.
Every time I run this network I get an accuracy of 1.0, so I think there is something wrong and it's not working.
Can anyone help me figure out what is wrong?
Also, please explain how I can avoid problems like these in the future.
The code is:
import pandas as pd
import numpy as np

data = pd.read_csv('D:\Churn_Modelling.csv')
X = data.iloc[:, 3:13].values
Y = data.iloc[:, 13].values

from sklearn.preprocessing import LabelEncoder, OneHotEncoder
label_encoder_x_1 = LabelEncoder()
X[:, 1] = label_encoder_x_1.fit_transform(X[:, 1])
label_encoder_x_2 = LabelEncoder()
X[:, 2] = label_encoder_x_2.fit_transform(X[:, 2])
one_hot_encoder = OneHotEncoder(categorical_features=[1])
X = one_hot_encoder.fit_transform(X).toarray()
X = X[:, 1:]

from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)

from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

import tensorflow as tf

epochs = 20
batch_size = 50
learning_rate = 0.003

n_output = 1
n_input = X_train.shape[1]

X_placeholder = tf.placeholder("float32", [None, n_input], name="X")
Y_placeholder = tf.placeholder("float32", [None, 1], name="y")

n_neurons_1 = 64
n_neurons_2 = 32
n_neurons_3 = 16

layer_1 = {'weights': tf.Variable(tf.random_normal([n_input, n_neurons_1])),
           'biases': tf.Variable(tf.random_normal([n_neurons_1]))}
layer_2 = {'weights': tf.Variable(tf.random_normal([n_neurons_1, n_neurons_2])),
           'biases': tf.Variable(tf.random_normal([n_neurons_2]))}
layer_3 = {'weights': tf.Variable(tf.random_normal([n_neurons_2, n_neurons_3])),
           'biases': tf.Variable(tf.random_normal([n_neurons_3]))}
output_layer = {'weights': tf.Variable(tf.random_normal([n_neurons_3, n_output])),
                'biases': tf.Variable(tf.random_normal([n_output]))}

l1 = tf.add(tf.matmul(X_placeholder, layer_1['weights']), layer_1['biases'])
l1 = tf.nn.relu(l1)
l2 = tf.add(tf.matmul(l1, layer_2['weights']), layer_2['biases'])
l2 = tf.nn.relu(l2)
l3 = tf.add(tf.matmul(l2, layer_3['weights']), layer_3['biases'])
l3 = tf.nn.relu(l3)

output_layer = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
output_layer = tf.nn.sigmoid(output_layer)

cost = tf.reduce_mean(tf.reduce_sum(
    tf.square(Y_placeholder - output_layer), reduction_indices=[1]))
optimizer = tf.train.AdamOptimizer().minimize(cost)

correct_prediction = tf.equal(tf.argmax(Y_placeholder, 1), tf.argmax(output_layer, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

def next_batch(size, x, y):
    idx = np.arange(0, len(x))
    np.random.shuffle(idx)
    idx = idx[:size]
    x_shuffle = [x[i] for i in idx]
    y_shuffle = [y[i] for i in idx]
    return np.asarray(x_shuffle), np.asarray(y_shuffle)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    total_batches = int(len(X_train) / batch_size)
    for epoch in range(epochs):
        avg_cost = 0
        print('epoch: ', epoch)
        for batch in range(total_batches):
            x_batch_data, y_batch_data = next_batch(batch_size, X_train, Y_train)
            y_batch_data = y_batch_data.reshape((50, 1))
            _, c = sess.run([optimizer, cost],
                            feed_dict={X_placeholder: x_batch_data,
                                       Y_placeholder: y_batch_data})
            avg_cost += c / total_batches
        print("Epoch:", (epoch + 1), "cost =", "{:.3f}".format(avg_cost))
    Y_test_temp = Y_test.reshape((2000, 1))
    print('accuracy: ', sess.run(accuracy,
                                 feed_dict={X_placeholder: X_test, Y_placeholder: Y_test_temp}))
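One detail worth checking (a hedged observation, not a verified fix): with a single output column, tf.argmax over axis 1 returns 0 for both Y_placeholder and output_layer, so the equality test can be satisfied trivially. For a single sigmoid output, accuracy is usually computed by thresholding the probability instead; a minimal sketch using the tensors defined above:

# Threshold the sigmoid output at 0.5 and compare with the 0/1 labels.
predicted_class = tf.cast(tf.greater(output_layer, 0.5), tf.float32)
correct_prediction_alt = tf.equal(predicted_class, Y_placeholder)
accuracy_alt = tf.reduce_mean(tf.cast(correct_prediction_alt, tf.float32))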
I am using the MNIST dataset to study an MLP (multi-layer perceptron) in Python. While running the code given below, I get the following error message:
UnboundLocalError: local variable 'x' referenced before assignment
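For reference, this error typically appears when a function both reads and assigns a name, so Python treats the name as local throughout the function; a minimal, hypothetical example unrelated to the code below:

x = 0

def bump():
    x += 1   # UnboundLocalError: local variable 'x' referenced before assignment
    return x

bump()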
Where should I insert "global x", or what should I do? Here is my code:
def homework(train_X, train_y, test_X, test_y):
    epoch = 10000
    batch_size = 20
    learning_rate = 1e-3
    input_size = 784
    hidden_size = 100
    output_size = 10
    data_num = train_X.shape[0]
    np.random.seed(0)
    W1 = np.random.randn(input_size, hidden_size)
    b1 = np.zeros(hidden_size)
    W2 = np.random.randn(hidden_size, output_size)
    b2 = np.zeros(output_size)
    for n in range(epoch):
        loss_sum = 0
        for i in range(0, data_num, batch_size):
            x = train_X[i:i+batch_size]
            y = train_y[i:i+batch_size]
            fwd = forward(x)
            loss_sum += cross_entropy(y, fwd['prob'])
            grad = network.gradient(x, y)
            for key in ('W1', 'b1', 'W2', 'b2'):
                network.params[key] -= learning_rate * grad[key]
            loss = network.loss(x, y)
            train_loss_list.append(loss)
        if np.mod(n, 1000) == 0:
            pred_y = np.argmax(forward(test_X)['prob'], axis=1)
            accuracy = f1_score(test_y, pred_y, average='macro')
            print("epoch: %5d, loss_sum: %.5f, accuracy: %.5f" % (n, loss_sum, accuracy))
    pred_y = np.argmax(forward(test_X)['prob'], axis=1)
    return pred_y
def softmax(x):
    x -= np.max(x, axis=1).reshape((-1, 1))
    return np.exp(x) / np.sum(np.exp(x), axis=1).reshape((-1, 1))

def cross_entropy(y, output):
    batch_size = y.shape[0]
    return -np.sum(np.log(output[np.arange(batch_size), y])) / batch_size

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def forward(x):
    fwd = {}
    fwd['h1'] = sigmoid(np.dot(x, W1) + b1)
    fwd['prob'] = softmax(np.dot(fwd['h1'], W2) + b2)
    return fwd
from sklearn.utils import shuffle
from sklearn.metrics import f1_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
import numpy as np

def load_mnist():
    mnist = fetch_mldata('MNIST original')
    mnist_X, mnist_y = shuffle(mnist.data.astype('float32'),
                               mnist.target.astype('int32'), random_state=42)
    mnist_X = mnist_X / 255.0
    return train_test_split(mnist_X, mnist_y,
                            test_size=0.2,
                            random_state=42)
def validate_homework():
    train_X, test_X, train_y, test_y = load_mnist()
    train_X_mini = train_X[:100]
    train_y_mini = train_y[:100]
    test_X_mini = test_X[:100]
    test_y_mini = test_y[:100]
    pred_y = homework(train_X_mini, train_y_mini, test_X_mini, test_y_mini)
    print(f1_score(test_y_mini, pred_y, average='macro'))

def score_homework():
    train_X, test_X, train_y, test_y = load_mnist()
    pred_y = homework(train_X, train_y, test_X, test_y)
    print(f1_score(test_y, pred_y, average='macro'))

validate_homework()
# score_homework()