Logistic regression 100% test accuracy: What am I doing wrong? - python-3.x

I am using TensorFlow on the MNIST handwritten digits. I keep getting 100% accuracy on the test set when using logistic regression (I was expecting much lower accuracy).
The following is the code I used. Can someone please point out what I am doing wrong?
I must be somehow overfitting the data, but I cannot figure out why the accuracy is this high. Also, when I tested the model on random handwritten numbers, it doesn't always get the prediction right, so most likely the way I am calculating the accuracy is incorrect. I believe I am using the correct formulas here. I am stumped. Any help will be much appreciated. Thanks.
import tensorflow as tf
gpu_options = tf.GPUOptions(allow_growth=True, per_process_gpu_memory_fraction=0.5)
s = tf.InteractiveSession(config=tf.ConfigProto(gpu_options=gpu_options))

import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.datasets import load_digits

cl = 2  # number of classes = 2
mnist = load_digits(cl)
X, y = mnist.data, mnist.target

# inputs and shareds
weights = tf.Variable(tf.zeros([64, cl]))  # 8x8 image, 2 output classes
b = tf.Variable(tf.zeros([cl]))
input_X = tf.placeholder('float32', [None, 64])
input_y = tf.placeholder('float32', [None, cl])
accuracy_train = tf.Variable(tf.zeros([1]))
accuracy_test = tf.Variable(tf.zeros([1]))

# create model to predict y
predicted_y = tf.nn.sigmoid(tf.matmul(input_X, weights) + b)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=predicted_y, labels=input_y))
optimizer = tf.train.GradientDescentOptimizer(0.001).minimize(loss)

def train_function(X, y):
    weights, c = s.run([optimizer, loss], {input_X: X, input_y: y})
    return weights, c

def predict_function(X):
    predict = s.run(probs, {input_X: X})
    return predict

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)
y_train.shape = (y_train.shape[0], 1)
y_test.shape = (y_test.shape[0], 1)

# one-hot encoding
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder(sparse=False)
y_train = enc.fit_transform(y_train).astype('int')
y_test = enc.fit_transform(y_test).astype('int')

from sklearn.metrics import roc_auc_score

batch_size = 16
s.run(tf.global_variables_initializer())

for epoch in range(15):
    avg_loss = 0
    num_batches = int(X_train.shape[0] / batch_size)
    for i in range(num_batches):
        batch_x, batch_y = X_train[i:i+num_batches], y_train[i:i+num_batches]
        _, c = train_function(batch_x, batch_y)
        avg_loss += c / num_batches
        print("\nloss at iter %i:%.4f" % (i, np.sum(c)))
    print("train auc:", roc_auc_score(y_train, s.run(predicted_y, {input_X: X_train})))
    print("test auc:", roc_auc_score(y_test, s.run(predicted_y, {input_X: X_test})))
    print("Average loss = ", avg_loss)
    plt.scatter(epoch, avg_loss, color='b', marker='x')
plt.xlabel('epoch')
plt.ylabel('Avg loss')

# Prediction
probs = tf.equal(tf.argmax(predicted_y, 1), tf.argmax(input_y, 1))
# Accuracy
accuracy = tf.reduce_mean(tf.cast(probs, tf.float32))

print("Training accuracy: ", s.run(accuracy, {input_X: X_train, input_y: y_train}))
print("Test accuracy: ", s.run(accuracy, {input_X: X_test, input_y: y_test}))

# Testing on a random image
fn = ["0.png", "1.jpg", "1_1.jpg", "One.png", "1_2.jpeg", "1-3.png"]
filename = np.random.choice(fn)

from PIL import Image

def resize_image(image):
    img = Image.open(image)
    img = img.resize((8, 8))
    arr = np.array(img)
    # convert to gray scale
    if len(arr.shape) > 2:
        arr = np.mean(arr, 2)
    # flatten
    arr = arr.flatten()
    return arr

if any("0" in s for s in filename):
    test_label = 0
elif any("1" in s for s in filename):
    test_label = 1
elif any("2" in s for s in filename):
    test_label = 2
elif any("3" in s for s in filename):
    test_label = 3
elif any("4" in s for s in filename):
    test_label = 4
elif any("5" in s for s in filename):
    test_label = 5
elif any("6" in s for s in filename):
    test_label = 6
elif any("7" in s for s in filename):
    test_label = 7
elif any("8" in s for s in filename):
    test_label = 8
elif any("9" in s for s in filename):
    test_label = 9

test_label
test_image = resize_image(filename)
new_predict = predict_function(test_image.reshape((1, 64)))
print("predicted label: ", s.run(tf.argmax(new_predict, 1))[0])

Related

Loss Not Decreasing for a Bert from Scratch PyTorch Model

I followed Aladdin Persson's YouTube video to code up just the encoder portion of the transformer model in PyTorch, except I just used PyTorch's multi-head attention layer. The model seems to produce output of the correct shape. However, during training, the training loss does not drop and the resulting model always predicts the same output of 0.4761. The dataset used for training is the Sarcasm Detection Dataset from Kaggle. I would appreciate any help you can give on errors that I have made.
import pandas as pd
from transformers import BertTokenizer
import torch.nn as nn
import torch
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import ReduceLROnPlateau
import math

df = pd.read_json("Sarcasm_Headlines_Dataset_v2.json", lines=True)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
encoded_input = tokenizer(df['headline'].tolist(), return_tensors='pt', padding=True)
X = encoded_input['input_ids']
y = torch.tensor(df['is_sarcastic'].values).float()
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
torch.cuda.empty_cache()

class TransformerBlock(nn.Module):
    def __init__(self, embed_dim, num_heads, dropout, expansion_ratio):
        super(TransformerBlock, self).__init__()
        self.attention = nn.MultiheadAttention(embed_dim, num_heads)
        self.norm1 = nn.LayerNorm(embed_dim)
        self.norm2 = nn.LayerNorm(embed_dim)
        self.feed_forward = nn.Sequential(
            nn.Linear(embed_dim, expansion_ratio*embed_dim),
            nn.ReLU(),
            nn.Linear(expansion_ratio*embed_dim, embed_dim)
        )
        self.dropout = nn.Dropout(dropout)

    def forward(self, value, key, query):
        attention, _ = self.attention(value, key, query)
        x = self.dropout(self.norm1(attention + query))
        forward = self.feed_forward(x)
        out = self.dropout(self.norm2(forward + x))
        return out

class Encoder(nn.Module):
    # the vocab size is one more than the max value in the X matrix.
    def __init__(self, vocab_size=30109, embed_dim=128, num_layers=1, num_heads=4, device="cpu", expansion_ratio=4, dropout=0.1, max_length=193):
        super(Encoder, self).__init__()
        self.device = device
        self.word_embedding = nn.Embedding(vocab_size, embed_dim)
        self.position_embedding = nn.Embedding(max_length, embed_dim)
        self.layers = nn.ModuleList(
            [
                TransformerBlock(embed_dim, num_heads, dropout, expansion_ratio) for _ in range(num_layers)
            ]
        )
        self.dropout = nn.Dropout(dropout)
        self.classifier1 = nn.Linear(embed_dim, embed_dim)
        self.classifier2 = nn.Linear(embed_dim, 1)
        self.relu = nn.ReLU()

    def forward(self, x):
        N, seq_length = x.shape
        positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
        out = self.dropout(self.word_embedding(x) + self.position_embedding(positions))
        for layer in self.layers:
            # print(out.shape)
            out = layer(out, out, out)
        # Get the first output for classification
        # Pooled output from hugging face is: Last layer hidden-state of the first token of the sequence (classification token) further processed by a Linear layer and a Tanh activation function.
        # Pooled output from hugging face will be different from out[:,0,:], which is the output from the CLS token.
        out = self.relu(self.classifier1(out[:, 0, :]))
        out = self.classifier2(out)
        return out

torch.cuda.empty_cache()
net = Encoder(device=device)
net.to(device)

batch_size = 32
num_train_samples = X_train.shape[0]
num_val_samples = X_test.shape[0]
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)
scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5)
val_loss_hist = []
loss_hist = []
epoch = 0
min_val_loss = math.inf
print("Training Started")
patience = 0

for _ in range(100):
    epoch += 1
    net.train()
    epoch_loss = 0
    permutation = torch.randperm(X_train.size()[0])
    for i in range(0, X_train.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        features = X_train[indices].to(device)
        labels = y_train[indices].reshape(-1, 1).to(device)
        output = net.forward(features)
        loss = criterion(output, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    epoch_loss = epoch_loss / num_train_samples * num_val_samples
    loss_hist.append(epoch_loss)

    # print("Eval")
    net.eval()
    epoch_val_loss = 0
    permutation = torch.randperm(X_test.size()[0])
    for i in range(0, X_test.size()[0], batch_size):
        indices = permutation[i:i+batch_size]
        features = X_test[indices].to(device)
        labels = y_test[indices].reshape(-1, 1).to(device)
        output = net.forward(features)
        loss = criterion(output, labels)
        epoch_val_loss += loss.item()
    val_loss_hist.append(epoch_val_loss)
    scheduler.step(epoch_val_loss)

    # if epoch % 5 == 0:
    print("Epoch: " + str(epoch) + " Train Loss: " + format(epoch_loss, ".4f") + ". Val Loss: " + format(epoch_val_loss, ".4f") + " LR: " + str(optimizer.param_groups[0]['lr']))

    if epoch_val_loss < min_val_loss:
        min_val_loss = epoch_val_loss
        torch.save(net.state_dict(), "torchmodel/weights_best.pth")
        print('\033[93m' + "Model Saved" + '\033[0m')
        patience = 0
    else:
        patience += 1
    if (patience == 10):
        break

print("Training Ended")

Input size (depth of inputs) must be accessible via shape inference, but saw value None error when trying to set tf.expand_dims axis to 0

I am trying to use the 20 newsgroups data set available in sklearn to train an LSTM to do incremental learning (classification). I used sklearn's TfidfVectorizer to pre-process the data, then turned the resulting sparse matrix into a numpy array before feeding it in. After that, when writing the line below:
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)
It gave an error saying that 'inputs_' should have 3 dimensions, so I used:
inputs_ = tf.expand_dims(inputs_, 0)
to expand the dimension. But when I do that I get the error:
ValueError: Input size (depth of inputs) must be accessible via shape
inference, but saw value None.
The shape of 'inputs_' is:
(1, 134410)
I already went through this post, but it did not help.
I cannot seem to understand how to solve this issue. Any help is much appreciated. Thank you in advance!
Shown below is my complete code:
import os
from collections import Counter
import tensorflow as tf
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
from matplotlib import cm
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import f1_score, recall_score, precision_score
from string import punctuation
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer


def pre_process():
    newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
    vectorizer = TfidfVectorizer()
    features = vectorizer.fit_transform(newsgroups_data.data)
    lb = LabelBinarizer()
    labels = np.reshape(newsgroups_data.target, [-1])
    labels = lb.fit_transform(labels)
    return features, labels


def get_batches(x, y, batch_size=1):
    for ii in range(0, len(y), batch_size):
        yield x[ii:ii + batch_size], y[ii:ii + batch_size]


def plot_error(errorplot, datapoint, numberOfWrongPreds):
    errorplot.set_xdata(np.append(errorplot.get_xdata(), datapoint))
    errorplot.set_ydata(np.append(errorplot.get_ydata(), numberOfWrongPreds))
    errorplot.autoscale(enable=True, axis='both', tight=None)
    plt.draw()


def train_test():
    features, labels = pre_process()

    # Defining Hyperparameters
    epochs = 1
    lstm_layers = 1
    batch_size = 1
    lstm_size = 30
    learning_rate = 0.003

    print(lstm_size)
    print(batch_size)
    print(epochs)

    # --------------placeholders-------------------------------------
    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():
        tf.set_random_seed(1)
        inputs_ = tf.placeholder(tf.float32, [None, None], name="inputs")
        # labels_ = tf.placeholder(dtype= tf.int32)
        labels_ = tf.placeholder(tf.int32, [None, None], name="labels")

        # getting dynamic batch size according to the input tensor size
        # dynamic_batch_size = tf.shape(inputs_)[0]

        # output_keep_prob is the dropout added to the RNN's outputs, the dropout will have no effect on the calculation of the subsequent states.
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")

        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

        # Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)

        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)

        inputs_ = tf.expand_dims(inputs_, 0)
        outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_, initial_state=initial_state)

        # hidden layer
        hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)

        logit = tf.contrib.layers.fully_connected(hidden, 1, activation_fn=None)

        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))

        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

        saver = tf.train.Saver()

    # ----------------------------online training-----------------------------------------
    with tf.Session(graph=graph) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        iteration = 1
        state = sess.run(initial_state)
        wrongPred = 0
        errorplot, = plt.plot([], [])

        for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
            feed = {inputs_: x.toarray(),
                    labels_: y,
                    keep_prob: 0.5,
                    initial_state: state}

            predictions = tf.round(tf.nn.softmax(logit)).eval(feed_dict=feed)

            print("----------------------------------------------------------")
            print("Iteration: {}".format(iteration))
            print("Prediction: ", predictions)
            print("Actual: ", y)

            pred = np.array(predictions)
            print(pred)
            print(y)

            if not ((pred == y).all()):
                wrongPred += 1

            if ii % 27 == 0:
                plot_error(errorplot, ii, wrongPred)

            loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)

            print("Train loss: {:.3f}".format(loss))
            iteration += 1

        saver.save(sess, "checkpoints/sentiment.ckpt")
        errorRate = wrongPred / len(labels)
        print("ERROR RATE: ", errorRate)


if __name__ == '__main__':
    train_test()
ValueError: Input size (depth of inputs) must be accessible via shape inference, but saw value None.
This error is raised because you don't specify the size or the number of inputs.
I got the script working like this:
inputs_ = tf.placeholder(tf.float32, [1,None], name = "inputs")
inputs_withextradim = tf.expand_dims(inputs_, 2)
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs_withextradim, initial_state=initial_state)
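With the batch dimension fixed to 1 in the placeholder and the extra axis added at position 2, the tensor passed to tf.nn.dynamic_rnn has static shape (1, None, 1), so the input depth is known at graph-construction time, which is exactly what the shape-inference error was complaining about.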

Server (Ubuntu 16.04) freezes when running LSTM and stops responding

I have run my code on this server before without a problem, but I was using a different cost function (tf.losses.mean_squared_error) for a classification problem. Upon learning that this was the wrong approach, I changed it to tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits). I also changed the learning rate from 0.003 to 0.01. I then ran this new code on my server, and after some time the server stopped responding and could not even be pinged.
My code is shown below:
from collections import Counter
import tensorflow as tf
from sklearn.datasets import fetch_20newsgroups
import matplotlib as mplt
mplt.use('agg')  # Must be before importing matplotlib.pyplot or pylab!
import matplotlib.pyplot as plt
from string import punctuation
from sklearn.preprocessing import LabelBinarizer
import numpy as np
from nltk.corpus import stopwords
import nltk

nltk.download('stopwords')


def pre_process():
    newsgroups_data = fetch_20newsgroups(subset='all', remove=('headers', 'footers', 'quotes'))
    words = []
    temp_post_text = []
    print(len(newsgroups_data.data))

    for post in newsgroups_data.data:
        all_text = ''.join([text for text in post if text not in punctuation])
        all_text = all_text.split('\n')
        all_text = ''.join(all_text)
        temp_text = all_text.split(" ")

        for word in temp_text:
            if word.isalpha():
                temp_text[temp_text.index(word)] = word.lower()

        temp_text = [word for word in temp_text if word not in stopwords.words('english')]
        temp_text = list(filter(None, temp_text))
        temp_text = ' '.join([i for i in temp_text if not i.isdigit()])
        words += temp_text.split(" ")
        temp_post_text.append(temp_text)

    # temp_post_text = list(filter(None, temp_post_text))
    dictionary = Counter(words)
    # deleting spaces
    # del dictionary[""]
    sorted_split_words = sorted(dictionary, key=dictionary.get, reverse=True)
    vocab_to_int = {c: i for i, c in enumerate(sorted_split_words)}

    message_ints = []
    for message in temp_post_text:
        temp_message = message.split(" ")
        message_ints.append([vocab_to_int[i] for i in temp_message])

    # maximum message length = 6577
    message_lens = Counter([len(x) for x in message_ints])

    seq_length = 6577
    num_messages = len(temp_post_text)
    features = np.zeros([num_messages, seq_length], dtype=int)
    for i, row in enumerate(message_ints):
        features[i, -len(row):] = np.array(row)[:seq_length]

    lb = LabelBinarizer()
    lbl = newsgroups_data.target
    labels = np.reshape(lbl, [-1])
    labels = lb.fit_transform(labels)

    return features, labels, len(sorted_split_words)


def get_batches(x, y, batch_size=1):
    for ii in range(0, len(y), batch_size):
        yield x[ii:ii + batch_size], y[ii:ii + batch_size]


def plot(noOfWrongPred, dataPoints):
    font_size = 14
    fig = plt.figure(dpi=100, figsize=(10, 6))
    mplt.rcParams.update({'font.size': font_size})
    plt.title("Distribution of wrong predictions", fontsize=font_size)
    plt.ylabel('Error rate', fontsize=font_size)
    plt.xlabel('Number of data points', fontsize=font_size)
    plt.plot(dataPoints, noOfWrongPred, label='Prediction', color='blue', linewidth=1.8)
    # plt.legend(loc='upper right', fontsize=14)
    plt.savefig('distribution of wrong predictions.png')
    # plt.show()


def train_test():
    features, labels, n_words = pre_process()

    print(features.shape)
    print(labels.shape)

    # Defining Hyperparameters
    lstm_layers = 1
    batch_size = 1
    lstm_size = 200
    learning_rate = 0.01

    # --------------placeholders-------------------------------------
    # Create the graph object
    graph = tf.Graph()
    # Add nodes to the graph
    with graph.as_default():
        tf.set_random_seed(1)
        inputs_ = tf.placeholder(tf.int32, [None, None], name="inputs")
        # labels_ = tf.placeholder(dtype= tf.int32)
        labels_ = tf.placeholder(tf.float32, [None, None], name="labels")

        # output_keep_prob is the dropout added to the RNN's outputs, the dropout will have no effect on the calculation of the subsequent states.
        keep_prob = tf.placeholder(tf.float32, name="keep_prob")

        # Size of the embedding vectors (number of units in the embedding layer)
        embed_size = 300

        # generating random values from a uniform distribution (minval included and maxval excluded)
        embedding = tf.Variable(tf.random_uniform((n_words, embed_size), -1, 1))
        embed = tf.nn.embedding_lookup(embedding, inputs_)

        print(embedding.shape)
        print(embed.shape)
        print(n_words)

        # Your basic LSTM cell
        lstm = tf.contrib.rnn.BasicLSTMCell(lstm_size)

        # Add dropout to the cell
        drop = tf.contrib.rnn.DropoutWrapper(lstm, output_keep_prob=keep_prob)

        # Stack up multiple LSTM layers, for deep learning
        cell = tf.contrib.rnn.MultiRNNCell([drop] * lstm_layers)

        # Getting an initial state of all zeros
        initial_state = cell.zero_state(batch_size, tf.float32)

        outputs, final_state = tf.nn.dynamic_rnn(cell, embed, initial_state=initial_state)

        # hidden layer
        hidden = tf.layers.dense(outputs[:, -1], units=25, activation=tf.nn.relu)

        logit = tf.contrib.layers.fully_connected(hidden, num_outputs=20, activation_fn=None)

        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logit, labels=labels_))

        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)

        saver = tf.train.Saver()

    # ----------------------------online training-----------------------------------------
    with tf.Session(graph=graph) as sess:
        tf.set_random_seed(1)
        sess.run(tf.global_variables_initializer())
        iteration = 1
        state = sess.run(initial_state)
        wrongPred = 0
        noOfWrongPreds = []
        dataPoints = []

        for ii, (x, y) in enumerate(get_batches(features, labels, batch_size), 1):
            feed = {inputs_: x,
                    labels_: y,
                    keep_prob: 0.5,
                    initial_state: state}

            predictions = tf.nn.softmax(logit).eval(feed_dict=feed)

            print("----------------------------------------------------------")
            print("Iteration: {}".format(iteration))

            isequal = np.equal(np.argmax(predictions[0], 0), np.argmax(y[0], 0))

            print(np.argmax(predictions[0], 0))
            print(np.argmax(y[0], 0))

            if not (isequal):
                wrongPred += 1

            print("nummber of wrong preds: ", wrongPred)

            if iteration % 50 == 0:
                noOfWrongPreds.append(wrongPred / iteration)
                dataPoints.append(iteration)

            loss, states, _ = sess.run([cost, final_state, optimizer], feed_dict=feed)

            print("Train loss: {:.3f}".format(loss))
            iteration += 1

        saver.save(sess, "checkpoints/sentiment.ckpt")
        errorRate = wrongPred / len(labels)
        print("ERRORS: ", wrongPred)
        print("ERROR RATE: ", errorRate)
        plot(noOfWrongPreds, dataPoints)


if __name__ == '__main__':
    train_test()
Is there anything in my code that could be causing a memory leak? Any help is much appreciated. Thank you in advance.
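Not a diagnosis of the freeze, but worth noting as a general TensorFlow pattern for the per-iteration predictions above: tf.nn.softmax(logit).eval(feed_dict=feed) inside the loop builds a new softmax node on every iteration, so the graph keeps growing; defining the op once and reusing it avoids that. A rough sketch against the code above:

# Inside the graph-construction block (with graph.as_default()), define the prediction op once:
prediction_op = tf.nn.softmax(logit)

# ...and inside the training loop, reuse it instead of creating a new op each time:
predictions = sess.run(prediction_op, feed_dict=feed)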

Evaluating CNN model for one image

I'm new to TensorFlow and trying it out, so if one of you is able to help me I will really appreciate it.
I've created a CNN model and trained it to classify a series of images into 2 categories, for example FLOWERS and OTHERS, and I think I did a good job with that, but if you have any ideas on how I can improve this model please let me know.
My problem is: after I train this model, how can I use it to classify just one specific image? I don't want to use batches if possible. Could anyone give me some advice or examples about this, please?
My Code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time
import math
import numpy as np
from PIL import Image
import tensorflow as tf
import os

# Basic model parameters as external flags.
flags = tf.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 10000, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 256, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 64, 'Number of units in hidden layer 2.')
flags.DEFINE_integer('batch_size', 32, 'Batch size. '
                     'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', "ModelData/data", 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
                     'for unit testing.')

NUM_CLASSES = 2
IMAGE_SIZE = 200
CHANNELS = 3
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * CHANNELS
# starter_learning_rate = 0.1


def inference(images, hidden1_units, hidden2_units):
    # Hidden 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units], stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    # Hidden 2
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units], stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    # Linear
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, NUM_CLASSES], stddev=1.0 / math.sqrt(float(hidden2_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits


def cal_loss(logits, labels):
    labels = tf.to_int64(labels)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='xentropy')
    loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
    return loss


def training(loss, learning_rate):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = optimizer.minimize(loss, global_step=global_step)
    return train_op


def evaluation(logits, labels):
    correct = tf.nn.in_top_k(logits, labels, 1)
    return tf.reduce_sum(tf.cast(correct, tf.int32))


def placeholder_inputs(batch_size):
    images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, IMAGE_PIXELS))
    labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
    return images_placeholder, labels_placeholder


def fill_feed_dict(images_feed, labels_feed, images_pl, labels_pl):
    feed_dict = {
        images_pl: images_feed,
        labels_pl: labels_feed,
    }
    return feed_dict


def do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_set):
    # And run one epoch of eval.
    true_count = 0  # Counts the number of correct predictions.
    steps_per_epoch = 32 // FLAGS.batch_size
    num_examples = steps_per_epoch * FLAGS.batch_size
    for step in range(steps_per_epoch):
        feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
        true_count += sess.run(eval_correct, feed_dict=feed_dict)
    precision = true_count / num_examples
    print(' Num examples: %d Num correct: %d Precision # 1: %0.04f' % (num_examples, true_count, precision))


# Get the sets of images and labels for training, validation, and
def init_training_data_set(dir):
    train_images = []
    train_labels = []

    def GetFoldersList():
        mylist = []
        filelist = os.listdir(dir)
        for name in filelist:
            if os.path.isdir(os.path.join(dir, name)):
                mylist.append(name)
        return mylist

    def ReadImagesFromFolder(folder):
        fin_dir = os.path.join(dir, folder)
        images_name = os.listdir(fin_dir)
        images = []
        for img_name in images_name:
            img_location = os.path.join(dir, folder)
            final_loc = os.path.join(img_location, img_name)
            try:
                hash_folder = int(folder.split("_")[0])
                images.append((np.array(Image.open(final_loc).convert('RGB')), hash_folder))
            except:
                pass
        return images

    folders = GetFoldersList()
    for folder in folders:
        for imgs in ReadImagesFromFolder(folder):
            train_images.append(imgs[0])
            train_labels.append(imgs[1])
    return train_images, train_labels


train_images, train_labels = init_training_data_set(os.path.join("FetchData", "Image"))
train_images = np.array(train_images)
train_images = train_images.reshape(len(train_images), IMAGE_PIXELS)
train_labels = np.array(train_labels)


def restore_model_last_version(saver, sess):

    def get_biggest_index(folder):
        import re
        index_vals = []
        for file in os.listdir(folder):
            split_data = file.split(".")
            extension = split_data[len(split_data) - 1]
            if extension == "meta":
                index = int(re.findall(r"\d+", file)[0])
                index_vals.append(index)
        index_vals.sort(reverse=True)
        if index_vals:
            return index_vals[0]
        else:
            return ""

    real_path = os.path.abspath(os.path.split(FLAGS.train_dir)[0])
    index = get_biggest_index(real_path)
    isdir = os.path.isdir(real_path)
    is_empty = True
    if isdir:
        if os.listdir(real_path):
            is_empty = False
    if not is_empty:
        saver.restore(sess, FLAGS.train_dir + "-" + str(index))


def run_training():
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        # Generate placeholders for the images and labels.
        images_placeholder, labels_placeholder = placeholder_inputs(len(train_images))
        # Build a Graph that computes predictions from the inference model.
        logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
        # Add to the Graph the Ops for loss calculation.
        loss = cal_loss(logits, labels_placeholder)
        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = training(loss, FLAGS.learning_rate)
        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = evaluation(logits, labels_placeholder)
        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver(save_relative_paths=True)
        # Create a session for running Ops on the Graph.
        # sess = tf.Session()
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9
        # gpu_options = tf.GPUOptions(allow_growth=True)
        # sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
        sess = tf.Session(config=config)
        # Run the Op to initialize the variables.
        # init = train_op.g
        init = tf.global_variables_initializer()
        sess.run(init)
        restore_model_last_version(saver, sess)
        # And then after everything is built, start the training loop.
        for step in range(FLAGS.max_steps):
            start_time = time.time()
            feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
            _, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
            duration = time.time() - start_time
            if (step) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                print("Current step is: " + str(step))
                print("Current los value: " + str(loss_value))
                print("Current duration: " + str(duration))
                print("\n")
                saver.save(sess, save_path=FLAGS.train_dir, global_step=step)
                print('Training Data Eval:')
                do_eval(sess, eval_correct, images_placeholder, labels_placeholder, train_images)


def main(_):
    run_training()


if __name__ == '__main__':
    tf.app.run()
So if anyone could help me with this and knows how I can make that evaluation for just one picture, please help me.
Thanks :)
Pretty much every operation in TensorFlow expects you to pass a batched input to make good use of the parallelization capacities of modern GPUs.
Now, if you want to run inference on a single image, you simply need to treat this image as a batch of size 1. Here is a quick code snippet:
# Load image
img = np.array(Image.open(your_path).convert('RGB'))
# Expand dimensions to simulate a batch of size 1
img = np.expand_dims(img, 0)
...
# Get prediction
pred = sess.run(tf.nn.softmax(logits), {images_placeholder: img})
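One caveat for the code in the question as written: placeholder_inputs fixes the first dimension of images_placeholder to the batch size passed in (here len(train_images)), so a feed of size 1 would not match that shape. A placeholder with a flexible batch dimension, for example the sketch below, accepts both training batches and single images:

# Leave the batch dimension unspecified so batches of any size (including 1) can be fed
images_placeholder = tf.placeholder(tf.float32, shape=(None, IMAGE_PIXELS))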

Why do I get "nan" for Loss when I implement MNIST tensorflow code to my dataset

My project goal is to examine the weight map of a neural network for my dataset.
I followed the MNIST example code and it worked fine.
The MNIST dataset has 784 (28*28) pixel_data inputs and 10 class_data outputs.
My dataset has 72 (8*9) pixel_data inputs and 4 class_data outputs.
I wrote the code to handle my dataset in the same format as the MNIST dataset,
but when I train, the loss keeps giving a "NaN" value.
You can check my code and dataset on my GitHub.
import os
import glob
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import random as ran


# display the data as an 8x9 image
def display_digit(num):
    print(y_data[num])
    label = y_data[num].argmax(axis=0)
    image = x_data[num].reshape([8, 9])
    plt.title('Example: %d Label: %d' % (num, label))
    plt.imshow(image, cmap=plt.get_cmap('gray_r'))
    plt.show()


# display the data in vector form
def display_mult_flat(start, stop):
    images = x_data[start].reshape([1, 72])
    for i in range(start+1, stop):
        images = np.concatenate((images, x_data[i].reshape([1, 72])))
    plt.imshow(images, cmap=plt.get_cmap('gray_r'))
    plt.show()


# encode y_data as a one-hot vector
def oneshot(n):
    if n == "1":
        return [1, 0, 0, 0]
    elif n == "2":
        return [0, 1, 0, 0]
    elif n == "3":
        return [0, 0, 1, 0]
    elif n == "4":
        return [0, 0, 0, 1]


# return the input and output data (same format as used for MNIST)
def Get_data():
    Glass_dir = './glass_data/'
    csv_filenames = [i for i in glob.glob('./glass_data/*.{}'.format('csv'))]
    y_data = []
    x_data = []
    for filename in csv_filenames:
        y = oneshot(filename[13])
        csv_file = pd.read_csv(filename)
        df = pd.DataFrame(csv_file).T
        df.columns = df.iloc[0]
        df = df[1:]
        df = df.ffill()
        for i in range(len(df.index)):
            y_data.append(y)
        for row in df.iterrows():
            index, data = row
            x_data.append(data.tolist())
    combined = list(zip(x_data, y_data))
    ran.shuffle(combined)
    x_data[:], y_data[:] = zip(*combined)
    y_data = np.array(y_data)
    x_data = np.array(x_data, dtype=np.float32)
    return x_data, y_data


# use 5 samples from each class as test data
def Get_testdata():
    Glass_dir = './glass_data/'
    csv_filenames = [i for i in glob.glob('./glass_data/*.{}'.format('csv'))]
    y_test = []
    x_test = []
    for filename in csv_filenames:
        y = oneshot(filename[13])
        csv_file = pd.read_csv(filename)
        df = pd.DataFrame(csv_file).T
        df.columns = df.iloc[0]
        df = df[1:]
        df = df.ffill()
        for i in range(5):
            y_test.append(y)
        df2 = df.head()
        for row in df2.iterrows():
            index, data = row
            x_test.append(data.tolist())
    y_test = np.array(y_test)
    x_test = np.array(x_test, dtype=np.float32)
    return x_test, y_test


# In[3]:
x_data, y_data = Get_data()

# In[4]:
x_data

# In[5]:
y_data

# In[6]:
display_digit(ran.randint(0, x_data.shape[0]))

# In[7]:
display_mult_flat(0, 200)

# In[8]:
sess = tf.Session()
x = tf.placeholder(tf.float32, shape=[None, 72])
y_ = tf.placeholder(tf.float32, shape=[None, 4])
W = tf.Variable(tf.zeros([72, 4]))
b = tf.Variable(tf.zeros([4]))
y = tf.nn.softmax(tf.matmul(x, W) + b)
print(y)

# In[9]:
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(y), reduction_indices=[1]))

# In[10]:
x_train, y_train = Get_data()
x_test, y_test = Get_testdata()
LEARNING_RATE = 0.01
TRAIN_STEPS = 2500

# In[11]:
init = tf.global_variables_initializer()
sess.run(init)

# In[12]:
training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# In[13]:
for i in range(TRAIN_STEPS+1):
    sess.run(training, feed_dict={x: x_train, y_: y_train})
    if i % 100 == 0:
        print('Training Step:' + str(i) + ' Accuracy = ' + str(sess.run(accuracy, feed_dict={x: x_train, y_: y_train})) + ' Loss = ' + str(sess.run(cross_entropy, {x: x_train, y_: y_train})))

# In[14]:
for i in range(4):
    plt.subplot(2, 5, i+1)
    weight = sess.run(W)[:, i]
    plt.title(i)
    plt.imshow(weight.reshape([8, 9]), cmap=plt.get_cmap('seismic'))
    frame1 = plt.gca()
    frame1.axes.get_xaxis().set_visible(False)
    frame1.axes.get_yaxis().set_visible(False)

# In[15]:
plt.show()
Two suggestions: first, initialize your weights to something other than zeros. For instance:
W = tf.Variable(tf.truncated_normal(shape=[72, 4]))
Second, it is recommended that you use tf.nn.softmax_cross_entropy_with_logits rather than calculating this yourself. It is more efficient and numerically stable. To do so, get rid of your softmax function and change your cost:
y = tf.matmul(x,W) + b # your network output before softmax (aka "logits")
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y)
cross_entropy = tf.reduce_mean(cross_entropy)
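Putting the two suggestions together, the relevant part of the question's graph would look roughly like this (a sketch; the placeholders, data loading, and training loop stay as in the question, and the accuracy op is unchanged because dropping the explicit softmax does not change the argmax):

# Weight initialization (first suggestion)
W = tf.Variable(tf.truncated_normal(shape=[72, 4]))
b = tf.Variable(tf.zeros([4]))

# Raw network output before softmax, i.e. the logits (second suggestion)
y = tf.matmul(x, W) + b

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))

training = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(cross_entropy)

# argmax of the logits equals argmax of the softmax, so accuracy is computed as before
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))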
