Normalize MNIST in PyTorch - pytorch

I am trying to normalize MNIST dataset in PyTorch 1.9 and Python 3.8 to be between the range [0, 1] with the code (batch_size = 32).
# Batch size stated in the question text.
batch_size = 32

# Specify path to MNIST dataset-
path_to_data = "path_to_dataset"

# Define transformation(s) to be applied to dataset-
# ToTensor() scales pixels to [0, 1]; Normalize() then computes
# (x - mean) / std, which yields the [-0.4242, 2.8215] range reported below.
transforms_MNIST = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean = (0.1307,), std = (0.3081,))
    ]
)

# Load MNIST dataset-
train_dataset = torchvision.datasets.MNIST(
    # root = './data', train = True,
    root = path_to_data + "data", train = True,
    transform = transforms_MNIST, download = True
)
test_dataset = torchvision.datasets.MNIST(
    # root = './data', train = False,
    root = path_to_data + "data", train = False,
    transform = transforms_MNIST
)

# Create training and testing dataloaders-
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset, batch_size = batch_size,
    shuffle = True
)
test_loader = torch.utils.data.DataLoader(
    dataset = test_dataset, batch_size = batch_size,
    shuffle = False
)

print(f"Sizes of train_dataset: {len(train_dataset)} and test_dataet: {len(test_dataset)}")
print(f"Sizes of train_loader: {len(train_loader)} and test_loader: {len(test_loader)}")
# Sizes of train_dataset: 60000 and test_dataet: 10000
# Sizes of train_loader: 1875 and test_loader: 313

# Sanity check-
# NOTE(review): the expressions inside the braces were missing from the
# listing; reading the raw `.data` tensor is presumed from the 0 / 255 output
# (the transforms are not applied to `.data`) - TODO confirm.
print(f"train_dataset: min pixel value = {train_dataset.data.min():.3f} &"
      f" max pixel value = {train_dataset.data.max():.3f}")
# train_dataset: min pixel value = 0.000 & max pixel value = 255.000
print(f"test_dataset: min pixel value = {test_dataset.data.min():.3f} &"
      f" max pixel value = {test_dataset.data.max():.3f}")
# test_dataset: min pixel value = 0.000 & max pixel value = 255.000

print(f"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}")
# len(train_loader) = 1875 & len(test_loader) = 313

# Sanity check-
len(train_dataset) / batch_size, len(test_dataset) / batch_size
# (1875.0, 312.5)

# Get some random batch of training images & labels-
images, labels = next(iter(train_loader))
# You get x images due to the specified batch size-
print(f"images.shape: {images.shape} & labels.shape: {labels.shape}")
# images.shape: torch.Size([32, 1, 28, 28]) & labels.shape: torch.Size([32])

# Get min and max values for normalized pixels in mini-batch-
images.min(), images.max()
# (tensor(-0.4242), tensor(2.8215))
The min and max for 'images' should be 0 and 1; instead, they are -0.4242 and 2.8215. What is going wrong?

This happens because Normalize actually applies what is (also) known as standardization: output = (input - mean) / std.
The scaling to [0, 1] that you want is already performed by ToTensor() when the image is loaded, so you can simply comment out (remove) Normalize.


PyTorch DataLoader: Only one element tensors can be converted Python scalars

For Python 3.10 and torch version: 1.12.1, I am using MNIST dataset scaled in the range [0, 1] with one-hot encoded vectors for the target as:
batch_size = 256

# Define transformations for MNIST dataset-
# MNIST dataset statistics-
# mean = np.array([0.1307])
# std_dev = np.array([0.3081])
# NOTE(review): the list contents were missing from the listing; ToTensor()
# is presumed because the question says the data is scaled to [0, 1].
transforms_apply = transforms.Compose(
    [
        transforms.ToTensor(),
        # transforms.Normalize(mean = mean, std = std_dev)
    ]
)

# Load MNIST dataset-
train_dataset = torchvision.datasets.MNIST(
    root = 'data', train = True,
    transform = transforms_apply, download = True
)
test_dataset = torchvision.datasets.MNIST(
    root = 'data', train = False,
    transform = transforms_apply
)

# Sanity check-
print(f"training dataset length/shape: {list(train_dataset.data.shape)}")
# training dataset length/shape: [60000, 28, 28]

# NOTE(review): the expressions before "/ 255" were missing from the listing;
# mean/std of the raw uint8 pixels is presumed from the output - TODO confirm.
print(f"mean = {train_dataset.data.float().mean() / 255:.4f} &"
      f" std dev = {train_dataset.data.float().std() / 255:.4f}"
      )
# mean = 0.1307 & std dev = 0.3081

# Convert the targets to one-hot encoded vectors-
# This replaces the 1-D label tensor with an [N, 10] tensor. MNIST.__getitem__
# later calls int(self.targets[index]) on one row of it, which is what raises
# the ValueError shown in the traceback below.
train_dataset.targets = F.one_hot(train_dataset.targets, num_classes = 10)
test_dataset.targets = F.one_hot(test_dataset.targets, num_classes = 10)

# Sanity checks-
print(f"Train dataset: min = {train_dataset.data.min()} & max = {train_dataset.data.max()};"
      f" Test dataset: min = {test_dataset.data.min()} & max = {test_dataset.data.max()}"
      )
# Train dataset: min = 0 & max = 255; Test dataset: min = 0 & max = 255

train_dataset.data.shape, train_dataset.targets.shape
# (torch.Size([60000, 28, 28]), torch.Size([60000, 10]))

test_dataset.data.shape, test_dataset.targets.shape
# (torch.Size([10000, 28, 28]), torch.Size([10000, 10]))

# Create training and testing dataloaders-
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset, batch_size = batch_size,
    shuffle = True
)
test_loader = torch.utils.data.DataLoader(
    dataset = test_dataset, batch_size = batch_size,
    shuffle = False
)

print(f"Sizes of train_dataset: {len(train_dataset)} and test_dataet: {len(test_dataset)}")
print(f"Sizes of train_loader: {len(train_loader)} and test_loader: {len(test_loader)}")
# Sizes of train_dataset: 60000 and test_dataet: 10000
# Sizes of train_loader: 235 and test_loader: 40

print(f"len(train_loader) = {len(train_loader)} & len(test_loader) = {len(test_loader)}")
# len(train_loader) = 235 & len(test_loader) = 40

# Sanity check-
len(train_dataset) / batch_size, len(test_dataset) / batch_size
# (234.375, 39.0625)

# Get some random batch of training images & labels-
x, y = next(iter(train_loader))
print(f"images.shape: {x.shape}, labels.shape: {y.shape}")
This generates the error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) Input In [25], in <cell line: 2>()
1 # Get some random batch of training images & labels-
----> 2 x, y = next(iter(train_loader))
3 print(f"images.shape: {x.shape}, labels.shape: {y.shape}")
678 if self._sampler_iter is None:
679 # TODO(
680 self._reset() # type: ignore[call-arg]
--> 681 data = self._next_data()
682 self._num_yielded += 1
683 if self._dataset_kind == _DatasetKind.Iterable and
684 self._IterableDataset_len_called is not None and
685 self._num_yielded > self._IterableDataset_len_called:
in _SingleProcessDataLoaderIter._next_data(self)
719 def _next_data(self):
720 index = self._next_index() # may raise StopIteration
--> 721 data = self._dataset_fetcher.fetch(index) # may raise StopIteration
722 if self._pin_memory:
723 data = _utils.pin_memory.pin_memory(data, self._pin_memory_device)
in _MapDatasetFetcher.fetch(self, possibly_batched_index)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
in (.0)
47 def fetch(self, possibly_batched_index):
48 if self.auto_collation:
---> 49 data = [self.dataset[idx] for idx in possibly_batched_index]
50 else:
51 data = self.dataset[possibly_batched_index]
in MNIST.getitem(self, index)
130 def getitem(self, index: int) -> Tuple[Any, Any]:
131 """
132 Args:
133 index (int): Index (...)
136 tuple: (image, target) where target is index of the target class.
137 """
--> 138 img, target =[index], int(self.targets[index])
140 # doing this so that it is consistent with all other datasets
141 # to return a PIL Image
142 img = Image.fromarray(img.numpy(), mode="L")
ValueError: only one element tensors can be converted to Python scalars
I know that this is due to the one-hot encoding since when not using it, this error is absent. How to solve it?
You can add it to your transforms using the Lambda transform
# NOTE(review): this snippet is truncated (the list brackets and the closing
# parenthesis of Compose are missing from the listing).
# NOTE(review): one-hot encoding applies to the labels, so the Lambda is
# presumably meant for the dataset's label-side transform rather than the
# image pipeline. The traceback above shows the label reaching the dataset
# code as int(self.targets[index]), so `t.long()` would presumably need a
# tensor conversion first - TODO confirm against the original answer.
transforms_apply = transforms.Compose(
# transforms.Normalize(mean = mean, std = std_dev)
transforms.Lambda(lambda t: F.one_hot(t.long(), num_classes=10))

MNIST data processing - PyTorch

I am trying to code a Variational Autoencoder for MNIST dataset and the data pre-processing is as follows:
# Create transformations to be applied to dataset-
# ToTensor() followed by Normalize(), as stated in the question text below.
transforms = torchvision.transforms.Compose(
    [
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(
            (0.1307,), (0.3081,)
            # (0.5,), (0.5,)
        )
    ]
)

# Create training and validation datasets-
train_dataset = torchvision.datasets.MNIST(
    # root = 'data', train = True,
    root = path_to_data, train = True,
    download = True, transform = transforms
)
val_dataset = torchvision.datasets.MNIST(
    # root = 'data', train = False,
    root = path_to_data, train = False,
    download = True, transform = transforms
)

# Sanity check-
len(train_dataset), len(val_dataset)
# (60000, 10000)

# Create training and validation data loaders-
train_dataloader = torch.utils.data.DataLoader(
    dataset = train_dataset, batch_size = 32,
    shuffle = True,
    # num_workers = 2
)
val_dataloader = torch.utils.data.DataLoader(
    dataset = val_dataset, batch_size = 32,
    shuffle = True,
    # num_workers = 2
)

# Get a mini-batch of train data loaders-
imgs, labels = next(iter(train_dataloader))
imgs.shape, labels.shape
# (torch.Size([32, 1, 28, 28]), torch.Size([32]))

# Minimum & maximum pixel values-
imgs.min(), imgs.max()
# (tensor(-0.4242), tensor(2.8215))

# Compute min and max for train dataloader-
# Both trackers start at 0.0, so the reported minimum is never above 0 and
# the reported maximum is never below 0.
min_mnist, max_mnist = 0.0, 0.0
for img, _ in train_dataloader:
    if img.min() < min_mnist:
        min_mnist = img.min()
    if img.max() > max_mnist:
        max_mnist = img.max()
print(f"MNIST - train: min pixel value = {min_mnist:.4f} & max pixel value = {max_mnist:.4f}")
# MNIST - train: min pixel value = -0.4242 & max pixel value = 2.8215

min_mnist, max_mnist = 0.0, 0.0
for img, _ in val_dataloader:
    if img.min() < min_mnist:
        min_mnist = img.min()
    if img.max() > max_mnist:
        max_mnist = img.max()
print(f"MNIST - validation: min pixel value = {min_mnist:.4f} & max pixel value = {max_mnist:.4f}")
# MNIST - validation: min pixel value = -0.4242 & max pixel value = 2.8215
Using the 'ToTensor()' and 'Normalize()' transforms, the output image pixels are in the range [-0.4242, 2.8215]. The output layer of the decoder within the VAE uses either the sigmoid or the tanh activation function. Sigmoid outputs values in the range [0, 1], while tanh outputs values in the range [-1, 1].
This can be a problem since the input is in the range [-0.4242, 2.8215], while the output can be in the range [0, 1] or [-1, 1] depending on the activation being used - sigmoid or tanh.
The reconstruction loss being used is MSE. BCE could also be used, but it is usually suggested for Bernoulli-distributed (binary) data rather than for continuous data such as pixel values.
One simple fix is to just use 'ToTensor()' transformation which scales the input in the range [0, 1] and then use sigmoid activation function for the output decoder layer within the VAE. But what's a better approach for data pre-processing using images which need normalization with 'Normalize()' transformation for each of the channels such that the input and output/reconstructions are in the same range?
The easiest way would be to remove the sigmoid or tanh activation function in the last layer and just use a Linear layer as your output. In that case, the network can output any value and is not restricted to [0,1] or [-1, 1].

Pytorch couldn't build multi scaled kernel nested model

I'm trying to create a modified MNIST model which takes 1x28x28 MNIST tensor images as input, branches into different sub-models with different-sized kernels, and merges their outputs at the end, so as to give a multi-scale-kernel response in the spatial domain of the images. I'm stuck on the model, since I'm unable to construct it.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F
import timeit
import unittest

torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# check availability of GPU and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# define a transforms for preparing the dataset
transform = transforms.Compose([
    transforms.ToTensor(),  # convert the image to a pytorch tensor
    transforms.Normalize((0.1307,), (0.3081,))  # normalise the images with mean and std of the dataset
])

# Load the MNIST training, test datasets using `torchvision.datasets.MNIST` using the transform defined above
train_dataset = datasets.MNIST('./data',train=True,transform=transform,download=True)
test_dataset = datasets.MNIST('./data',train=False,transform=transform,download=True)

# create dataloaders for training and test datasets
# use a batch size of 32 and set shuffle=True for the training set
train_dataloader = Data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_dataloader = Data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)
# My Net
class Net(nn.Module):
    """Three-branch CNN for 1x28x28 inputs, one branch per kernel size (3, 5, 7).

    Each branch runs three convolutions, a 2x2 max-pool and two linear
    layers, ending in 10 scores. The three 10-score vectors are concatenated
    and mapped to the final 10 log-probabilities by ``fc33``.
    """

    def __init__(self):
        super().__init__()
        # first conv layer of each branch: 16 output channels, stride 1
        self.conv11 = nn.Conv2d(1, 16, 3, 1)  # Input = 1x28x28 Output = 16x26x26
        self.conv12 = nn.Conv2d(1, 16, 5, 1)  # Input = 1x28x28 Output = 16x24x24
        self.conv13 = nn.Conv2d(1, 16, 7, 1)  # Input = 1x28x28 Output = 16x22x22
        # second conv layer of each branch: 32 output channels, stride 1
        self.conv21 = nn.Conv2d(16, 32, 3, 1)  # Input = 16x26x26 Output = 32x24x24
        self.conv22 = nn.Conv2d(16, 32, 5, 1)  # Input = 16x24x24 Output = 32x20x20
        self.conv23 = nn.Conv2d(16, 32, 7, 1)  # Input = 16x22x22 Output = 32x16x16
        # third conv layer of each branch: 64 output channels, stride 1
        self.conv31 = nn.Conv2d(32, 64, 3, 1)  # Input = 32x24x24 Output = 64x22x22
        self.conv32 = nn.Conv2d(32, 64, 5, 1)  # Input = 32x20x20 Output = 64x16x16
        self.conv33 = nn.Conv2d(32, 64, 7, 1)  # Input = 32x16x16 Output = 64x10x10
        # define a max pooling layer with kernel size 2
        # NOTE(review): the trailing comma makes self.maxpool a 1-tuple rather
        # than a module. It is kept as posted because it is the cause of the
        # "'tuple' object is not callable" error discussed below.
        self.maxpool = nn.MaxPool2d(2),  # Output = 64x11x11
        # define dropout layer with a probability of 0.25
        self.dropout1 = nn.Dropout(0.25)
        # define dropout layer with a probability of 0.5
        self.dropout2 = nn.Dropout(0.5)
        # define a linear(dense) layer with 128 output features
        self.fc11 = nn.Linear(64*11*11, 128)
        self.fc12 = nn.Linear(64*8*8, 128)  # after maxpooling 2x2
        self.fc13 = nn.Linear(64*5*5, 128)
        # define a linear(dense) layer with output features corresponding to the number of classes in the dataset
        self.fc21 = nn.Linear(128, 10)
        self.fc22 = nn.Linear(128, 10)
        self.fc23 = nn.Linear(128, 10)
        # fuses the three 10-score branch outputs (3 * 10 = 30 inputs)
        self.fc33 = nn.Linear(30,10)

    def forward(self, x1):
        """Return log-probabilities of shape [batch, 10] for input ``x1``."""
        # Use the layers defined above in a sequential way (follow the same as the layer definitions above) and
        # write the forward pass, after each of conv1, conv2, conv3 and fc1 use a relu activation.
        # branch 1: 3x3 kernels
        x = F.relu(self.conv11(x1))
        x = F.relu(self.conv21(x))
        x = F.relu(self.maxpool(self.conv31(x)))
        #x = torch.flatten(x, 1)
        x = x.view(-1,64*11*11)
        x = self.dropout1(x)
        x = F.relu(self.fc11(x))
        x = self.dropout2(x)
        x = self.fc21(x)
        # branch 2: 5x5 kernels
        y = F.relu(self.conv12(x1))
        y = F.relu(self.conv22(y))
        y = F.relu(self.maxpool(self.conv32(y)))
        #x = torch.flatten(x, 1)
        y = y.view(-1,64*8*8)
        y = self.dropout1(y)
        y = F.relu(self.fc12(y))
        y = self.dropout2(y)
        y = self.fc22(y)
        # branch 3: 7x7 kernels
        z = F.relu(self.conv13(x1))
        z = F.relu(self.conv23(z))
        z = F.relu(self.maxpool(self.conv33(z)))
        #x = torch.flatten(x, 1)
        z = z.view(-1,64*5*5)
        z = self.dropout1(z)
        z = F.relu(self.fc13(z))
        z = self.dropout2(z)
        z = self.fc23(z)
        # Concatenate along the feature axis (dim 1) so each sample carries
        # 30 values, matching fc33's in_features. The posted code used dim 0,
        # which stacks the branches along the batch axis ([3 * batch, 10]).
        out = self.fc33(torch.cat((x, y, z), 1))
        output = F.log_softmax(out, dim=1)
        return output
import unittest
class TestImplementations(unittest.TestCase):
    """Sanity checks for the dataset, the dataloaders and model construction."""

    # Dataloading tests
    def test_dataset(self):
        """The training split exposes the standard MNIST class names."""
        self.dataset_classes = ['0 - zero',
                                '1 - one',
                                '2 - two',
                                '3 - three',
                                '4 - four',
                                '5 - five',
                                '6 - six',
                                '7 - seven',
                                '8 - eight',
                                '9 - nine']
        self.assertEqual(train_dataset.classes, self.dataset_classes)
        self.assertTrue(train_dataset.train)

    def test_dataloader(self):
        """Both dataloaders use a batch size of 32."""
        self.assertEqual(train_dataloader.batch_size, 32)
        self.assertEqual(test_dataloader.batch_size, 32)

    def test_total_parameters(self):
        """The model can be built and moved to the selected device."""
        model = Net().to(device)
        #self.assertTrue(sum(p.numel() for p in model.parameters()) == 1015946)


# loadTestsFromModule() only picks up TestCase classes found on a module;
# given a TestCase instance it yields an empty suite. Load from the class.
suite = unittest.TestLoader().loadTestsFromTestCase(TestImplementations)
def train(model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: network whose output is passed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches; it must support
            ``len()`` and expose ``.dataset`` for the progress message.
        optimizer: optimizer that updates ``model``'s parameters.
        epoch: epoch number, used only in the progress message.
    """
    model.train()  # keep dropout active while training
    for batch_idx, (data, target) in enumerate(train_loader):
        # send the image, target to the device
        data, target = data.to(device), target.to(device)
        # flush out the gradients stored in optimizer
        optimizer.zero_grad()
        # pass the image to the model and assign the output to variable named output
        output = model(data)
        # calculate the loss (use nll_loss in pytorch)
        loss = F.nll_loss(output, target)
        # do a backward pass
        loss.backward()
        # update the weights
        optimizer.step()
        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, test_loader):
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
# send the image, target to the device
data, target =,
# pass the image to the model and assign the output to variable named output
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
model = Net().to(device)

## Define Adam Optimiser with a learning rate of 0.01
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)

# train for 10 epochs, evaluating on the test set after each one
start = timeit.default_timer()
for epoch in range(1, 11):
    train(model, device, train_dataloader, optimizer, epoch)
    test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)) )
Here is my full code. I couldn't understand what could possibly go wrong...
It is giving
<ipython-input-72-194680537dcc> in forward(self, x1)
46 x = F.relu(self.conv11(x1))
47 x = F.relu(self.conv21(x))
---> 48 x = F.relu(self.maxpool(self.conv31(x)))
49 #x = torch.flatten(x, 1)
50 x = x.view(-1,64*11*11)
TypeError: 'tuple' object is not callable
P.S.: Pytorch Noob here.
You have mistakenly placed a comma at the end of the line where you define self.maxpool: `self.maxpool = nn.MaxPool2d(2), # Output = 64x11x11`. See?
This comma makes self.maxpool a tuple instead of a torch.nn.modules.pooling.MaxPool2d. Drop the comma at the end and this error is fixed.
I also see you haven't given the stride argument in your definition of self.maxpool = nn.MaxPool2d(2). You can set it explicitly, e.g. self.maxpool = nn.MaxPool2d(2, stride = 2) (if omitted, the stride defaults to the kernel size).

3-layer feedforward neural network not predicting regression values accurately

I'm pretty new to Tensorflow. Currently, I'm doing a 3-layer network, with 10 neurons in the hidden layer with ReLU, mini-batch gradient descent size of 8, L2 regularisation weight decay parameter (beta) of 0.001. The Tensorflow version I'm using is 1.14 and I'm on Python 3.6.
The issue that boggles my mind is that my predicted values and testing errors are absolutely off the charts.
For example, I plotted out the test errors and the predicted vs target values for a sample size of 50, and this is what came out.
As you can see, both plots are way off, and I haven't had the slightest clue as to why.
Here's roughly what the dataset looks like. The first column is discarded as it is just a counter value, and the last column is the target.
My code:
# NOTE(review): several statements were missing from this listing (shuffle
# calls, variable initialisation, the training-op call, the error-list
# appends and the sess.run around `logits`). They are restored below from
# what the remaining code requires - TODO confirm against the original post.
# NOTE(review): NUM_FEATURES was not defined in the listing; 7 matches the
# seven columns (1:8) sliced into X_data below.
NUM_FEATURES = 7
num_neuron = 10
batch_size = 8
beta = 0.001
learning_rate = 0.001
epochs = 4000
seed = 10

# read and divide data into test and train sets
total_dataset= np.genfromtxt('dataset_excel.csv', delimiter=',')
X_data, Y_data = total_dataset[1:, 1:8], total_dataset[1:, -1]
Y_data = Y_data.reshape(Y_data.shape[0], 1)

# shuffle input, ensure both are shuffled with the same order
# (restoring the saved RNG state makes the second shuffle repeat the first)
shufflestate = np.random.get_state()
np.random.shuffle(X_data)
np.random.set_state(shufflestate)
np.random.shuffle(Y_data)

# 70% used for training, 30% used for testing
trainX = X_data[:280]
trainY = Y_data[:280]
testX = X_data[280:]
testY = Y_data[280:]
# Only the training inputs are standardised here; testX is left as-is
# (this is what the answer below points out).
trainX = (trainX - np.mean(trainX, axis=0)) / np.std(trainX, axis=0)

# Create the model
x = tf.placeholder(tf.float32, [None, NUM_FEATURES])
y_ = tf.placeholder(tf.float32, [None, 1])

# get 50 samples for plotting of predicted vs target values
limited50testX = testX[:50]
limited50testY = testY[:50]

# Hidden
with tf.name_scope('hidden'):
    weight1 = tf.Variable(tf.truncated_normal([NUM_FEATURES, num_neuron],stddev=1.0,name='weight1'))
    bias1 = tf.Variable(tf.zeros([num_neuron]),name='bias1')
    hidden = tf.nn.relu(tf.matmul(x, weight1) + bias1)

# output
with tf.name_scope('linear'):
    weight2 = tf.Variable(tf.truncated_normal([num_neuron, 1],stddev=1.0 / np.sqrt(float(num_neuron))),name='weight2')
    bias2 = tf.Variable(tf.zeros([1]),name='bias2')
    logits = tf.matmul(hidden, weight2) + bias2

ridgeLoss = tf.square(y_ - logits)
regularisation = tf.nn.l2_loss(weight1) + tf.nn.l2_loss(weight2)
loss = tf.reduce_mean(ridgeLoss + beta * regularisation)
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
train_op = optimizer.minimize(loss)
error = tf.reduce_mean(tf.square(y_ - logits))

N = len(trainX)
idx = np.arange(N)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    train_err = []
    test_err_ = []
    for i in range(epochs):
        # NOTE(review): the second range stops before trainX.shape[0], so the
        # final mini-batch of every epoch is never used - confirm whether
        # that is intended.
        for batchStart, batchEnd in zip(range(0, trainX.shape[0], batch_size),range(batch_size, trainX.shape[0], batch_size)):
            train_op.run(feed_dict={x: trainX[batchStart:batchEnd], y_: trainY[batchStart:batchEnd]})
        err = error.eval(feed_dict={x: trainX, y_: trainY})
        train_err.append(err)
        if i % 100 == 0:
            print('iter %d: train error %g' % (i, train_err[i]))
        test_err = error.eval(feed_dict={x: testX, y_: testY})
        test_err_.append(test_err)
    predicted = sess.run(logits, feed_dict={x:limited50testX})

print("predicted values: ", predicted)
print("size of predicted values is", len(predicted))
print("targets: ", limited50testY)
print("size of target values is", len(limited50testY))

#plot predictions vs targets
numberList=np.arange(0, 50, 1).tolist()
predplot = plt.figure(1)
plt.plot(numberList, predicted, label='Predictions')
plt.plot(numberList, limited50testY, label='Targets')
plt.xlabel('50 samples')
plt.legend(loc='lower right')

# plot training error
trainplot = plt.figure(2)
plt.plot(range(epochs), train_err)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Train Error')

#plot testing error
testplot = plt.figure(3)
plt.plot(range(epochs), test_err_)
plt.xlabel(str(epochs) + ' iterations')
plt.ylabel('Test Error')
Not sure if that's it, but trainX is normalized whereas testX is not. You might want to use the same normalization on testX before predicting.

tensorflow-for-onehot-classification , cost is always 0

This follows on from this post (not mine): TensorFlow for binary classification
I had a similar issue and converted my data to use one hot encoding. However I'm still getting a cost of 0. Interestingly the accuracy is correct (90%) when I feed my training data back into it.
Code below:
# Set parameters
learning_rate = 0.02
training_iteration = 2
batch_size = int(np.size(y_vals)/300)
display_step = 1
numOfFeatures = 20 # 784 if MNIST
numOfClasses = 2 #10 if MNIST dataset

# TF graph input
x = tf.placeholder("float", [None, numOfFeatures])
y = tf.placeholder("float", [None, numOfClasses])

# Create a model
# Set model weights to random numbers:
# NOTE(review): W has a single output column, so the softmax below runs over
# one value per row and is always 1. log(model) is then 0, which presumably
# explains the zero cost; W and b likely need numOfClasses columns. Kept as
# posted because it is the subject of the question.
W = tf.Variable(tf.random_normal(shape=[numOfFeatures,1])) # Weight vector
b = tf.Variable(tf.random_normal(shape=[1,1])) # Constant

# Construct a linear model
model = tf.nn.softmax(tf.matmul(x, W) + b) # Softmax

# Minimize error using cross entropy
# Cross entropy
cost_function = -tf.reduce_sum(y*tf.log(model))
# Gradient Descent
optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(cost_function)

# Initializing the variables
init = tf.global_variables_initializer()

# Launch the graph
# NOTE(review): the sess.run(...) calls were missing from the listing and
# are restored from the surviving feed_dict arguments - TODO confirm.
with tf.Session() as sess:
    sess.run(init)
    # Training cycle
    for iteration in range(training_iteration):
        avg_cost = 0.
        total_batch = int(len(x_vals)/batch_size)
        # Loop over all batches
        for i in range(total_batch):
            batch_xs = x_vals[i*batch_size:(i*batch_size)+batch_size]
            batch_ys = y_vals_onehot[i*batch_size:(i*batch_size)+batch_size]
            # Fit training using batch data
            sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys})
            # Compute average loss
            avg_cost += sess.run(cost_function, feed_dict={x: batch_xs, y: batch_ys})/total_batch
        # Display logs per iteration step
        if iteration % display_step == 0:
            print ("Iteration:", '%04d' % (iteration + 1), "cost=", "{:.9f}".format(avg_cost))
    print ("Tuning completed!")
    # Evaluation function
    correct_prediction = tf.equal(tf.argmax(model, 1), tf.argmax(y, 1))
    #correct_prediction = tf.equal(model, y)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    # Test the model
    print ("Accuracy:", accuracy.eval({x: x_vals_test, y: y_vals_test_onehot}))
Your output for cost is formatted using "{:.9f}".format(avg_cost), i.e. it is printed with only 9 decimal places.
Therefore, maybe you can replace 9 with a bigger number.
Ok here is what I found in the end.
b = tf.Variable(tf.random_normal(shape=[1,1]))
b = tf.Variable(tf.zeros([1]))
