an error with dimensions in CNN using tensorflow - python-3.x

I am building a basic TensorFlow-driven CNN. There is some kind of dimension mismatch which I am unable to locate. Thanks in advance.
I am working with jupyter in my system. I run on miniconda environment.
# Build the network output (logits) for the input placeholder.
pred = conv_net(x, weights, biases)
# Mean softmax cross-entropy between the logits and the one-hot labels.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
# One Adam update step that minimises the cost.
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Here you check whether the index of the maximum value of the predicted image is equal to that of the actual labelled image;
# both will be column vectors.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
# Calculate the accuracy across all the given images and average it.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
The reference link from where I am learning
The error is
InvalidArgumentError Traceback (most recent call last)
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1627 try:
-> 1628 c_op = c_api.TF_FinishOperation(op_desc)
1629 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimensions must be equal, but are 11 and 10 for 'Add_1' (op: 'Add') with input shapes: [?,11], [10].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-9-b95a9f9c020a> in <module>()
----> 1 pred = conv_net(x, weights, biases)
2
3 cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
4
5 optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
<ipython-input-8-a23543908ef7> in conv_net(x, weights, biases)
24 # Output, class prediction
25 # finally we multiply the fully connected layer with the weights and add a bias term.
---> 26 out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
27 return out
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\ops\gen_math_ops.py in add(x, y, name)
308 if _ctx is None or not _ctx._eager_context.is_eager:
309 _, _, _op = _op_def_lib._apply_op_helper(
--> 310 "Add", x=x, y=y, name=name)
311 _result = _op.outputs[:]
312 _inputs_flat = _op.inputs
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\op_def_library.py in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
789
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\util\deprecation.py in new_func(*args, **kwargs)
486 'in a future version' if date is None else ('after %s' % date),
487 instructions)
--> 488 return func(*args, **kwargs)
489 return tf_decorator.make_decorator(func, new_func, 'deprecated',
490 _add_deprecated_arg_notice_to_docstring(
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ops.py in create_op(***failed resolving arguments***)
3272 input_types=input_types,
3273 original_op=self._default_original_op,
-> 3274 op_def=op_def)
3275 self._create_op_helper(ret, compute_device=compute_device)
3276 return ret
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ops.py in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1790 op_def, inputs, node_def.attr)
1791 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1792 control_input_ops)
1793
1794 # Initialize self._outputs.
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ops.py in _create_c_op(graph, node_def, inputs, control_inputs)
1629 except errors.InvalidArgumentError as e:
1630 # Convert to ValueError for backwards compatibility.
-> 1631 raise ValueError(str(e))
1632
1633 return c_op
ValueError: Dimensions must be equal, but are 11 and 10 for 'Add_1' (op: 'Add') with input shapes: [?,11], [10].
the following is the full code:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"
df = pd.read_excel(r"C:/Users/ggmah/Desktop/HMM Data updated.xlsx")
tf.logging.set_verbosity(tf.logging.INFO)
dff = OneHotEncoder(df)
dfg = pd.get_dummies(df)
o =list(df.columns.values)
label_dict = dict()
for i,value in enumerate(o):
label_dict[i] = value
training_iters = 220
learning_rate = 0.002
batch_size = 16
n_input = 59
n_classes = 11
x = tf.placeholder("float", [None, 60,11,1])
y = tf.placeholder("float", [None, n_classes])
def conv2d(x, W, b, strides=1):
    """Convolve ``x`` with ``W``, add bias ``b`` and apply ReLU.

    Args:
        x: Input tensor passed to ``tf.nn.conv2d``.
        W: Convolution kernel.
        b: Bias added with ``tf.nn.bias_add``.
        strides: Step used for the two middle entries of the stride list;
            the first and last entries are fixed at 1.

    Returns:
        The ReLU-activated result of the biased convolution.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    biased = tf.nn.bias_add(convolved, b)
    return tf.nn.relu(biased)
def maxpool2d(x, k=2):
    """Max-pool ``x`` with a ``k`` x ``k`` window and a matching stride.

    The same ``[1, k, k, 1]`` list is used for both the window size and the
    strides, and padding is ``'SAME'``.
    """
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
# Trainable kernels / weight matrices, all Xavier-initialised.
weights = {
    # 3x3 convolution kernels: 1 -> 32 -> 64 -> 128 channels.
    'wc1': tf.get_variable('W0', shape=(3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()),
    'wc2': tf.get_variable('W1', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()),
    'wc3': tf.get_variable('W2', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()),
    # Dense layer: 4*4*128 = 2048 flattened features -> 128 units.
    # NOTE(review): for the [None, 60, 11, 1] placeholder, three SAME 2x2 poolings
    # should give 8*2*128 = 2048 features as well - confirm if the input size changes.
    'wd1': tf.get_variable('W3', shape=(4*4*128,128), initializer=tf.contrib.layers.xavier_initializer()),
    # Output layer: 128 units -> n_classes logits.
    'out': tf.get_variable('W6', shape=(128,n_classes), initializer=tf.contrib.layers.xavier_initializer()),
}
# Bias vectors, one per layer; each length should match the layer's output width.
biases = {
    'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    # NOTE(review): this is the source of the reported error. weights['out'] produces
    # n_classes (= 11) logits, but this bias has length 10, hence
    # "Dimensions must be equal, but are 11 and 10".
    'out': tf.get_variable('B4', shape=(10), initializer=tf.contrib.layers.xavier_initializer()),
}
X = df[['Att1','Att2','Att3','Att4','Att5','Att6','Att7','Att8','Att9','Att10']]
Y = df[['Att11']]
train_X, test_X,train_y,test_y = train_test_split(X,Y,train_size=0.88,random_state=5)
def conv_net(x, weights, biases):
    """Assemble the forward pass and return the output-layer logits.

    The graph is three (convolution + ReLU, 2x2 max-pool) stages, a flatten,
    one ReLU dense layer and a final linear layer.

    Args:
        x: Input tensor fed to the first convolution.
        weights: Dict holding the keys 'wc1', 'wc2', 'wc3', 'wd1' and 'out'.
        biases: Dict holding the keys 'bc1', 'bc2', 'bc3', 'bd1' and 'out'.

    Returns:
        The un-normalised output of the final layer (no softmax applied).
    """
    # Each stage convolves, applies ReLU, then halves the spatial size by pooling.
    feature_map = x
    for kernel_key, bias_key in (('wc1', 'bc1'), ('wc2', 'bc2'), ('wc3', 'bc3')):
        feature_map = conv2d(feature_map, weights[kernel_key], biases[bias_key])
        feature_map = maxpool2d(feature_map, k=2)

    # Flatten to the width the dense weight matrix expects on its input side.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flattened = tf.reshape(feature_map, [-1, flat_width])

    # Fully connected hidden layer with ReLU.
    hidden = tf.nn.relu(tf.add(tf.matmul(flattened, weights['wd1']), biases['bd1']))

    # Output layer: multiply by the output weights and add the output bias.
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])
pred = conv_net(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
#Here you check whether the index of the maximum value of the predicted image is equal to the actual labelled image. and
#both will be a column vector.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#calculate accuracy across all the given images and average them out.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

It's simply a dimension mismatch. The bias of the final `out` layer should have shape [11], because the network has 11 output classes, which feed an 11-way softmax:
# Corrected bias definitions: every bias length matches its layer's output width.
biases = {
    'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
    'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
    'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
    # 11 entries, one per output class, so the bias can be added to the [?, 11] logits.
    'out': tf.get_variable('B4', shape=(11), initializer=tf.contrib.layers.xavier_initializer()),
}

Related

assertion failed: [Condition x == y did not hold element-wise:]

I have built a BiLSTM model with an attention layer for sentence classification task but I am getting an error that my assertion has failed due to mismatch in number of parameters. The attention layer code is here and the error is below the code.
class attention(Layer):
    """Simple additive attention applied along axis 1 of the input.

    A score is computed for every position on axis 1, normalised with a
    softmax over that axis, and used to re-weight the input.

    NOTE(review): written for 3-D input (batch, steps, features), such as
    the output of a recurrent layer with ``return_sequences=True`` - confirm
    for other uses.

    Args:
        return_sequences: If True, return the re-weighted input with its
            axis 1 intact. If False, sum over axis 1 so the output has one
            dimension fewer.
        **kwargs: Standard ``Layer`` keyword arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, return_sequences=True, **kwargs):
        # Forward the standard Layer arguments instead of silently rejecting them.
        super(attention, self).__init__(**kwargs)
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create the score weights (features x 1) and the per-step bias (steps x 1)."""
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(attention, self).build(input_shape)

    def call(self, x):
        """Return the attention-weighted input, optionally summed over axis 1."""
        e = K.tanh(K.dot(x, self.W) + self.b)
        # Normalise the scores across axis 1 so they sum to one per sample.
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        return K.sum(output, axis=1)

    def get_config(self):
        """Include ``return_sequences`` so a saved model can rebuild the layer."""
        config = super(attention, self).get_config()
        config["return_sequences"] = self.return_sequences
        return config
When I train the model with the attention layer included, it fails with an assertion error:
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-45-ac310033130c> in <module>()
1 #Early stopping, Adam, dropout = 0.3, 0.5, 0.5
2 #history = model.fit(sequences_matrix, Y_train, batch_size=256, epochs=5, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
----> 3 history = model.fit(sequences_matrix, Y_train, batch_size=32, epochs=10, validation_split=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [32 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [32 758]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-45-ac310033130c>:3) ]] [Op:__inference_train_function_19854]
Function call stack:
train_function
My model is
model = Sequential()
model.add(Embedding(max_words, 768, input_length=max_len, weights=[embedding]))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(SpatialDropout1D(0.1))
model.add(Conv1D(16, kernel_size=11, activation='relu'))
model.add(Bidirectional(LSTM(16, return_sequences=True)))
model.add(attention(return_sequences=True))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax', use_bias=True, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4), bias_regularizer=regularizers.l2(1e-4),
activity_regularizer=regularizers.l2(1e-5)))
model.summary()
Shape of Y_train is
max_words = 48369
max_len = 768
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = sequence.pad_sequences(sequences, maxlen = max_len)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)
print(Y_train.shape)
(43532, 1)
Your target is 2-D, so you need to set return_sequences=False in the last attention layer so that it returns a 2-D output.
Add flatten layer before Dropout and then execute.
model.add(Flatten())

The size of tensor a (10) must match the size of tensor b (9) at non-singleton

I was using this code earlier in a Jupyter notebook, where it did not show an error, but the accuracy was very low. I then tried the same code in Google Colab, where it raises the error below. Please suggest some way to increase the accuracy. I am trying to build a multi-level CNN for leaf detection that downsamples the input image.
import torch
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Multi-scale CNN for single-channel 256x256 images with 6 outputs.

    The input is processed at four resolutions (256, 128, 64 and 32 pixels).
    Every resolution goes through the same conv1 -> pad -> conv2 -> batch-norm
    stack (the layers are shared). The coarser feature maps are concatenated
    on the channel axis into the progressively pooled finer ones.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 2)
        self.conv2 = nn.Conv2d(32, 64, 2)
        self.conv2_bn = nn.BatchNorm2d(64)

        # Run one dummy image through the convolutional part to discover the
        # flattened feature size. Eval mode + no_grad keeps the probe from
        # updating the BatchNorm running statistics or building a graph.
        self._to_linear = None
        self.eval()
        with torch.no_grad():
            self.convs(torch.randn(1, 1, 256, 256))
        self.train()

        self.fc1 = nn.Linear(self._to_linear, 512)
        self.fc2 = nn.Linear(512, 6)

    def _branch(self, x):
        """Shared per-scale stack; returns the batch-normalised conv2 output."""
        x = F.relu(self.conv1(x))
        # Zero-pad one pixel on every side so conv2 (kernel 2) restores the
        # spatial size that conv1 reduced by one.
        x = F.pad(x, (1, 1, 1, 1))
        return self.conv2_bn(self.conv2(x))

    def convs(self, x):
        """Extract and merge the multi-scale features of ``x``.

        Args:
            x: Tensor of shape (batch, 1, 256, 256).

        Returns:
            The merged feature map after the final max- and average-pooling.
        """
        # Nearest-neighbour downsampled copies of the original input.
        half = F.interpolate(x, size=(128, 128), mode='nearest')
        quarter = F.interpolate(x, size=(64, 64), mode='nearest')
        eighth = F.interpolate(x, size=(32, 32), mode='nearest')

        # The full-resolution branch is pooled once so it matches the 128 branch.
        merged = F.relu(F.max_pool2d(self._branch(x), 2))

        # Concatenate each coarser branch, then pool down to the next scale.
        for scaled in (half, quarter, eighth):
            merged = torch.cat((merged, F.relu(self._branch(scaled))), 1)
            merged = F.max_pool2d(merged, (2, 2))

        merged = F.avg_pool2d(merged, (2, 2))

        if self._to_linear is None:
            # Number of features per sample: channels * height * width.
            self._to_linear = merged[0].numel()
        return merged

    def forward(self, x):
        """Return per-class probabilities (softmax over dim 1) for ``x``."""
        x = self.convs(x)
        x = x.view(-1, self._to_linear)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.softmax(x, dim=1)
net=Net()
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()
X = torch.Tensor([i[0] for i in training_data]).view(-1,256,256)
X=X/255.0
y = torch.Tensor([i[1] for i in training_data])
VAL_PCT = 0.1
val_size=int (len(X)*VAL_PCT)
print(val_size)
train_X= X[:-val_size]
train_y= y[:-val_size]
test_X=X[-val_size:]
test_y = y[-val_size:]
print(len(train_X))
print(len(test_X))
BATCH_SIZE =10
EPOCHS = 1
for epoch in range(EPOCHS):
for i in (range(0, len(train_X), BATCH_SIZE)):
#print(i, i+BATCH_SIZE)
batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,256,256)
# print(batch_X.shape)
batch_y = train_y[i:i+BATCH_SIZE]
#print(batch_y.shape)
net.zero_grad()
outputs = net(batch_X)
#print (outputs.shape)
loss = loss_function(outputs, batch_y)
loss.backward()
optimizer.step()
#print(loss)
#print(f"Epoch: {epoch}. Loss: {loss}")
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/loss.py:432: UserWarning: Using a target size (torch.Size([10, 256, 256, 3])) that is different to the input size (torch.Size([10, 6])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-38-a154b102127f> in <module>()
15 outputs = net(batch_X)
16 #print (outputs.shape)
---> 17 loss = loss_function(outputs, batch_y)
18 loss.backward()
19 optimizer.step()
3 frames
/usr/local/lib/python3.6/dist-packages/torch/functional.py in broadcast_tensors(*tensors)
60 if any(type(t) is not Tensor for t in tensors) and has_torch_function(tensors):
61 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 62 return _VF.broadcast_tensors(tensors)
63
64
RuntimeError: The size of tensor a (10) must match the size of tensor b (9) at non-singleton dimension 3

size mismatch, m1: [3584 x 28], m2: [784 x 128] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:940

I have executed the following code and am getting the error shown at the very bottom. I would like to know how to resolve it. Thanks.
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import transforms
_tasks = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
from torchvision.datasets import MNIST
mnist = MNIST("data", download=True, train=True, transform=_tasks)
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
# create training and validation split
split = int(0.8 * len(mnist))
index_list = list(range(len(mnist)))
train_idx, valid_idx = index_list[:split], index_list[split:]
# create sampler objects using SubsetRandomSampler
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)
# create iterator objects for train and valid datasets
trainloader = DataLoader(mnist, batch_size=256, sampler=tr_sampler)
validloader = DataLoader(mnist, batch_size=256, sampler=val_sampler)
# Creating model for execution
class Model(nn.Module):
    """Two-layer fully connected classifier: 784 inputs -> 128 hidden -> 10 outputs."""

    def __init__(self):
        # The dunder names are required: a plain ``init`` is never called by
        # ``Model()``, which would leave ``hidden`` and ``output`` undefined.
        super().__init__()
        self.hidden = nn.Linear(784, 128)
        self.output = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw class scores for ``x``.

        ``x`` must have 784 as its last dimension (the ``in_features`` of the
        hidden layer); image batches have to be flattened by the caller.
        """
        x = self.hidden(x)
        x = F.sigmoid(x)
        x = self.output(x)
        return x
model = Model()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay= 1e-6, momentum = 0.9, nesterov = True)
for epoch in range(1, 11): ## run the model for 10 epochs
train_loss, valid_loss = [], []
#training part
model.train()
for data, target in trainloader:
optimizer.zero_grad()
#1. forward propagation
output = model(data)
#2. loss calculation
loss = loss_function(output, target)
#3. backward propagation
loss.backward()
#4. weight optimization
optimizer.step()
train_loss.append(loss.item())
# evaluation part
model.eval()
for data, target in validloader:
output = model(data)
loss = loss_function(output, target)
valid_loss.append(loss.item())
Executing this I am getting the following error :
RuntimeError Traceback (most recent call last) in ()
----> 1 output = model(data) 2 3 ## 2. loss calculation 4 loss = loss_function(output, target) 5
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in
call(self, *input, **kwargs) 487 result = self._slow_forward(*input,
**kwargs)
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in
linear(input, weight, bias) 1352 ret =
torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t()) 1353
else:
-> 1354 output = input.matmul(weight.t()) 1355 if bias is not None: 1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 128] at
/pytorch/aten/src/TH/generic/THTensorMath.cpp:940
Your input MNIST data has shape [256, 1, 28, 28] corresponding to [B, C, H, W]. You need to flatten the input images into a single 784 long vector before feeding it to the Linear layer Linear(784, 128) such that the input becomes [256, 784] corresponding to [B, N], where N is 1x28x28, your image size. This can be done as follows:
for data, target in trainloader:
# Flatten MNIST images into a 784 long vector
data = data.view(data.shape[0], -1)
optimizer.zero_grad()
...
The same is needed to be done in the validation loop.

Neural Network regression in tensorflow: error in code

I don't understand why my code wouldn't run. I started with the TensorFlow tutorial to classify the images in the mnist data set using a single layer feedforward neural net. Then modified the code to create a multilayer perceptron that maps out 37 inputs to 1 output. The input and output training data are being loaded from Matlab data file (.mat)
Here is my code..
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.io import loadmat
%matplotlib inline
import tensorflow as tf
from tensorflow.contrib import learn
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
filterwarnings('ignore')
sns.set_style('white')
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_moons
X = np.array(loadmat("Data/DataIn.mat")['TrainingDataIn'])
Y = np.array(loadmat("Data/DataOut.mat")['TrainingDataOut'])
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 10
display_step = 1
dropout_rate = 0.9
# Network Parameters
n_hidden_1 = 19 # 1st layer number of features
n_hidden_2 = 26 # 2nd layer number of features
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
X = tf.placeholder("float32", [None, 37])
Y = tf.placeholder("float32", [None])
def multilayer_perceptron(X, weights, biases):
    """Build a two-hidden-layer perceptron and return its linear output.

    Args:
        X: Input tensor multiplied by ``weights['h1']``.
        weights: Dict with the keys 'h1', 'h2' and 'out'.
        biases: Dict with the keys 'b1', 'b2' and 'out'.

    Returns:
        The output-layer tensor; no activation is applied to it.
    """
    # Two hidden layers, each an affine map followed by ReLU.
    activations = X
    for weight_key, bias_key in (('h1', 'b1'), ('h2', 'b2')):
        affine = tf.add(tf.matmul(activations, weights[weight_key]), biases[bias_key])
        activations = tf.nn.relu(affine)

    # Output layer with linear activation.
    return tf.matmul(activations, weights['out']) + biases['out']
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], 0, 0.1))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
}
# Construct model
pred = multilayer_perceptron(X, weights, biases)
tf.shape(pred)
tf.shape(Y)
print("Prediction matrix:", pred)
print("Output matrix:", Y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
print(total_batch)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p = sess.run([optimizer, cost, pred], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
"{:.9f}".format(avg_cost))
print ("[*]----------------------------")
for i in xrange(5):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
when I run the code I get error messages:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-4-6b8af9192775> in <module>()
93 # Run optimization op (backprop) and cost op (to get loss value)
94 _, c, p = sess.run([optimizer, cost, pred], feed_dict={X: batch_x,
---> 95 Y: batch_y})
96 # Compute average loss
97 avg_cost += c / total_batch
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\session.py in run(self, fetches, feed_dict, options, run_metadata)
787 try:
788 result = self._run(None, fetches, feed_dict, options_ptr,
--> 789 run_metadata_ptr)
790 if run_metadata:
791 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\session.py in _run(self, handle, fetches, feed_dict, options, run_metadata)
973 'Cannot feed value of shape %r for Tensor %r, '
974 'which has shape %r'
--> 975 % (np_val.shape, subfeed_t.name, str(subfeed_t.get_shape())))
976 if not self.graph.is_feedable(subfeed_t):
977 raise ValueError('Tensor %s may not be fed.' % subfeed_t)
ValueError: Cannot feed value of shape (10, 1) for Tensor 'Placeholder_7:0', which has shape '(?,)'
I've encountered this problem before. The difference is that a Tensor of shape (10, 1) looks like [[1], [2], [3]], while a Tensor of shape (10,) looks like [1, 2, 3].
You should be able to fix it by changing the line
Y = tf.placeholder("float32", [None])
to:
Y = tf.placeholder("float32", [None, 1])

Keras error when finetuning InceptionV3

I am trying to follow the "Fine-tune InceptionV3 on a new set of classes" sample code to freeze the first 172 layers and re-train the last layers on cats/dogs dataset. I keep getting an error which I have noted at the bottom. Please help. I am using Ubuntu 16.04, keras 1.2.1, theano 0.9.0beta1.dev, numpy 1.12.0 and python 3.5.
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np
data_root_dir = "/home/ubuntu/ML/data/dogscats/"
train_dir = os.path.join(data_root_dir,"sample", "train")
valid_dir = os.path.join(data_root_dir, "valid")
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=True)
# add a global spatial average pooling layer
x = base_model.output
#x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
# this is the model we will train
model = Model(input=base_model.input, output=predictions)
for layer in model.layers[:172]:
layer.trainable = False
for layer in model.layers[172:]:
layer.trainable = True
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')
from sklearn.preprocessing import OneHotEncoder
def get_data(path, target_size=(299,299)):
    """Read every image under ``path`` and return them concatenated in one array.

    Images are pulled one at a time (batch size 1), unshuffled and without
    labels, from the iterator produced by ``get_batches``.
    """
    loader = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
    images = []
    for _ in range(loader.nb_sample):
        images.append(loader.next())
    return np.concatenate(images)
def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=2, class_mode='categorical',
                target_size=(299,299)):
    """Return a directory iterator that yields image batches from ``dirname``.

    All keyword arguments are forwarded to ``gen.flow_from_directory``.
    Note that the default ``gen`` is created once, when the function is
    defined, and is therefore shared by every call that does not pass its own.
    """
    flow_options = dict(
        target_size=target_size,
        class_mode=class_mode,
        shuffle=shuffle,
        batch_size=batch_size,
    )
    return gen.flow_from_directory(dirname, **flow_options)
def onehot(x):
    """One-hot encode the 1-D label array ``x`` and return a dense NumPy array."""
    # OneHotEncoder expects a 2-D column, so reshape the labels first.
    label_column = x.reshape(-1, 1)
    encoded = OneHotEncoder().fit_transform(label_column)
    return np.array(encoded.todense())
# Use batch size of 1 since we're just doing preprocessing on the CPU
val_batches = get_batches(valid_dir, shuffle=False, batch_size=10)
train_batches = get_batches(train_dir, shuffle=False, batch_size=10)
val_classes = val_batches.classes
trn_classes = train_batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)
model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
validation_data=val_batches, nb_val_samples=val_batches.n)
The exception is: padding must be zero for average_exc_pad
Here is the full stack-trace:
ValueError Traceback (most recent call last)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
ValueError: padding must be zero for average_exc_pad
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-4-369d7760ec6e> in <module>()
34
35 model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
---> 36 validation_data=val_batches, nb_val_samples=val_batches.n)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
1551 outs = self.train_on_batch(x, y,
1552 sample_weight=sample_weight,
-> 1553 class_weight=class_weight)
1554
1555 if not isinstance(outs, list):
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/training.py in train_on_batch(self, x, y, sample_weight, class_weight)
1314 ins = x + y + sample_weights
1315 self._make_train_function()
-> 1316 outputs = self.train_function(ins)
1317 if len(outputs) == 1:
1318 return outputs[0]
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/backend/theano_backend.py in __call__(self, inputs)
957 def __call__(self, inputs):
958 assert isinstance(inputs, (list, tuple))
--> 959 return self.function(*inputs)
960
961
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
896 node=self.fn.nodes[self.fn.position_of_error],
897 thunk=thunk,
--> 898 storage_map=getattr(self.fn, 'storage_map', None))
899 else:
900 # old-style linkers raise their own exceptions
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/gof/link.py in raise_with_op(node, thunk, exc_info, storage_map)
323 # extra long error message in that case.
324 pass
--> 325 reraise(exc_type, exc_value, exc_trace)
326
327
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/six.py in reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
687
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/function_module.py in __call__(self, *args, **kwargs)
882 try:
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
886 except Exception:
ValueError: padding must be zero for average_exc_pad
Apply node that caused the error: AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}(Join.0, IncSubtensor{InplaceInc;::, ::, :int64:, :int64:}.0, TensorConstant{(2,) of 3}, TensorConstant{(2,) of 1}, TensorConstant{(2,) of 1})
Toposort index: 5270
Inputs types: [TensorType(float32, 4D), TensorType(float32, 4D), TensorType(int64, vector), TensorType(int64, vector), TensorType(int64, vector)]
Inputs shapes: [(10, 2048, 8, 8), (10, 2048, 8, 8), (2,), (2,), (2,)]
Inputs strides: [(524288, 256, 32, 4), (524288, 256, 32, 4), (8,), (8,), (8,)]
Inputs values: ['not shown', 'not shown', array([3, 3]), array([1, 1]), array([1, 1])]
Outputs clients: [[Elemwise{add,no_inplace}(CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}.0)]]
Fine-tuning in that situation possibly means using the convolutional layers as pre-trained feature extractors. So you don't really want the top layers (densely connected layers) of the Inception network.
Changing
base_model = InceptionV3(weights='imagenet', include_top=True)
to
base_model = InceptionV3(weights='imagenet', include_top=False)
should work.
Also, if you have 200 classes you should change
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
to
predictions = Dense(200, activation='softmax')(x)
So your last layer will have the desired 200 elements.

Resources