I am building a basic TensorFlow-driven CNN. There is a dimension mismatch somewhere that I am unable to locate. Thanks in advance.
I am working with jupyter in my system. I run on miniconda environment.
pred = conv_net(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
Here you check whether the index of the maximum value of the predicted image is equal to the actual labelled image. and
both will be a column vector.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
calculate accuracy across all the given images and average them out.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
the reference link from which I am learning
The error is
InvalidArgumentError Traceback (most recent call last)
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ in _create_c_op(graph, node_def, inputs, control_inputs)
1627 try:
-> 1628 c_op = c_api.TF_FinishOperation(op_desc)
1629 except errors.InvalidArgumentError as e:
InvalidArgumentError: Dimensions must be equal, but are 11 and 10 for 'Add_1' (op: 'Add') with input shapes: [?,11], [10].
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-9-b95a9f9c020a> in <module>()
----> 1 pred = conv_net(x, weights, biases)
3 cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
5 optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
<ipython-input-8-a23543908ef7> in conv_net(x, weights, biases)
24 # Output, class prediction
25 # finally we multiply the fully connected layer with the weights and add a bias term.
---> 26 out = tf.add(tf.matmul(fc1, weights['out']), biases['out'])
27 return out
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\ops\ in add(x, y, name)
308 if _ctx is None or not _ctx._eager_context.is_eager:
309 _, _, _op = _op_def_lib._apply_op_helper(
--> 310 "Add", x=x, y=y, name=name)
311 _result = _op.outputs[:]
312 _inputs_flat = _op.inputs
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ in _apply_op_helper(self, op_type_name, name, **keywords)
785 op = g.create_op(op_type_name, inputs, output_types, name=scope,
786 input_types=input_types, attrs=attr_protos,
--> 787 op_def=op_def)
788 return output_structure, op_def.is_stateful, op
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\util\ in new_func(*args, **kwargs)
486 'in a future version' if date is None else ('after %s' % date),
487 instructions)
--> 488 return func(*args, **kwargs)
489 return tf_decorator.make_decorator(func, new_func, 'deprecated',
490 _add_deprecated_arg_notice_to_docstring(
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ in create_op(***failed resolving arguments***)
3272 input_types=input_types,
3273 original_op=self._default_original_op,
-> 3274 op_def=op_def)
3275 self._create_op_helper(ret, compute_device=compute_device)
3276 return ret
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ in __init__(self, node_def, g, inputs, output_types, control_inputs, input_types, original_op, op_def)
1790 op_def, inputs, node_def.attr)
1791 self._c_op = _create_c_op(self._graph, node_def, grouped_inputs,
-> 1792 control_input_ops)
1794 # Initialize self._outputs.
~\Miniconda3\envs\idp\lib\site-packages\tensorflow\python\framework\ in _create_c_op(graph, node_def, inputs, control_inputs)
1629 except errors.InvalidArgumentError as e:
1630 # Convert to ValueError for backwards compatibility.
-> 1631 raise ValueError(str(e))
1633 return c_op
ValueError: Dimensions must be equal, but are 11 and 10 for 'Add_1' (op: 'Add') with input shapes: [?,11], [10].
the following is the full code:
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
import os
df = pd.read_excel(r"C:/Users/ggmah/Desktop/HMM Data updated.xlsx")
dff = OneHotEncoder(df)
dfg = pd.get_dummies(df)
o =list(df.columns.values)
label_dict = dict()
for i,value in enumerate(o):
label_dict[i] = value
training_iters = 220
learning_rate = 0.002
batch_size = 16
n_input = 59
n_classes = 11
x = tf.placeholder("float", [None, 60,11,1])
y = tf.placeholder("float", [None, n_classes])
def conv2d(x, W, b, strides=1):
    """Apply a 2-D convolution, add the bias, and pass the result through ReLU.

    Args:
        x: input tensor handed to ``tf.nn.conv2d``.
        W: convolution filter tensor.
        b: bias added to the convolution output via ``tf.nn.bias_add``.
        strides: stride used for the two middle entries of the strides list.

    Returns:
        The ReLU-activated, bias-shifted convolution output.
    """
    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(x, W, strides=stride_spec, padding='SAME')
    return tf.nn.relu(tf.nn.bias_add(convolved, b))
def maxpool2d(x, k=2):
    """Max-pool ``x`` with a k-by-k window, a stride of k, and 'SAME' padding."""
    window = [1, k, k, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')
weights = {
'wc1': tf.get_variable('W0', shape=(3,3,1,32), initializer=tf.contrib.layers.xavier_initializer()),
'wc2': tf.get_variable('W1', shape=(3,3,32,64), initializer=tf.contrib.layers.xavier_initializer()),
'wc3': tf.get_variable('W2', shape=(3,3,64,128), initializer=tf.contrib.layers.xavier_initializer()),
'wd1': tf.get_variable('W3', shape=(4*4*128,128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('W6', shape=(128,n_classes), initializer=tf.contrib.layers.xavier_initializer()),
biases = {
'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('B4', shape=(10), initializer=tf.contrib.layers.xavier_initializer()),
X = df[['Att1','Att2','Att3','Att4','Att5','Att6','Att7','Att8','Att9','Att10']]
Y = df[['Att11']]
train_X, test_X,train_y,test_y = train_test_split(X,Y,train_size=0.88,random_state=5)
def conv_net(x, weights, biases):
    """Build the network graph: three conv + max-pool stages, a dense layer, an output layer.

    Args:
        x: input tensor fed to the first convolution.
        weights: dict with keys 'wc1', 'wc2', 'wc3', 'wd1' and 'out'.
        biases: dict with keys 'bc1', 'bc2', 'bc3', 'bd1' and 'out'.

    Returns:
        The un-activated output of the final layer (used as logits by the caller).
    """
    # Each stage: convolution + bias + ReLU (conv2d), then 2x2 max pooling
    # (down-sampling) that keeps the maximum of every 2x2 window.
    features = x
    for w_key, b_key in (('wc1', 'bc1'), ('wc2', 'bc2'), ('wc3', 'bc3')):
        features = conv2d(features, weights[w_key], biases[b_key])
        features = maxpool2d(features, k=2)

    # Flatten so that the second dimension equals the first dimension of 'wd1'.
    flat_width = weights['wd1'].get_shape().as_list()[0]
    flattened = tf.reshape(features, [-1, flat_width])
    hidden = tf.nn.relu(tf.add(tf.matmul(flattened, weights['wd1']), biases['bd1']))

    # Output, class prediction: multiply the fully connected layer by the output
    # weights and add the bias. The last dimension of weights['out'] and the
    # length of biases['out'] must agree for tf.add to succeed.
    return tf.add(tf.matmul(hidden, weights['out']), biases['out'])
pred = conv_net(x, weights, biases)
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
#Here you check whether the index of the maximum value of the predicted image is equal to the actual labelled image. and
#both will be a column vector.
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
#calculate accuracy across all the given images and average them out.
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

It's simply a dimension mismatch. The bias of the final 'out' layer should have shape [11], because you have 11 output classes and therefore 11 logits feeding the softmax:
biases = {
'bc1': tf.get_variable('B0', shape=(32), initializer=tf.contrib.layers.xavier_initializer()),
'bc2': tf.get_variable('B1', shape=(64), initializer=tf.contrib.layers.xavier_initializer()),
'bc3': tf.get_variable('B2', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'bd1': tf.get_variable('B3', shape=(128), initializer=tf.contrib.layers.xavier_initializer()),
'out': tf.get_variable('B4', shape=(11), initializer=tf.contrib.layers.xavier_initializer()),


assertion failed: [Condition x == y did not hold element-wise:]

I have built a BiLSTM model with an attention layer for sentence classification task but I am getting an error that my assertion has failed due to mismatch in number of parameters. The attention layer code is here and the error is below the code.
class attention(Layer):
    """Attention layer that re-weights its input along axis 1.

    A score ``tanh(dot(x, W) + b)`` is computed, normalised with a softmax
    over axis 1, and multiplied back onto the input.

    Args:
        return_sequences: when True the weighted input is returned as is;
            when False it is summed over axis 1, which removes that axis.
    """

    def __init__(self, return_sequences=True):
        # The base Layer must be initialised before weights can be added.
        super(attention, self).__init__()
        self.return_sequences = return_sequences

    def build(self, input_shape):
        """Create W with shape (input_shape[-1], 1) and b with shape (input_shape[1], 1)."""
        # NOTE(review): the trailing arguments of both add_weight calls were
        # lost from the posted snippet; "normal" / "zeros" initializers are
        # assumed here -- confirm against the original code.
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")
        super(attention, self).build(input_shape)

    def call(self, x):
        """Return the attention-weighted input, optionally summed over axis 1."""
        e = K.tanh(K.dot(x, self.W) + self.b)
        a = K.softmax(e, axis=1)
        output = x * a
        if self.return_sequences:
            return output
        # Collapsing axis 1 yields one vector per sample instead of a sequence.
        return K.sum(output, axis=1)
When i am training the model with attention layer included, it is giving an error that assertion failed.
Epoch 1/10
InvalidArgumentError Traceback (most recent call last)
<ipython-input-45-ac310033130c> in <module>()
1 #Early stopping, Adam, dropout = 0.3, 0.5, 0.5
2 #history =, Y_train, batch_size=256, epochs=5, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
----> 3 history =, Y_train, batch_size=32, epochs=10, validation_split=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/ in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [32 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [32 758]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-45-ac310033130c>:3) ]] [Op:__inference_train_function_19854]
Function call stack:
My model is
model = Sequential()
model.add(Embedding(max_words, 768, input_length=max_len, weights=[embedding]))
model.add(Conv1D(16, kernel_size=11, activation='relu'))
model.add(Bidirectional(LSTM(16, return_sequences=True)))
model.add(Dense(2, activation='softmax', use_bias=True, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4), bias_regularizer=regularizers.l2(1e-4),
Shape of Y_train is
max_words = 48369
max_len = 768
tok = Tokenizer(num_words = max_words)
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = sequence.pad_sequences(sequences, maxlen = max_len)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)
(43532, 1)
Your target is 2D, so you need to set return_sequences=False in the last attention layer so that it returns a 2D output.
Add a Flatten layer before Dropout and then run it again.

The size of tensor a (10) must match the size of tensor b (9) at non-singleton

I was using this code earlier in a Jupyter notebook; it did not show an error, but the accuracy was very low. Then I tried the same code in Google Colab, and there it shows an error. Please suggest some way to increase the accuracy. I am trying to build a multi-level CNN for leaf detection with downsampling of the image.
import torch
import torch.nn as nn
import torch.nn.functional as F
# CNN that resamples its input to 128x128, 64x64 and 32x32 (see convs) and runs
# every copy through the same conv1 / conv2 / conv2_bn layers.
# NOTE(review): this snippet has lost its indentation and appears to be missing
# lines (no super().__init__() call, no call to self.convs anywhere) -- confirm
# against the original notebook before relying on it.
class Net(nn.Module):
def __init__(self):
self.conv1 = nn.Conv2d(1,32,2)
self.conv2 = nn.Conv2d(32,64,2)
self.conv2_bn = nn.BatchNorm2d(64)
# Dummy 1x1x256x256 input; it is not used by any line visible below.
x= torch.randn(256,256).view(-1,1,256,256)
# NOTE(review): _to_linear is only filled in by convs(); as written it is still
# None when it is handed to nn.Linear on the next line.
self._to_linear = None
self.fc1= nn.Linear(self._to_linear, 512)
self.fc2 = nn.Linear(512,6)
def convs(self,x):
# Nearest-neighbour resampled copies of the input at three fixed sizes.
y=torch.nn.functional.interpolate(x, size=([128,128]), scale_factor=None, mode='nearest', align_corners=None)
z=torch.nn.functional.interpolate(x, size=([64,64]), scale_factor=None, mode='nearest', align_corners=None)
w=torch.nn.functional.interpolate(x, size=([32,32]), scale_factor=None, mode='nearest', align_corners=None)
# print(x[0].shape)
x= F.relu(self.conv1(x))
# NOTE(review): m is created here (and in each branch below) but never applied
# in the visible code.
m = nn.ConstantPad2d(1,0)
x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(x)), 2))
# print(x[0].shape)
y= F.relu(self.conv1(y))
m = nn.ConstantPad2d(1,0)
# NOTE(review): the trailing 2 is passed to F.relu as its second positional
# argument; unlike the x branch there is no max_pool2d call here -- confirm intent.
y = F.relu(self.conv2_bn(self.conv2(y)), 2)
# NOTE(review): the residue after the print below suggests a concatenation of
# x and y (CAT_1) was lost from the snippet.
# print(y[0].shape),y),1)
# print(CAT_1[0].shape)
z= F.relu(self.conv1(z))
m = nn.ConstantPad2d(1,0)
z= F.relu(self.conv2_bn(self.conv2(z)))
# print(z[0].shape),z),1)
# print(CAT_2[0].shape)
w= F.relu(self.conv1(w))
m = nn.ConstantPad2d(1,0)
w = F.relu((self.conv2_bn(self.conv2(w))))
# print(w[0].shape),w),1)
# print("i lov pp")
# print(x[0].shape)
# NOTE(review): in the visible code only x is pooled and returned; y, z and w
# are computed but never used.
x=torch.nn.functional.avg_pool2d(x, (2,2))
# print("i lov pp")
# print(x[0].shape)
# On the first call, record the flattened size of one sample for fc1.
if self._to_linear is None:
self._to_linear = x[0].shape[0]*x[0].shape[1]*x[0].shape[2]
return x
def forward(self, x):
# print("i lov pp")
# NOTE(review): the input is reshaped directly; no self.convs(x) call is visible.
x=x.view(-1, self._to_linear)
x= F.relu(self.fc1(x))
x= self.fc2(x)
# Softmax over dim 1 of the fc2 output (6 values per row).
return F.softmax(x, dim=1)
# print(x[0].shape)
import torch.optim as optim
optimizer = optim.Adam(net.parameters(), lr=0.001)
loss_function = nn.MSELoss()
X = torch.Tensor([i[0] for i in training_data]).view(-1,256,256)
y = torch.Tensor([i[1] for i in training_data])
VAL_PCT = 0.1
val_size=int (len(X)*VAL_PCT)
train_X= X[:-val_size]
train_y= y[:-val_size]
test_y = y[-val_size:]
for epoch in range(EPOCHS):
for i in (range(0, len(train_X), BATCH_SIZE)):
#print(i, i+BATCH_SIZE)
batch_X = train_X[i:i+BATCH_SIZE].view(-1,1,256,256)
# print(batch_X.shape)
batch_y = train_y[i:i+BATCH_SIZE]
outputs = net(batch_X)
#print (outputs.shape)
loss = loss_function(outputs, batch_y)
#print(f"Epoch: {epoch}. Loss: {loss}")
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ UserWarning: Using a target size (torch.Size([10, 256, 256, 3])) that is different to the input size (torch.Size([10, 6])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
return F.mse_loss(input, target, reduction=self.reduction)
RuntimeError Traceback (most recent call last)
<ipython-input-38-a154b102127f> in <module>()
15 outputs = net(batch_X)
16 #print (outputs.shape)
---> 17 loss = loss_function(outputs, batch_y)
18 loss.backward()
19 optimizer.step()
3 frames
/usr/local/lib/python3.6/dist-packages/torch/ in broadcast_tensors(*tensors)
60 if any(type(t) is not Tensor for t in tensors) and has_torch_function(tensors):
61 return handle_torch_function(broadcast_tensors, tensors, *tensors)
---> 62 return _VF.broadcast_tensors(tensors)
RuntimeError: The size of tensor a (10) must match the size of tensor b (9) at non-singleton dimension 3

size mismatch, m1: [3584 x 28], m2: [784 x 128] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:940

I have executed the following code and am getting the error shown at the very bottom. I would like to know how to resolve this. Thanks.
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import transforms
_tasks = transforms.Compose([
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
from torchvision.datasets import MNIST
mnist = MNIST("data", download=True, train=True, transform=_tasks)
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
create training and validation split
split = int(0.8 * len(mnist))
index_list = list(range(len(mnist)))
train_idx, valid_idx = index_list[:split], index_list[split:]
create sampler objects using SubsetRandomSampler
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)
create iterator objects for train and valid datasets
trainloader = DataLoader(mnist, batch_size=256, sampler=tr_sampler)
validloader = DataLoader(mnist, batch_size=256, sampler=val_sampler)
Creating model for execution
class Model(nn.Module):
    """Two-layer fully connected classifier: 784 -> 128 (sigmoid) -> 10.

    The input must already be flattened to shape (batch, 784); the first
    layer is ``nn.Linear(784, 128)`` and does no reshaping itself.
    """

    def __init__(self):
        # The constructor has to be named __init__ (a plain ``init`` is never
        # called), and nn.Module must be initialised before sub-modules are
        # assigned; otherwise the layers are not registered as parameters.
        super().__init__()
        self.hidden = nn.Linear(784, 128)
        self.output = nn.Linear(128, 10)

    def forward(self, x):
        """Return the raw, un-normalised class scores for a batch ``x``."""
        x = self.hidden(x)
        x = F.sigmoid(x)
        x = self.output(x)
        return x
model = Model()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay= 1e-6, momentum = 0.9, nesterov = True)
for epoch in range(1, 11): ## run the model for 10 epochs
train_loss, valid_loss = [], []
#training part
for data, target in trainloader:
#1. forward propagation
output = model(data)
#2. loss calculation
loss = loss_function(output, target)
#3. backward propagation
#4. weight optimization
# evaluation part
for data, target in validloader:
output = model(data)
loss = loss_function(output, target)
Executing this I am getting the following error :
RuntimeError Traceback (most recent call last) in ()
----> 1 output = model(data) 2 3 ## 2. loss calculation 4 loss = loss_function(output, target) 5
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/ in
call(self, *input, **kwargs) 487 result = self._slow_forward(*input,
/usr/local/lib/python3.6/dist-packages/torch/nn/ in
linear(input, weight, bias) 1352 ret =
torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t()) 1353
-> 1354 output = input.matmul(weight.t()) 1355 if bias is not None: 1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 128] at
Your input MNIST data has shape [256, 1, 28, 28] corresponding to [B, C, H, W]. You need to flatten the input images into a single 784 long vector before feeding it to the Linear layer Linear(784, 128) such that the input becomes [256, 784] corresponding to [B, N], where N is 1x28x28, your image size. This can be done as follows:
for data, target in trainloader:
# Flatten MNIST images into a 784 long vector
data = data.view(data.shape[0], -1)
The same is needed to be done in the validation loop.

Neural Network regression in tensorflow: error in code

I don't understand why my code wouldn't run. I started with the TensorFlow tutorial to classify the images in the mnist data set using a single layer feedforward neural net. Then modified the code to create a multilayer perceptron that maps out 37 inputs to 1 output. The input and output training data are being loaded from Matlab data file (.mat)
Here is my code..
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from scipy.io import loadmat
%matplotlib inline
import tensorflow as tf
from tensorflow.contrib import learn
import sklearn
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from warnings import filterwarnings
from sklearn import datasets
from sklearn.preprocessing import scale
from sklearn.cross_validation import train_test_split
from sklearn.datasets import make_moons
X = np.array(loadmat("Data/DataIn.mat")['TrainingDataIn'])
Y = np.array(loadmat("Data/DataOut.mat")['TrainingDataOut'])
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.5)
total_len = X_train.shape[0]
# Parameters
learning_rate = 0.001
training_epochs = 500
batch_size = 10
display_step = 1
dropout_rate = 0.9
# Network Parameters
n_hidden_1 = 19 # 1st layer number of features
n_hidden_2 = 26 # 2nd layer number of features
n_input = X_train.shape[1]
n_classes = 1
# tf Graph input
X = tf.placeholder("float32", [None, 37])
Y = tf.placeholder("float32", [None])
def multilayer_perceptron(X, weights, biases):
    """Two ReLU hidden layers followed by a linear output layer.

    Args:
        X: input tensor multiplied by ``weights['h1']``.
        weights: dict with keys 'h1', 'h2' and 'out'.
        biases: dict with keys 'b1', 'b2' and 'out'.

    Returns:
        The output-layer tensor (no activation applied).
    """
    # Hidden layers with ReLU activation
    hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, weights['h1']), biases['b1']))
    hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, weights['h2']), biases['b2']))
    # Output layer with linear activation
    return tf.matmul(hidden_2, weights['out']) + biases['out']
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1], 0, 0.1)),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes], 0, 0.1))
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1], 0, 0.1)),
'b2': tf.Variable(tf.random_normal([n_hidden_2], 0, 0.1)),
'out': tf.Variable(tf.random_normal([n_classes], 0, 0.1))
# Construct model
pred = multilayer_perceptron(X, weights, biases)
print("Prediction matrix:", pred)
print("Output matrix:", Y)
# Define loss and optimizer
cost = tf.reduce_mean(tf.square(pred-Y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
# Launch the graph
with tf.Session() as sess:
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(total_len/batch_size)
# Loop over all batches
for i in range(total_batch-1):
batch_x = X_train[i*batch_size:(i+1)*batch_size]
batch_y = Y_train[i*batch_size:(i+1)*batch_size]
# Run optimization op (backprop) and cost op (to get loss value)
_, c, p =[optimizer, cost, pred], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# sample prediction
label_value = batch_y
estimate = p
err = label_value-estimate
print ("num batch:", total_batch)
# Display logs per epoch step
if epoch % display_step == 0:
print ("Epoch:", '%04d' % (epoch+1), "cost=", \
print ("[*]----------------------------")
for i in xrange(5):
print ("label value:", label_value[i], \
"estimated value:", estimate[i])
print ("[*]============================")
print ("Optimization Finished!")
# Test model
correct_prediction = tf.equal(tf.argmax(pred), tf.argmax(Y))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print ("Accuracy:", accuracy.eval({X: X_test, Y: Y_test}))
when I run the code I get error messages:
ValueError Traceback (most recent call last)
<ipython-input-4-6b8af9192775> in <module>()
93 # Run optimization op (backprop) and cost op (to get loss value)
94 _, c, p =[optimizer, cost, pred], feed_dict={X: batch_x,
---> 95 Y: batch_y})
96 # Compute average loss
97 avg_cost += c / total_batch
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\ in run(self, fetches, feed_dict, options, run_metadata)
787 try:
788 result = self._run(None, fetches, feed_dict, options_ptr,
--> 789 run_metadata_ptr)
790 if run_metadata:
791 proto_data = tf_session.TF_GetBuffer(run_metadata_ptr)
~\AppData\Local\Continuum\Anaconda3\envs\ann\lib\site-packages\tensorflow\python\client\ in _run(self, handle, fetches, feed_dict, options, run_metadata)
973 'Cannot feed value of shape %r for Tensor %r, '
974 'which has shape %r'
--> 975 % (np_val.shape,, str(subfeed_t.get_shape())))
976 if not self.graph.is_feedable(subfeed_t):
977 raise ValueError('Tensor %s may not be fed.' % subfeed_t)
ValueError: Cannot feed value of shape (10, 1) for Tensor 'Placeholder_7:0', which has shape '(?,)'
I've encountered this problem before. The difference is that a Tensor of shape (10, 1) looks like [[1], [2], [3]], while a Tensor of shape (10,) looks like [1, 2, 3].
You should be able to fix it by changing the line
Y = tf.placeholder("float32", [None])
to
Y = tf.placeholder("float32", [None, 1])

Keras error when finetuning InceptionV3

I am trying to follow the "Fine-tune InceptionV3 on a new set of classes" sample code to freeze the first 172 layers and re-train the last layers on cats/dogs dataset. I keep getting an error which I have noted at the bottom. Please help. I am using Ubuntu 16.04, keras 1.2.1, theano, numpy 1.12.0 and python 3.5.
from PIL import Image
import os
import matplotlib.pyplot as plt
import numpy as np
data_root_dir = "/home/ubuntu/ML/data/dogscats/"
train_dir = os.path.join(data_root_dir,"sample", "train")
valid_dir = os.path.join(data_root_dir, "valid")
from keras.applications.inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=True)
# add a global spatial average pooling layer
x = base_model.output
#x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
# this is the model we will train
model = Model(input=base_model.input, output=predictions)
for layer in model.layers[:172]:
layer.trainable = False
for layer in model.layers[172:]:
layer.trainable = True
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')
from sklearn.preprocessing import OneHotEncoder
def get_data(path, target_size=(299,299)):
batches = get_batches(path, shuffle=False, batch_size=1, class_mode=None, target_size=target_size)
return np.concatenate([ for i in range(batches.nb_sample)])
def get_batches(dirname, gen=image.ImageDataGenerator(), shuffle=True, batch_size=2, class_mode='categorical',
return gen.flow_from_directory(dirname, target_size=target_size,
class_mode=class_mode, shuffle=shuffle, batch_size=batch_size)
def onehot(x): return np.array(OneHotEncoder().fit_transform(x.reshape(-1,1)).todense())
# Use batch size of 1 since we're just doing preprocessing on the CPU
val_batches = get_batches(valid_dir, shuffle=False, batch_size=10)
train_batches = get_batches(train_dir, shuffle=False, batch_size=10)
val_classes = val_batches.classes
trn_classes = train_batches.classes
val_labels = onehot(val_classes)
trn_labels = onehot(trn_classes)
model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
validation_data=val_batches, nb_val_samples=val_batches.n)
The exception is: padding must be zero for average_exc_pad
Here is the full stack-trace:
ValueError Traceback (most recent call last)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/ in __call__(self, *args, **kwargs)
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
ValueError: padding must be zero for average_exc_pad
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
<ipython-input-4-369d7760ec6e> in <module>()
35 model.fit_generator(train_batches, samples_per_epoch=train_batches.n, nb_epoch=10,
---> 36 validation_data=val_batches, nb_val_samples=val_batches.n)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/ in fit_generator(self, generator, samples_per_epoch, nb_epoch, verbose, callbacks, validation_data, nb_val_samples, class_weight, max_q_size, nb_worker, pickle_safe, initial_epoch)
1551 outs = self.train_on_batch(x, y,
1552 sample_weight=sample_weight,
-> 1553 class_weight=class_weight)
1555 if not isinstance(outs, list):
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/ in train_on_batch(self, x, y, sample_weight, class_weight)
1314 ins = x + y + sample_weights
1315 self._make_train_function()
-> 1316 outputs = self.train_function(ins)
1317 if len(outputs) == 1:
1318 return outputs[0]
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/backend/ in __call__(self, inputs)
957 def __call__(self, inputs):
958 assert isinstance(inputs, (list, tuple))
--> 959 return self.function(*inputs)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/ in __call__(self, *args, **kwargs)
896 node=self.fn.nodes[self.fn.position_of_error],
897 thunk=thunk,
--> 898 storage_map=getattr(self.fn, 'storage_map', None))
899 else:
900 # old-style linkers raise their own exceptions
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/gof/ in raise_with_op(node, thunk, exc_info, storage_map)
323 # extra long error message in that case.
324 pass
--> 325 reraise(exc_type, exc_value, exc_trace)
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/ in reraise(tp, value, tb)
683 value = tp()
684 if value.__traceback__ is not tb:
--> 685 raise value.with_traceback(tb)
686 raise value
/home/ubuntu/anaconda3/envs/tensorflow/lib/python3.5/site-packages/theano/compile/ in __call__(self, *args, **kwargs)
882 try:
883 outputs =\
--> 884 self.fn() if output_subset is None else\
885 self.fn(output_subset=output_subset)
886 except Exception:
ValueError: padding must be zero for average_exc_pad
Apply node that caused the error: AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}(Join.0, IncSubtensor{InplaceInc;::, ::, :int64:, :int64:}.0, TensorConstant{(2,) of 3}, TensorConstant{(2,) of 1}, TensorConstant{(2,) of 1})
Toposort index: 5270
Inputs types: [TensorType(float32, 4D), TensorType(float32, 4D), TensorType(int64, vector), TensorType(int64, vector), TensorType(int64, vector)]
Inputs shapes: [(10, 2048, 8, 8), (10, 2048, 8, 8), (2,), (2,), (2,)]
Inputs strides: [(524288, 256, 32, 4), (524288, 256, 32, 4), (8,), (8,), (8,)]
Inputs values: ['not shown', 'not shown', array([3, 3]), array([1, 1]), array([1, 1])]
Outputs clients: [[Elemwise{add,no_inplace}(CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, CorrMM_gradInputs{half, (1, 1), (1, 1)}.0, AveragePoolGrad{ignore_border=True, mode='average_exc_pad', ndim=2}.0)]]
Fine-tuning in that situation possibly means using the convolutional layers as pre-trained feature extractors. So you don't really want the top layers (densely connected layers) of the Inception network.
Changing
base_model = InceptionV3(weights='imagenet', include_top=True)
to
base_model = InceptionV3(weights='imagenet', include_top=False)
should work.
Also, if you have 200 classes you should change
# and a logistic layer -- let's say we have 200 classes
predictions = Dense(2, activation='softmax')(x)
to
predictions = Dense(200, activation='softmax')(x)
So your last layer will have the desired 200 elements.
