I recently started learning to implement neural networks using Keras, and I tried to implement the LeNet-5 network for the MNIST problem based on the structure listed below.
Code:
# Load the data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Parameter set up
input_shape = (32, 32, 1)  # 28x28 images are zero-padded to 32x32 below
batch_size = 128

# Format the image info: add a trailing channel axis, then pad 2 pixels on
# every side of the two spatial axes (28 + 2 + 2 = 32).
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_train = np.pad(x_train, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')
x_test = np.pad(x_test, ((0, 0), (2, 2), (2, 2), (0, 0)), 'constant')

# Scale pixel values by 1/255 (as the other MNIST scripts in this file do);
# feeding raw 0-255 values into tanh layers pushes them into saturation.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# Encode label
y_train = keras.utils.to_categorical(y_train, 10)
y_test = keras.utils.to_categorical(y_test, 10)

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# Construct the model
model = keras.Sequential()
conv_stride = 1
pooling_stride = 2
model.add(layers.Conv2D(
    filters=6, kernel_size=[5, 5], input_shape=input_shape, padding="valid",
    strides=[conv_stride, conv_stride], activation='tanh'))
model.add(layers.AveragePooling2D(
    pool_size=[2, 2], padding="valid", strides=pooling_stride))
model.add(layers.Conv2D(
    filters=16, kernel_size=[5, 5], padding="valid",
    strides=[conv_stride, conv_stride], activation='tanh'))
model.add(layers.AveragePooling2D(
    pool_size=[2, 2], padding="valid", strides=pooling_stride))
model.add(layers.Conv2D(
    filters=120, kernel_size=[5, 5], padding="valid",
    strides=[conv_stride, conv_stride], activation='tanh'))
model.add(layers.Flatten())
model.add(layers.Dense(84, activation='tanh'))
model.add(layers.Dense(10, activation=tf.nn.softmax))

# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
model.summary()

model.compile(optimizer="sgd", loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=batch_size, verbose=1, epochs=12)

# evaluate() returns [loss, accuracy] for the single metric compiled above
score = model.evaluate(x_test, y_test, verbose=1)
print("Test Accuracy: ", score[1])
However, I get an error when I run the program. The error message is:
Matrix size-incompatible: In[0]: [128,1536], In[1]: [1176,200]
[[{{node dense/MatMul}} = MatMul[T=DT_FLOAT, _class=["loc:#training/Adam/gradients/dense/MatMul_grad/MatMul"], transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/device:CPU:0"](flatten/Reshape, dense/MatMul/ReadVariableOp)]]
I have checked the structure using model.summary() and it seems like the neural network's structure is correct. Can anybody tell me which part leads to the error?
Related
I am learning about neural networks with Kaggle tutorials. I have made a neural net to predict concrete strength and I want to display the MSE (for starters) metric after fitting the model. I have failed both with print(metrics) and plotting the metrics (displays an empty graph).
# Split the concrete data 70/30 into training and validation frames
df = concrete.copy()
df_train = df.sample(frac=0.7, random_state=0)
df_valid = df.drop(df_train.index)

# Separate the features from the regression target
X_train = df_train.drop('CompressiveStrength', axis=1)
X_valid = df_valid.drop('CompressiveStrength', axis=1)
y_train = df_train['CompressiveStrength']
y_valid = df_valid['CompressiveStrength']

# One input per feature column
input_shape = [X_train.shape[1]]

model = keras.Sequential([
    # input_shape belongs on the FIRST layer; on a later layer it is ignored
    layers.BatchNormalization(input_shape=input_shape),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(512, activation='relu'),
    layers.Dropout(rate=0.3),  # apply 30% dropout to the next layer
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(512, activation='relu'),
    layers.BatchNormalization(),
    layers.Dense(1),
])

model.compile(
    optimizer='sgd',  # SGD is more sensitive to differences of scale
    loss='mse',
    metrics=[tf.keras.metrics.MeanSquaredError()]
)

# NOTE(review): early_stopping is not defined in this snippet; it is assumed
# to be a callback created earlier - confirm.
history = model.fit(
    X_train, y_train,
    validation_data=(X_valid, y_valid),
    batch_size=64,
    epochs=100,
    verbose=0,
    callbacks=[early_stopping],
)

# fit() returns a History object; the per-epoch values live in its
# .history dict, so printing the object itself only shows its repr.
mse_per_epoch = history.history['mean_squared_error']
print('Training MSE per epoch:', mse_per_epoch)
print('Final training MSE:', mse_per_epoch[-1])

pyplot.plot(mse_per_epoch)
pyplot.show()  # render the figure when run as a plain script
I was trying to use the SHAP DeepExplainer (DeepSHAP) module on the MNIST dataset, but I am getting the following error:
ValueError: Dimension 1 in both shapes must be equal, but are 10 and 1. Shapes are [?,10] and [?,1]. for gradients_7/dense_2_1/Softmax_grad/gradients/gradients_7/dense_2_1/Softmax_grad/truediv_grad/Select_1 (op: 'Select') with input shapes: [?,1], [?,10], [?,10].
The error is in the line:
shap_values = e.shap_values(x_test[1:5])
Full code:
from __future__ import print_function
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
# Training hyper-parameters
batch_size = 128
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Place the single channel axis where the backend's image format expects it,
# then reshape both splits to (samples,) + input_shape.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert to float32 and scale by 1/255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Two conv layers, max-pooling, then a dense classifier head with dropout
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# score is [loss, accuracy] for the single metric compiled above
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

import shap
import numpy as np

# select a set of background examples to take an expectation over
background_idx = np.random.choice(x_train.shape[0], 100, replace=False)
background = x_train[background_idx]

# explain predictions of the model on four images (x_test[1:5])
samples = x_test[1:5]
e = shap.DeepExplainer(model, background)
shap_values = e.shap_values(samples)
shap.image_plot(shap_values, -samples)
Which Keras version are you using? You can print it from your code with keras.__version__
When I imported Keras from TensorFlow, your code worked as expected. One of the major changes is using the Adam optimizer, which works better in your case because it improves accuracy within a few epochs. I tried Adadelta, but it was optimizing slowly. Check the performance yourself.
Check the following code for the small modifications (not many). All I did was import the modules from TensorFlow, as Keras 2.3.1 was throwing some other error.
from __future__ import print_function
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import backend as K
batch_size = 256
num_classes = 10
epochs = 12

# input image dimensions
img_rows, img_cols = 28, 28

# the data, split between train and test sets
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add the single channel axis in the position the backend expects
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Convert to float32 and scale by 1/255
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# convert class vectors to binary class matrices
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(num_classes, activation='softmax'))

# Adam is used in place of the original Adadelta, which optimized slowly here
model.compile(
    loss=tf.keras.losses.categorical_crossentropy,
    optimizer=tf.keras.optimizers.Adam(0.001),
    metrics=['accuracy'],
)

# steps_per_epoch / validation_steps are intentionally not passed: with
# in-memory arrays Keras derives the step count from batch_size, whereas the
# floor-divided counts silently dropped the final partial batch (and the
# last validation samples) every epoch.
model.fit(
    x_train, y_train,
    batch_size=batch_size,
    epochs=epochs,
    verbose=1,
    validation_data=(x_test, y_test),
)

# score is [loss, accuracy] for the single metric compiled above
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
I am using a 1D CNN for time series data, but the following error occurs on the model.fit line.
The error is as follows:
Error when checking input: expected conv1d_11_input to have shape (6700, 1)
but got array with shape (1, 1).
Can anyone please help?
The relevant portion of the code is below:
# delim_whitespace is a text-parser option, not a read_excel one, so it is
# not passed here.
dataframe = pd.read_excel("file path", header=None)
dataset = dataframe.values

# Column 0 holds the input values, column 2 the targets
X = dataset[:, 0]
Y = dataset[:, 2]
X = np.expand_dims(X, axis=1)
Y = np.expand_dims(Y, axis=1)

(X_train, X_test, Y_train, Y_test) = train_test_split(
    X, Y, test_size=0.33, random_state=seed)

# Conv1D expects inputs shaped (samples, timesteps, features)
X_train = np.reshape(X_train, (-1, X_train.shape[1], 1))
Y_train = np.reshape(Y_train, (Y_train.shape[0], 1, Y_train.shape[1]))
X_test = np.reshape(X_test, (-1, X_test.shape[1], 1))
print(X_train.shape)
print(Y_train.shape)

# Axis 0 is the sample count; the per-sample input shape is taken from
# axes 1 (timesteps) and 2 (features). Using shape[0] here made the model
# expect one sample to be as long as the whole training set.
n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], Y_train.shape[1]
verbose, epochs, batch_size = 0, 100, 32

# NOTE(review): each sample built above has a single timestep, which is
# shorter than kernel_size=3; the series probably needs to be cut into
# longer windows before these Conv1D layers can be applied - confirm.
# NOTE(review): n_outputs is 1 here, and a softmax over one unit always
# outputs 1.0 - confirm whether this is classification or regression.
model = Sequential()
model.add(Conv1D(filters=64, kernel_size=3, activation='relu',
                 input_shape=(n_timesteps, n_features)))
model.add(Conv1D(filters=64, kernel_size=3, activation='relu'))
model.add(Dropout(0.5))
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()
model.fit(X_train, Y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
Use:
n_timesteps, n_outputs = X_train.shape[1], Y_train.shape[1]
I'm training a functional Keras model on the MNIST dataset. There's a layer that requires 2 inputs - the traditional input tensor and the current batch of one-hot encoded labels. I think I've set up my model to accept 2 inputs, but I get:
ValueError: Error when checking model : the list of Numpy arrays that you are passing to your model is not the size the model expected. Expected to see 2 array(s), but instead got the following list of 1 arrays. [array([[[[0.], ...
Most answers suggested converting inputs to numpy arrays, but mnist images and labels are numpy arrays by default.
# Training hyper-parameters
batch_size = 128
num_classes = 10
epochs = 1

# Mnist part
img_rows, img_cols = 28, 28
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Place the single channel axis where the backend's image format expects it,
# then reshape both splits to (samples,) + input_shape.
if K.image_data_format() == 'channels_first':
    input_shape = (1, img_rows, img_cols)
else:
    input_shape = (img_rows, img_cols, 1)
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

# Convert to float32 and scale by 1/255
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# One-hot encode the labels
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Model part: the image branch is a small CNN; the one-hot labels enter as a
# second model input consumed by the two-input layer near the end.
images = Input(shape=input_shape, name='images_input')
labels = Input(shape=(num_classes,), name='labels_input')

x = Conv2D(32, kernel_size=(3, 3), activation='relu')(images)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Dropout(0.25)(x)
x = Flatten()(x)
x = Dense(128, activation='relu', name='features')(x)
x = Dropout(0.5)(x)
x = SomeLayerWith2Inputs()([x, labels])
output = Dense(num_classes, activation='softmax')(x)

model = Model(inputs=[images, labels], outputs=output)
model.compile(
    loss=keras.losses.categorical_crossentropy,
    optimizer=keras.optimizers.Adadelta(),
    metrics=['accuracy'],
)

# Array types/shapes reported by the author at this point:
#   x_train: numpy.ndarray (60000, 28, 28, 1)   y_train: numpy.ndarray (60000, 10)
#   x_test:  numpy.ndarray (10000, 28, 28, 1)   y_test:  numpy.ndarray (10000, 10)
# The labels are passed twice: once as the second model input, once as target.
train_inputs = [x_train, y_train]
validation = ([x_test, y_test], y_test)
model.fit(
    train_inputs,
    y_train,
    batch_size=batch_size,
    callbacks=[tensorboard],
    epochs=epochs,
    verbose=1,
    validation_data=validation,
)
I've also tried to do model.fit(x={'images_input': x_train, 'labels_input': y_train}, y=y_train), but that also didn't work.
I'm using Keras v2.2.4
Found the problem. There was also a tensorboard callback (not shown in original question):
# TensorBoard callback that logs embeddings of the 'features' layer.
tensorboard = TensorBoard(
    batch_size=128,
    embeddings_freq=1,
    embeddings_layer_names=['features'],
    embeddings_metadata='metadata.tsv',
    # The model has two inputs (images and labels), so the embedding data
    # must supply one array per input; passing x_test alone triggers the
    # "Expected to see 2 array(s)" error.
    embeddings_data=[x_test, y_test],
)
The embeddings_data argument should be [x_test, y_test].
For example, just want the bias. A scalar. No kernel.
Using model.add_weight on tf.Variable and K.variable fails.
This should be simple. Can not find it in the docs.
UPDATE:
This seems to be the best way I've come across so far:
class BiasLayer(keras.layers.Layer):
    """Layer whose output is only a trainable bias (no kernel).

    The input is used solely for its shape: the output is the bias vector
    broadcast against a zero tensor shaped like the input.

    Args:
        output_dim: Number of bias values to learn. Defaults to 1 (a scalar
            bias). The input's last axis must be broadcastable to it.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """

    def __init__(self, output_dim=1, **kwargs):
        super().__init__(**kwargs)
        self.output_dim = output_dim

    def build(self, input_shape):
        # The weight is sized by output_dim so that the tensor returned by
        # call() agrees with compute_output_shape().
        self.V = self.add_weight(
            name='bias',
            shape=(self.output_dim,),
            initializer=keras.initializers.Constant(value=0),
            dtype=tf.float32,
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # zeros_like keeps the output tied to the input's shape without
        # multiplying by the input values, so NaN/inf in x cannot leak into
        # the bias-only output (x * 0 would propagate them).
        return tf.zeros_like(x) + self.V

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)

    def get_config(self):
        # Include output_dim so the layer can be re-created from its config.
        config = super().get_config()
        config['output_dim'] = self.output_dim
        return config
You can set kernel_size=0. I wrote a sample to demonstrate this.
With normal kernel
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
# Load MNIST and scale the pixel values by 1/255
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train / 255.0
x_test = x_test / 255.0

# One-hot encode the ten digit classes
y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Add the trailing channel axis expected by Conv2D
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)

# Kernel size shared by both convolution layers.
# For the "without kernel" variant, use instead:
# kernel_size = 0
kernel_size = (5, 5)

# Two conv / LeakyReLU / max-pool stages followed by a softmax classifier
model = tf.keras.Sequential([
    layers.Conv2D(64, kernel_size, strides=(1, 1), padding='same',
                  input_shape=(28, 28, 1)),
    layers.LeakyReLU(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Conv2D(32, kernel_size, strides=(1, 1), padding='same'),
    layers.LeakyReLU(),
    layers.MaxPooling2D(pool_size=(2, 2)),
    layers.Flatten(),
    layers.Dense(10, activation='softmax'),
])

model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Training and evaluation are left disabled; only the summary is of interest:
# model.fit(x_train, y_train,
#           batch_size=32, nb_epoch=1, verbose=1)
# model.evaluate(x_test, y_test)
model.summary()
The summary with kernel
Without kernel
change the kernel_size=5 to kernel_size=0
The summary without kernel