Keras Activation with lambda issue when load_model - keras

I'm trying to apply softmax over a specific axis, and the only way I found was via a lambda function. Here is my code, with an Activation layer that uses a lambda for the softmax:
from keras.models import Model
from keras.layers import Input,Dense,Reshape,Activation
from keras.layers.merge import Multiply,Concatenate
from keras.layers.core import Lambda
from keras.activations import softmax
from keras import backend as K
import numpy as np
N = 6
M = 6
T = 1000
H = 5
# Toy input creation
input = np.concatenate([np.random.normal(np.random.rand(1)[0],1.,(1,N,M)) for t in range(T)],axis=0)
input2 = np.random.rand(T,N,M)
input3 = np.random.rand(T,N,M)
input4 = np.random.rand(T,N,M)
a = np.mean(np.reshape(input,(T,N*M)),axis=1)
a = np.maximum(0.,np.minimum(a,0.9999))
a = np.floor(a*3).astype(int)
a = np.stack([a for i in range(M)],axis=1)
a = np.stack([a for i in range(N)],axis=2)
mix1 = np.concatenate((input2[:,:2,:],input3[:,2:4,:],input4[:,4:,:]),axis=1)
mix2 = np.concatenate((input3[:,:2,:],input4[:,2:4,:],input2[:,4:,:]),axis=1)
mix3 = np.concatenate((input4[:,:2,:],input2[:,2:4,:],input3[:,4:,:]),axis=1)
output = np.choose(a,[mix1,mix2,mix3])
images = np.stack((input2,input3,input4),axis=3)
# models definition
# one general model to be trained and
# one mask model to be used later for testing
input_layer = Input(shape=(N,M))
images_input = Input(shape=(N,M,3))
x = Reshape((N*M,))(input_layer)
x = Dense(H, kernel_initializer='uniform', activation='relu')(x)
x = Dense(N*N*3, kernel_initializer='uniform')(x)
x = Reshape((N,N,3))(x)
masks = Activation(activation=lambda y:softmax(y,axis=3))(x)
output_layer = Multiply()([masks,images_input])
output_layer = Lambda(lambda x:K.sum(x,axis=3))(output_layer)
model = Model(inputs=[input_layer,images_input],outputs=output_layer)
mask_model = Model(inputs=input_layer,outputs=masks)
# Compile model
model.compile(loss='mean_squared_error', optimizer='adam')
# Fit the model
history = model.fit([input,images], output, epochs=200, batch_size=50)
#save models
model.save('test.h5')
mask_model.save('mask_test.h5')
It works fine during training, but when I try to load the file, it fails:
from keras.models import load_model
mask_model = load_model('mask_test.h5')
I get the error:
Traceback (most recent call last):
File "/home/kresch/general2.py", line 3, in <module>
mask_model = load_model('mask_test.h5')
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/models.py", line 246, in load_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/models.py", line 314, in model_from_config
return layer_module.deserialize(config, custom_objects=custom_objects)
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/layers/__init__.py", line 54, in deserialize
printable_module_name='layer')
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 140, in deserialize_keras_object
list(custom_objects.items())))
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/topology.py", line 2450, in from_config
process_layer(layer_data)
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/topology.py", line 2419, in process_layer
custom_objects=custom_objects)
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/layers/__init__.py", line 54, in deserialize
printable_module_name='layer')
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 142, in deserialize_keras_object
return cls.from_config(config['config'])
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/engine/topology.py", line 1242, in from_config
return cls(**config)
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/layers/core.py", line 287, in __init__
self.activation = activations.get(activation)
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/activations.py", line 81, in get
return deserialize(identifier)
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/activations.py", line 73, in deserialize
printable_module_name='activation function')
File "/opt/anaconda3/envs/tensorflow/lib/python3.5/site-packages/keras/utils/generic_utils.py", line 160, in deserialize_keras_object
':' + function_name)
ValueError: Unknown activation function:<lambda>
Process finished with exit code 1
The same happens for:
model = load_model('test.h5')
Am I using the lambda function wrong? Or (better) is there a way I can avoid using the lambda function?

Register the custom activation through the custom_objects argument when loading the model. Since the lambda was serialized under the name <lambda>, something like this should work:
load_model('test.h5', custom_objects={'<lambda>': lambda y: softmax(y, axis=3)})
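A more robust way to avoid the unnamed <lambda> entirely is to define a named activation function and register it under that name at load time. A minimal sketch (the name custom_softmax is my own choice, not from the original code):

from keras.models import load_model
from keras.activations import softmax

def custom_softmax(y):
    return softmax(y, axis=3)

# at build time: use the named function instead of the lambda
# masks = Activation(activation=custom_softmax)(x)

# at load time: map the serialized name back to the function
mask_model = load_model('mask_test.h5', custom_objects={'custom_softmax': custom_softmax})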

Related

Keras deep clustering undefined errors in clustering custom layer

I was following this guide to implement clustering in a deep model:
https://ai-mrkogao.github.io/reinforcement%20learning/clusteringkeras/ but I got two errors.
The first one is on this line:
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
It says:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 463, in __call__
self.build(unpack_singleton(input_shapes))
File "<stdin>", line 14, in build
TypeError: add_weight() got multiple values for argument 'name'
So I located
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
in the class ClusteringLayer and removed name='clusters', as follows:
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
But now it keeps giving me the following error:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 463, in __call__
self.build(unpack_singleton(input_shapes))
File "<stdin>", line 14, in build
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 282, in add_weight
constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py", line 620, in variable
value, dtype=dtype, name=name, constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py", line 782, in variable
constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py", line 263, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py", line 460, in __init__
shape=shape)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py", line 582, in _init_from_args
if init_from_fn else [initial_value]) as name:
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 6513, in __enter__
return self._name_scope.__enter__()
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 4306, in name_scope
if not _VALID_SCOPE_NAME_REGEX.match(name):
TypeError: expected string or bytes-like object
Here is the complete, reproducible code:
import numpy as np
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from keras.datasets import mnist  # needed below for mnist.load_data()
from sklearn.cluster import KMeans
def autoencoder(dims, act='relu', init='glorot_uniform'):
    n_stacks = len(dims) - 1
    input_img = Input(shape=(dims[0],), name='input')
    x = input_img
    for i in range(n_stacks-1):
        x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
    encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x)  # hidden layer; features are extracted from here
    x = encoded
    for i in range(n_stacks-1, 0, -1):
        x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
    x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
    decoded = x
    return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))
x = x.reshape((x.shape[0], -1))
x = np.divide(x, 255.)
x_train.shape
x.shape
n_clusters = len(np.unique(y))
kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=4)
y_pred_kmeans = kmeans.fit_predict(x)
y_pred_kmeans[:10]
dims = [x.shape[-1], 500, 500, 2000, 10]
init = VarianceScaling(scale=1. / 3., mode='fan_in',distribution='uniform')
pretrain_optimizer = SGD(lr=1, momentum=0.9)
autoencoder, encoder = autoencoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
autoencoder.fit(x, x, batch_size=250, epochs=2) #, callbacks=cb)
autoencoder.save_weights( 'ae_weights.h5')
class ClusteringLayer(Layer):
    def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
        if 'input_shape' not in kwargs and 'input_dim' in kwargs:
            kwargs['input_shape'] = (kwargs.pop('input_dim'),)
        super(ClusteringLayer, self).__init__(**kwargs)
        self.n_clusters = n_clusters
        self.alpha = alpha
        self.initial_weights = weights
        self.input_spec = InputSpec(ndim=2)

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
        self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights
        self.built = True

    def call(self, inputs, **kwargs):
        q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
        q **= (self.alpha + 1.0) / 2.0
        q = K.transpose(K.transpose(q) / K.sum(q, axis=1))  # make sure each sample's 10 values add up to 1
        return q

    def compute_output_shape(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return input_shape[0], self.n_clusters

    def get_config(self):
        config = {'n_clusters': self.n_clusters}
        base_config = super(ClusteringLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=SGD(0.01, 0.9), loss='kld')
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
encoder.output
clustering_layer
(784-dimensional image input -> 10-way classification output)
The result of
from tensorflow.python import keras
print(keras.__version__)
is
2.2.4-tf
You can even download a Docker image with the Keras installation from repbioinfo/autoencoderforclustering.
I have solved the error: just pass the shape as a keyword argument,
self.clusters = self.add_weight(shape=(self.n_clusters, input_dim),
                                initializer='glorot_uniform', name='clusters')
instead of
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
In this Keras version, the first positional parameter of add_weight is name, so passing the shape tuple positionally makes it collide with the name='clusters' keyword (hence "got multiple values for argument 'name'"). Removing the keyword instead binds the tuple to name, and a tuple is not a valid variable name, which is why TensorFlow's scope-name check then fails with "expected string or bytes-like object".
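For context, the corrected build method of the question's ClusteringLayer would then read like this (same code as above, only the add_weight call changed):

def build(self, input_shape):
    assert len(input_shape) == 2
    input_dim = input_shape[1]
    self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
    # shape= and name= as keywords: nothing collides with add_weight's positional parameters
    self.clusters = self.add_weight(shape=(self.n_clusters, input_dim),
                                    initializer='glorot_uniform', name='clusters')
    if self.initial_weights is not None:
        self.set_weights(self.initial_weights)
        del self.initial_weights
    self.built = True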

TensorFlow AlphaDropout: rank undefined

I am trying to set up a neural network using TensorFlow's tf.contrib.nn.alpha_dropout (as implemented in TensorFlow 1.12.0). Consider the following example:
import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.nn import alpha_dropout
import numpy as np
N_data = 100
x_in = tf.placeholder(tf.float32, shape=[None, N_data], name="x_in")
keep_prob = tf.placeholder(tf.float32)
fc = fully_connected(inputs=x_in, num_outputs=N_data)
drop = alpha_dropout(fc, keep_prob=keep_prob)
x_out = fully_connected(inputs=drop, num_outputs=N_data)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    fd = {
        x_in: np.random.rand(2, N_data),
        keep_prob: 0.5,
    }
    output = x_out.eval(feed_dict=fd)
When evaluating the output of the dropout layer, everything seems normal, but when the output from the dropout layer is linked to a second dense layer, I get the following error message:
Traceback (most recent call last):
File "/***/problem_alpha_dropout.py", line 14, in <module>
x_out = fully_connected(inputs=drop, num_outputs=N_data)
File "/***/anaconda3/envs/TensorFlow/lib/python3.6/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 182, in func_with_args
return func(*args, **current_args)
File "/***/anaconda3/envs/TensorFlow/lib/python3.6/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1854, in fully_connected
outputs = layer.apply(inputs)
File "/***/anaconda3/envs/TensorFlow/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 817, in apply
return self.__call__(inputs, *args, **kwargs)
File "/***/anaconda3/envs/TensorFlow/lib/python3.6/site-packages/tensorflow/python/layers/base.py", line 374, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "/***/anaconda3/envs/TensorFlow/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 730, in __call__
self._assert_input_compatibility(inputs)
File "/***/anaconda3/envs/TensorFlow/lib/python3.6/site-packages/tensorflow/python/keras/engine/base_layer.py", line 1465, in _assert_input_compatibility
self.name + ' is incompatible with the layer: '
ValueError: Input 0 of layer fully_connected_1 is incompatible with the layer: its rank is undefined, but the layer requires a defined rank.
This behaviour does not emerge when tf.contrib.nn.alpha_dropout is replaced by tf.nn.dropout (same usage).
Additional information:
TensorFlow version: 1.12.0 (GPU)
Python version: 3.6 (through Anaconda)
OS: Linux Mint
Just specify the shape of the keep_prob placeholder:
keep_prob = tf.placeholder(tf.float32, shape=())
The rank becomes undefined presumably because keep_prob enters the arithmetic that produces the dropout output, and combining a tensor of known rank with one of unspecified shape leaves the result's static rank unknown, which the next fully_connected layer rejects. Declaring the placeholder as a scalar fixes it.
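The full example with only that line changed (a sketch under the question's TF 1.12 setup):

import tensorflow as tf
from tensorflow.contrib.layers import fully_connected
from tensorflow.contrib.nn import alpha_dropout
import numpy as np

N_data = 100
x_in = tf.placeholder(tf.float32, shape=[None, N_data], name="x_in")
keep_prob = tf.placeholder(tf.float32, shape=())  # scalar shape: rank is now defined
fc = fully_connected(inputs=x_in, num_outputs=N_data)
drop = alpha_dropout(fc, keep_prob=keep_prob)
x_out = fully_connected(inputs=drop, num_outputs=N_data)  # no longer raises

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    output = x_out.eval(feed_dict={x_in: np.random.rand(2, N_data), keep_prob: 0.5})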

Keras 2.2.4 ERROR:AttributeError: 'NoneType' object has no attribute 'inbound_nodes'

I'm building a new channel-wise operation for my network.
The result of a global average pooling will multiply (element-wise) the original input x.
But when I run the train.py file, it raises an error I can't understand. Please help!
The error message:
Traceback (most recent call last):
File "E:/githubRemote/train.py", line 49, in <module>
model = init_model()
File "E:/githubRemote/train.py", line 37, in init_model
model = Model(inputs=im_n, outputs=resd)
File "C:\Users\Anaconda3\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\Anaconda3\lib\site-packages\keras\engine\network.py", line 93, in __init__
self._init_graph_network(*args, **kwargs)
File "C:\Users\Anaconda3\lib\site-packages\keras\engine\network.py", line 231, in _init_graph_network
self.inputs, self.outputs)
File "C:\Users\Anaconda3\lib\site-packages\keras\engine\network.py", line 1366, in _map_graph_network
tensor_index=tensor_index)
File "C:\Users\Anaconda3\lib\site-packages\keras\engine\network.py", line 1353, in build_map
node_index, tensor_index)
File "C:\Users\Anaconda3\lib\site-packages\keras\engine\network.py", line 1353, in build_map
node_index, tensor_index)
File "C:\Users\Anaconda3\lib\site-packages\keras\engine\network.py", line 1353, in build_map
node_index, tensor_index)
[Previous line repeated 3 more times]
File "C:\Users\Anaconda3\lib\site-packages\keras\engine\network.py", line 1325, in build_map
node = layer._inbound_nodes[node_index]
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
The failing code is the Multiply layer operation.
When I comment out net = Multiply()([x, excitation]), it works!
I think Keras may be treating that line as something that is not a Keras layer, hence the NoneType. -.-
My code:
def CAlayer(x, channel, reduction=16):
    # tensorflow implementation:
    # avg_pool = tflearn.global_avg_pool(inputx)
    # conv_1 = slim.conv2d(avg_pool, channel // reduction, 1)
    # conv_2 = slim.conv2d(conv_1, channel, 1, activation_fn=None)
    # excitation = tf.nn.sigmoid(conv_2)
    # keras implementation
    avg_pool = GlobalAveragePooling2D()(x)
    avg_pool = expand_dims(avg_pool, axis=1)
    avg_pool = expand_dims(avg_pool, axis=1)
    conv_1 = Conv2D(channel//reduction, 1, activation=None, padding='same')(avg_pool)
    conv_1_ac = Activation('relu')(conv_1)
    conv_2 = Conv2D(channel, 1, activation=None, padding='same')(conv_1_ac)
    excitation = Activation('sigmoid')(conv_2)
    net = Multiply()([excitation, x])  # --> error here
    # print (net.shape)
    return net
In your code, the lines where you use
avg_pool = expand_dims(avg_pool, axis=1)
are causing the problem: expand_dims is a function defined under keras.backend, which
returns a raw TensorFlow tensor as output, but in a functional model every operation must be encapsulated in a Keras layer.
You must use the equivalent Keras layer instead.
A rule of thumb: all Keras layer classes start with a capital letter.
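For example (a sketch; channel is the number of feature maps, as in the question's CAlayer), either of these keeps the reshaping inside a Keras layer:

from keras.layers import Lambda, Reshape
from keras import backend as K

# option 1: wrap the backend op in a Lambda layer
avg_pool = Lambda(lambda t: K.expand_dims(K.expand_dims(t, axis=1), axis=1))(avg_pool)

# option 2: a Reshape layer produces the same (None, 1, 1, channel) output
avg_pool = Reshape((1, 1, channel))(avg_pool)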

Exception in Keras when trying to use XCeption model as layer in Keras

I am getting an exception in Keras when trying to use a model as a layer. My code looks as follows:
from keras import layers
from keras import applications
from keras import Input
from keras.models import Model
xception_base = applications.Xception(weights=None,
                                      include_top=False)
left_input = Input(shape=(250, 250, 3))
right_input = Input(shape=(250, 250, 3))
left_features = xception_base(left_input)
right_input = xception_base(right_input)
merged_features = layers.concatenate([left_features, right_input], axis=-1)
model = Model([left_input, right_input], merged_features)
Here is the exception I am getting. It is not clear to me from the exception what is going wrong:
Traceback (most recent call last):
File "/home/asattar/workspace/projects/keras-examples/chapter7/MergeTwoModels.py", line 18, in <module>
model = Model([left_input, right_input], merged_features)
File "/usr/local/lib/python2.7/dist-packages/Keras-2.2.4-py2.7.egg/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/Keras-2.2.4-py2.7.egg/keras/engine/network.py", line 93, in __init__
self._init_graph_network(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/Keras-2.2.4-py2.7.egg/keras/engine/network.py", line 224, in _init_graph_network
assert node_index == 0
AssertionError
Can anyone help me with what might be going wrong?
Also, there is no error when I do this:
model = Model(left_input, left_features)
Actually, never mind. I realized that I messed up my variable name in
right_input = xception_base(right_input)
which made my graph circular: the Input tensor right_input gets overwritten by the Xception output, so the tensor listed as a Model input is no longer a real Input layer.
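A sketch of the fix, keeping the input tensor and the feature tensor in separate variables:

left_features = xception_base(left_input)
right_features = xception_base(right_input)  # new name; right_input stays an Input tensor
merged_features = layers.concatenate([left_features, right_features], axis=-1)
model = Model([left_input, right_input], merged_features)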

Chainer CNN- TypeError: forward() missing 1 required positional argument: 'x'

I'm trying to run a classifier in Chainer, but it fails with the following error.
I have no idea what causes it, because I confirmed that the iterator actually sends a batch to the trainer.
Is there a problem with the neural network model, or is the way the data is fed into the model wrong?
Input.py
from chainer.datasets import split_dataset_random
from chainer.iterators import SerialIterator
from chainer.optimizers import Adam
from chainer.training import Trainer
from chainer.training.updaters import StandardUpdater
from chainer import functions as F, links as L
from chainer import Sequential
import numpy as np
batch_size = 3
X_train = np.ones((9957, 60, 80, 3), dtype=np.float32)
X_train, _ = split_dataset_random(X_train, 8000, seed=0)
train_iter = SerialIterator(X_train, batch_size)
model = Sequential(
    L.Convolution2D(None, 64, 3, 2),
    F.relu,
    L.Convolution2D(64, 32, 3, 2),
    F.relu,
    L.Linear(None, 16),
    F.dropout,
    L.Linear(16, 4)
)
model_loss = L.Classifier(model)
optimizer = Adam()
optimizer.setup(model_loss)
updater = StandardUpdater(train_iter, optimizer)
trainer = Trainer(updater, (25, 'epoch'))
trainer.run()
Stacktrace.py
Exception in main training loop: forward() missing 1 required positional argument: 'x'
Traceback (most recent call last):
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 315, in run
update()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 165, in update
self.update_core()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 181, in update_core
optimizer.update(loss_func, in_arrays)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/optimizer.py", line 680, in update
loss = lossfun(*args, **kwds)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/links/model/classifier.py", line 143, in forward
self.y = self.predictor(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/sequential.py", line 210, in forward
x = layer(*x)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
Will finalize trainer extensions and updater before reraising the exception.
Traceback (most recent call last):
File "/home/user/deploy/aaa.py", line 33, in <module>
trainer.run()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 348, in run
six.reraise(*exc_info)
File "/home/user/miniconda3/lib/python3.7/site-packages/six.py", line 693, in reraise
raise value
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 315, in run
update()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 165, in update
self.update_core()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 181, in update_core
optimizer.update(loss_func, in_arrays)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/optimizer.py", line 680, in update
loss = lossfun(*args, **kwds)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/links/model/classifier.py", line 143, in forward
self.y = self.predictor(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/sequential.py", line 210, in forward
x = layer(*x)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
TypeError: forward() missing 1 required positional argument: 'x'
Is there a problem with the neural network model, or with the way the data is fed into the model? Please let me know if you need to see the whole code.
All you have to do is give the model a tuple of (ndarray, int), because that is what L.Classifier expects. With bare arrays, Classifier treats the single batch array as the label and calls the predictor with no input at all, which is why forward() complains about the missing positional argument 'x'.
Is there a problem with the neural network model? Or, the way the data has been fed into the model is wrong?
So the answer is: the way the data is fed into the model is wrong.
In the following code, I define a class inheriting from DatasetMixin that yields a tuple of (ndarray, int). (This is the conventional way of doing it in Chainer.)
Note that the input to L.Convolution2D must be an ndarray of shape (batch, channel, height, width), so I transpose each array in the dataset.
Solution.py
from chainer.datasets import split_dataset_random
from chainer.iterators import SerialIterator
from chainer.optimizers import Adam
from chainer.training import Trainer
from chainer.training.updaters import StandardUpdater
from chainer import functions as F, links as L
from chainer import Sequential
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
    def __init__(self, X, labels):
        super(MyDataset, self).__init__()
        self.X_ = X
        self.labels_ = labels
        self.size_ = X.shape[0]

    def __len__(self):
        return self.size_

    def get_example(self, i):
        return np.transpose(self.X_[i, ...], (2, 0, 1)), self.labels_[i]
batch_size = 3
X_train = np.ones((9957, 60, 80, 3), dtype=np.float32)
label_train = np.random.randint(0, 4, (9957,), dtype=np.int32)
dataset = MyDataset(X_train, label_train)
dataset_train, _ = split_dataset_random(dataset, 8000, seed=0)
train_iter = SerialIterator(dataset_train, batch_size)
model = Sequential(
    L.Convolution2D(None, 64, 3, 2),
    F.relu,
    L.Convolution2D(64, 32, 3, 2),
    F.relu,
    L.Linear(None, 16),
    F.dropout,
    L.Linear(16, 4)
)
model_loss = L.Classifier(model)
optimizer = Adam()
optimizer.setup(model_loss)
updater = StandardUpdater(train_iter, optimizer)
trainer = Trainer(updater, (25, 'epoch'))
trainer.run()
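A quick sanity check of what the dataset now yields (a sketch; indexing works because SubDataset supports item access):

x0, t0 = dataset_train[0]
print(x0.shape)  # (3, 60, 80): (channel, height, width), as Convolution2D expects
print(t0)        # an integer label in [0, 4)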
