my method has 2 input data model
network
branches:
49The first branch is composed of an embedding followed by simple Multi-layer
Perceptron (MLP) designed to handle input of the product description.
The second branch is a CNN to operate over the product image data.
These branches are then be concatenated together to form the final.
The problem is when we try to split the data with train_test_split by Cross Validation, It given as this error.
ValueError: Found input variables with inconsistent numbers of samples: [2, 8382]
MLP and CNN
def create_mlp(dim, regress=False):
# define our MLP network
model = Sequential()
model.add(Dense(8, input_dim=dim, activation="relu"))
model.add(Dense(4, activation="relu"))
# check to see if the regression node should be added
if regress:
model.add(Dense(1, activation="linear"))
# return our model
return model
def create_cnn(width, height, depth, filters=(64, 32, 16), regress=False):
# initialize the input shape and channel dimension, assuming
# TensorFlow/channels-last ordering
inputShape = (height, width, depth)
chanDim = -1
# define the model input
inputs = Input(shape=inputShape)
# loop over the number of filters
for (i, f) in enumerate(filters):
# if this is the first CONV layer then set the input
# appropriately
if i == 0:
x = inputs
# CONV => RELU => BN => POOL
x = Conv2D(f, (3, 3), padding="same")(x)
x = Activation("relu")(x)
x = BatchNormalization(axis=chanDim)(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
# flatten the volume, then FC => RELU => BN => DROPOUT
x = Flatten()(x)
x = Dense(16)(x)
x = Activation("relu")(x)
x = BatchNormalization(axis=chanDim)(x)
x = Dropout(0.5)(x)
# apply another FC layer, this one to match the number of nodes
# coming out of the MLP
x = Dense(4)(x)
x = Activation("relu")(x)
# check to see if the regression node should be added
if regress:
x = Dense(1, activation="linear")(x)
# construct the CNN
model = Model(inputs, x)
# return the CNN
return model
mlp = create_mlp(trainEmbedX.shape[1], regress=False)
cnn = create_cnn(64, 64, 3, regress=False)
combinedInput = concatenate([mlp.output, cnn.output])
x = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="sigmoid")(x)
model = Model(inputs=[mlp.input, cnn.input], outputs=x)
model.compile(loss="binary_crossentropy", metrics=['accuracy'], optimizer="adam") # binary_crossentropy
The error occurs here
n_folds=3
epochs=3
batch_size=128
#save the model history in a list after fitting so that we can plot later
model_history = []
for i in range(n_folds):
print("Training on Fold: ",i+1)
t_x, val_x, t_y, val_y = train_test_split([trainEmbedX,trainImagesX], trainY, test_size = 0.2, random_state = np.random.randint(1,1000, 1)[0])
model_history.append(fit_and_evaluate(t_x, val_x, t_y, val_y, epochs, batch_size))
print("======="*12, end="\n\n\n")
Training on Fold: 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-84-651638774259> in <module>
7 for i in range(n_folds):
8 print("Training on Fold: ",i+1)
----> 9 t_x, val_x, t_y, val_y = train_test_split([trainEmbedX,trainImagesX], trainY, test_size = 0.2, random_state = np.random.randint(1,1000, 1)[0])
10 model_history.append(fit_and_evaluate(t_x, val_x, t_y, val_y, epochs, batch_size))
11 print("======="*12, end="\n\n\n")
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/model_selection/_split.py in train_test_split(*arrays, **options)
2182 test_size = 0.25
2183
-> 2184 arrays = indexable(*arrays)
2185
2186 if shuffle is False:
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/utils/validation.py in indexable(*iterables)
258 else:
259 result.append(np.array(X))
--> 260 check_consistent_length(*result)
261 return result
262
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
233 if len(uniques) > 1:
234 raise ValueError("Found input variables with inconsistent numbers of"
--> 235 " samples: %r" % [int(l) for l in lengths])
236
237
ValueError: Found input variables with inconsistent numbers of samples: [2, 8382]
This error happens with mismatching dimensions of X and Y in train_test_split.
By looking at your snippet, you try to concatenate two arrays by [trainEmbedX,trainImagesX] which will add a dimension if the original arrays trainEmbedX and trainImagesX are not 1D, hence you have the shape [2, 8382] in the error.
So instead of [trainEmbedX,trainImagesX], I suggest to use np.concatenate to merge these two arrays by np.concatenate((trainEmbedX,trainImagesX),axis=1).
Related
I have built a BiLSTM model with an attention layer for sentence classification task but I am getting an error that my assertion has failed due to mismatch in number of parameters. The attention layer code is here and the error is below the code.
class attention(Layer):
def __init__(self, return_sequences=True):
self.return_sequences = return_sequences
super(attention,self).__init__()
def build(self, input_shape):
self.W=self.add_weight(name="att_weight", shape=(input_shape[-1],1),
initializer="normal")
self.b=self.add_weight(name="att_bias", shape=(input_shape[1],1),
initializer="zeros")
super(attention,self).build(input_shape)
def call(self, x):
e = K.tanh(K.dot(x,self.W)+self.b)
a = K.softmax(e, axis=1)
output = x*a
if self.return_sequences:
return output
return K.sum(output, axis=1)
When i am training the model with attention layer included, it is giving an error that assertion failed.
Epoch 1/10
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
<ipython-input-45-ac310033130c> in <module>()
1 #Early stopping, Adam, dropout = 0.3, 0.5, 0.5
2 #history = model.fit(sequences_matrix, Y_train, batch_size=256, epochs=5, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', min_delta=0.0001)])
----> 3 history = model.fit(sequences_matrix, Y_train, batch_size=32, epochs=10, validation_split=0.1)
8 frames
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/execute.py in quick_execute(op_name, num_outputs, inputs, attrs, ctx, name)
58 ctx.ensure_initialized()
59 tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
---> 60 inputs, attrs, num_outputs)
61 except core._NotOkStatusException as e:
62 if name is not None:
InvalidArgumentError: assertion failed: [Condition x == y did not hold element-wise:] [x (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/Shape_1:0) = ] [32 1] [y (sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/strided_slice:0) = ] [32 758]
[[node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/assert_equal_1/Assert/Assert (defined at <ipython-input-45-ac310033130c>:3) ]] [Op:__inference_train_function_19854]
Function call stack:
train_function
My model is
model = Sequential()
model.add(Embedding(max_words, 768, input_length=max_len, weights=[embedding]))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(SpatialDropout1D(0.1))
model.add(Conv1D(16, kernel_size=11, activation='relu'))
model.add(Bidirectional(LSTM(16, return_sequences=True)))
model.add(attention(return_sequences=True))
model.add(BatchNormalization())
model.add(Activation('tanh'))
model.add(Dropout(0.2))
model.add(Dense(2, activation='softmax', use_bias=True, kernel_regularizer=regularizers.l1_l2(l1=1e-5, l2=1e-4), bias_regularizer=regularizers.l2(1e-4),
activity_regularizer=regularizers.l2(1e-5)))
model.summary()
Shape of Y_train is
max_words = 48369
max_len = 768
tok = Tokenizer(num_words = max_words)
tok.fit_on_texts(X_train)
sequences = tok.texts_to_sequences(X_train)
sequences_matrix = sequence.pad_sequences(sequences, maxlen = max_len)
Y_train = np.array(Y_train)
Y_test = np.array(Y_test)
print(Y_train.shape)
(43532, 1)
your target is in 2D so you need to set return_sequences=False in the last attention layer in order to return output in 2D format
Add flatten layer before Dropout and then execute.
model.add(Flatten())
I am using TensorFlow 2.0 with Python 3.7.5 to build a neural network for Iris classification using Model sub-classing approach.
The code I have is as follows:
import tensorflow as tf
from tensorflow.keras import Sequential, Model
from tensorflow.keras.layers import Dense, Input
import pandas as pd
import numpy as np
# Read in data-
data = pd.read_csv("iris.csv")
# Get data types for different attributes-
data.dtypes
'''
sepallength float64
sepalwidth float64
petallength float64
petalwidth float64
class object
dtype: object
'''
# Get shape of data-
data.shape
# (150, 5)
# Check for missing values-
data.isnull().values.any()
# False
# Perform label encoding for target variable-
# Initialize a label encoder-
le = LabelEncoder()
# Label encode target attribute-
data['class'] = le.fit_transform(data['class'])
# Get different classes which are label encoded-
le.classes_
# array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)
# Split data into features (X) and target (y)-
X = data.drop('class', axis = 1)
y = data['class']
# Get training & testing sets using features and labels-
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)
# Convert from Pandas to numpy arrays-
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
print("\nTraining and Testing set dimensions:")
print("X_train.shape = {0}, y_train.shape = {1}".format(X_train.shape, y_train.shape))
print("X_test.shape = {0}, y_test.shape = {1}\n".format(X_test.shape, y_test.shape))
# Training and Testing set dimensions:
# X_train.shape = (105, 4), y_train.shape = (105,)
# X_test.shape = (45, 4), y_test.shape = (45,)
class IrisClassifier(Model):
def __init__(self):
super(IrisClassifier, self).__init__()
'''
self.layer1 = Dense(
units = 4, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
'''
self.input_layer = Input(
shape = (4,)
)
self.layer1 = Dense(
units = 10, activation = 'relu',
input_dim = 4,
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.layer2 = Dense(
units = 10, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.outputlayer = Dense(
units = 3, activation = 'softmax'
)
def call(self, x):
x = self.input_layer(x)
x = self.layer1(x)
x = self.layer2(x)
# x = self.layer3(x)
return self.outputlayer(x)
# Instantiate a model of defined neural network class-
model = IrisClassifier()
# Define EarlyStopping callback-
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
# Compile defined model-
model.compile(
optimizer=tf.keras.optimizers.Adam(lr = 0.001),
loss = 'sparse_categorical_crossentropy',
metrics = ['accuracy']
)
# Train model-
history2 = model.fit(
x = X_train, y = y_train,
validation_data = [X_test, y_test],
epochs = 50, batch_size = 16,
callbacks = [callback]
)
When I execute 'history2' code, I get the following error:
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last) in
3 validation_data = [X_test, y_test],
4 epochs = 50, batch_size = 16,
----> 5 callbacks = [callback]
6 )
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py
in fit(self, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight,
sample_weight, initial_epoch, steps_per_epoch, validation_steps,
validation_freq, max_queue_size, workers, use_multiprocessing,
**kwargs)
726 max_queue_size=max_queue_size,
727 workers=workers,
--> 728 use_multiprocessing=use_multiprocessing)
729
730 def evaluate(self,
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_arrays.py
in fit(self, model, x, y, batch_size, epochs, verbose, callbacks,
validation_split, validation_data, shuffle, class_weight,
sample_weight, initial_epoch, steps_per_epoch, validation_steps,
validation_freq, **kwargs)
640 steps=steps_per_epoch,
641 validation_split=validation_split,
--> 642 shuffle=shuffle)
643
644 if validation_data:
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py
in _standardize_user_data(self, x, y, sample_weight, class_weight,
batch_size, check_steps, steps_name, steps, validation_split, shuffle,
extract_tensors_from_dataset) 2417 # First, we build the model
on the fly if necessary. 2418 if not self.inputs:
-> 2419 all_inputs, y_input, dict_inputs = self._build_model_with_inputs(x, y) 2420 is_build_called =
True 2421 else:
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training.py
in _build_model_with_inputs(self, inputs, targets) 2580 # or
lists of arrays, and extract a flat list of inputs from the passed
2581 # structure.
-> 2582 training_utils.validate_input_types(inputs, orig_inputs) 2583 2584 if isinstance(inputs, (list, tuple)):
~/.local/lib/python3.7/site-packages/tensorflow_core/python/keras/engine/training_utils.py
in validate_input_types(inp, orig_inp, allow_dict, field_name) 1149
raise ValueError( 1150 'Please provide as model inputs
either a single array or a list of '
-> 1151 'arrays. You passed: {}={}'.format(field_name, orig_inp)) 1152 1153
ValueError: Please provide as model inputs either a single array or a
list of arrays. You passed: inputs= sepallength sepalwidth
petallength petalwidth 117 7.7 3.8 6.7
2.2 7 5.0 3.4 1.5 0.2 73 6.1 2.8 4.7 1.2 92 5.8 2.6 4.0 1.2 87 6.3 2.3 4.4 1.3 .. ... ... ... ... 93 5.0
2.3 3.3 1.0 30 4.8 3.1 1.6 0.2 25 5.0 3.0 1.6 0.2 31 5.4 3.4 1.5 0.4 97 6.2 2.9 4.3 1.3
[105 rows x 4 columns]
After converting X_train, y_train, X_test and y_test to numpy arrays, when I execute, history2 to train the model, I get the following error:
TypeError: in converted code:
<ipython-input-14-ae6111e00410>:34 call *
x = self.input_layer(x)
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py:427
converted_call
f in m.dict.values() for m in (collections, pdb, copy, inspect, re)):
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/autograph/impl/api.py:427
f in m.dict.values() for m in (collections, pdb, copy, inspect, re)):
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/math_ops.py:1336
tensor_equals
return gen_math_ops.equal(self, other)
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/ops/gen_math_ops.py:3627
equal
name=name)
/home/arjun/.local/lib/python3.7/site-packages/tensorflow_core/python/framework/op_def_library.py:536
_apply_op_helper
repr(values), type(values).name, err))
TypeError: Expected float32 passed to parameter 'y' of op 'Equal', got 'collections' of type 'str' instead. Error: Expected float32, got
'collections' of type 'str' instead.
What's going wrong?
Thanks!
Your problem stems from the way you preprocess your data, before you fit it to your model.
It is highly likely that you pass the entire csv-dataset from iris, including your column headers, hence your issue. You can verify this from
"You passed: inputs= sepallength sepalwidth petallength petalwidth 117
7.7 3.8 6.7".
Ensure that your Xs and ys do not contain the column names, but only the values. Use X_train = X_train.to_numpy() to ensure the conversion works. In older versions, you could also use X_train.values, but the latter has been deprecated.
I solved the problem. According to Francois Chollet:
A subclassed model is a piece of Python code (a call method). There is
no graph of layers here. We cannot know how layers are connected to
each other (because that's defined in the body of call, not as an
explicit data structure), so we cannot infer input / output shapes
Therefore, the following code runs fine (where you don't specify the input training data shape):
# Define EarlyStopping callback-
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
class IrisClassifier(Model):
def __init__(self):
super(IrisClassifier, self).__init__()
self.layer1 = Dense(
units = 10, activation = 'relu',
# input_dim = 4,
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.layer2 = Dense(
units = 10, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal()
)
self.outputlayer = Dense(
units = 3, activation = 'softmax'
)
def call(self, x):
# x = self.input_layer(x)
x = self.layer1(x)
x = self.layer2(x)
# x = self.layer3(x)
return self.outputlayer(x)
# Instantiate a model of defined neural network class-
model2 = IrisClassifier()
# Compile defined model-
model2.compile(
optimizer=tf.keras.optimizers.Adam(lr = 0.001),
loss = 'sparse_categorical_crossentropy',
metrics = ['accuracy']
)
# Train model-
history2 = model2.fit(
x = X_train, y = y_train,
validation_data = [X_test, y_test],
epochs = 50, batch_size = 16,
callbacks = [callback]
)
Thanks!
I'm having trouble adapting an existing Keras model to work with TenforFlow Federated.
The existing model is a 1D convolutional autoencoder (details shown below)
Existing Model:
input_window = Input(shape=(window_length,1))
x = Conv1D(16, 3, activation="relu", padding="same")(input_window)
x = MaxPooling1D(2, padding="same")(x)
x = Conv1D(1, 3, activation="relu", padding="same")(x)
encoded = MaxPooling1D(2, padding="same")(x)
encoder = Model(input_window, encoded)
x = Conv1D(1, 3, activation="relu", padding="same")(encoded)
x = UpSampling1D(2)(x)
x = Conv1D(16, 1, activation='relu')(x)
x = UpSampling1D(2)(x)
decoded = Conv1D(1, 3, activation='sigmoid', padding='same')(x)
autoencoder = Model(input_window, decoded)
Training data is passed as a numpy.ndarray of shape (102, 48, 1).
Conceptually, this represents 102 days worth of data, each containg 48 values. I can provide an example of this if it would assist in answering.
My attempt to convert the model is shown below.
Converted Model:
def create_compiled_keras_model():
input_window = tf.keras.layers.Input(shape=(window_length,1))
x = tf.keras.layers.Conv1D(16, 3, activation="relu", padding="same")(input_window)
x = tf.keras.layers.MaxPooling1D(2, padding="same")(x)
x = tf.keras.layers.Conv1D(1, 3, activation="relu", padding="same")(x)
encoded = tf.keras.layers.MaxPooling1D(2, padding="same")(x)
encoder = tf.keras.Model(input_window, encoded)
x = tf.keras.layers.Conv1D(1, 3, activation="relu", padding="same")(encoded)
x = tf.keras.layers.UpSampling1D(2)(x)
x = tf.keras.layers.Conv1D(16, 1, activation='relu')(x)
x = tf.keras.layers.UpSampling1D(2)(x)
decoded = tf.keras.layers.Conv1D(1, 3, activation='sigmoid', padding='same')(x)
autoencoder = tf.keras.Model(input_window, decoded)
autoencoder.compile(optimizer='adam', loss='MSE')
return autoencoder
sample_batch = train // numpy.ndarray of shape (102, 48, 1)
def model_fn():
keras_model = create_compiled_keras_model()
return tff.learning.from_compiled_keras_model(keras_model, train)
This produces the error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-397-9bed171c79be> in <module>
----> 1 model = model_fn()
<ipython-input-396-13bc1955a7f2> in model_fn()
1 def model_fn():
2 keras_model = create_compiled_keras_model()
----> 3 return tff.learning.from_compiled_keras_model(keras_model, train)
~/miniconda3/lib/python3.6/site-packages/tensorflow_federated/python/learning/model_utils.py in from_compiled_keras_model(keras_model, dummy_batch)
190 raise ValueError('`keras_model` must be compiled. Use from_keras_model() '
191 'instead.')
--> 192 return enhance(_TrainableKerasModel(keras_model, dummy_batch))
193
194
~/miniconda3/lib/python3.6/site-packages/tensorflow_federated/python/learning/model_utils.py in __init__(self, inner_model, dummy_batch)
434 # until the model has been called on input. The work-around is to call
435 # Model.test_on_batch() once before asking for metrics.
--> 436 inner_model.test_on_batch(**dummy_batch)
437 # This must occur after test_on_batch()
438 if len(inner_model.loss_functions) != 1:
TypeError: test_on_batch() argument after ** must be a mapping, not numpy.ndarray
So far I have been unable to resolve this.
Is this an issue relating to my model not being compiled correctly, or due to the way I'm passing data?
Any help in resolving this would be greatly appreciated, thanks!
The sample batch should be something that can be passed to batch_input argument of tff.learning.Model.forward_pass.
For wrapped Keras models, this must be a dict with keys matching the arguments to tf.keras.models.Model.test_on_batch.
For this case, I think you may be able to simply wrap the sample batch in a dict with a single key of x:
numpy_sample_batch = train // numpy.ndarray
sample_batch = {'x': numpy_sample_batch}
I am trying to train a 2D neural network using keras. I have a weird error message, "ValueError: setting an array element with a sequence." when I try to use model.fit function in keras. Specifically, the error says that my "tensor_train_labels" is a sequence instead of an array. But my labels are indeed numpy arrays (not a sequence). I am not sure why does keras complain about it ?
I am following this tutorial for building my network
tensor_train_data.shape
#TensorShape([Dimension(209), Dimension(64), Dimension(64), Dimension(3)])
tensor_test_data.shape
#TensorShape([Dimension(50), Dimension(64), Dimension(64), Dimension(3)])
tensor_train_labels = tf.reshape(tensor_train_labels, [209,1])
tensor_test_labels = tf.reshape(tensor_test_labels, [50,1])
batch_size = 10
epochs = 8
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(32, kernel_size=(3,3), activation='relu',
input_shape=(64, 64, 3)))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2,2)))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(128, activation = 'relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(2, activation = 'softmax'))
model.compile(loss='categorical_crossentropy', optimizer =
tf.keras.optimizers.Adam(lr=0.0001, decay=1e-6), metrics=['accuracy'])
model.fit(tensor_train_data/255.0,
tf.keras.utils.to_categorical(tensor_train_labels),
batch_size = batch_size,
shuffle = True,
epochs = epochs,
validation_data = (tensor_test_data/ 255.0,
tf.keras.utils.to_categorical(tensor_test_labels)))
scores = model.evaluate(tensor_test_labels/ 255.0,
tf.keras.utils.to_categorical(tensor_test_labels))
print('Loss: %.3f' % scores[0])
print('Accuracy: %.3f' % scores[1])
The Error :
ValueError Traceback (most recent call last)
<ipython-input-224-80431a1b3e79> in <module>
1 model.compile(loss='categorical_crossentropy', optimizer = tf.keras.optimizers.Adam(lr=0.0001, decay=1e-6), metrics=['accuracy'])
----> 2 model.fit(tensor_train_data/255.0, tf.keras.utils.to_categorical(tensor_train_labels),
3 batch_size = batch_size,
4 shuffle = True,
5 epochs = epochs,
~\AppData\Local\conda\conda\envs\deeplearning\lib\site-packages\tensorflow\python\keras\utils\np_utils.py in to_categorical(y,
num_classes)
37 last.
38 """
---> 39 y = np.array(y, dtype='int')
40 input_shape = y.shape
41 if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
ValueError: setting an array element with a sequence.
The possible error is that you have arrays of different sizes when you are trying to convert it into the numpy array. Possible solution : https://stackoverflow.com/a/49617425/8185479
I have executed the following code and getting the error shown at extreme bottom. I would like to know how to resolve this. thanks
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import transforms
_tasks = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
from torchvision.datasets import MNIST
mnist = MNIST("data", download=True, train=True, transform=_tasks)
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
create training and validation split
split = int(0.8 * len(mnist))
index_list = list(range(len(mnist)))
train_idx, valid_idx = index_list[:split], index_list[split:]
create sampler objects using SubsetRandomSampler
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)
create iterator objects for train and valid datasets
trainloader = DataLoader(mnist, batch_size=256, sampler=tr_sampler)
validloader = DataLoader(mnist, batch_size=256, sampler=val_sampler)
Creating model for execution
class Model(nn.Module):
def init(self):
super().init()
self.hidden = nn.Linear(784, 128)
self.output = nn.Linear(128, 10)
def forward(self, x):
x = self.hidden(x)
x = F.sigmoid(x)
x = self.output(x)
return x
model = Model()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay= 1e-6, momentum = 0.9, nesterov = True)
for epoch in range(1, 11): ## run the model for 10 epochs
train_loss, valid_loss = [], []
#training part
model.train()
for data, target in trainloader:
optimizer.zero_grad()
#1. forward propagation
output = model(data)
#2. loss calculation
loss = loss_function(output, target)
#3. backward propagation
loss.backward()
#4. weight optimization
optimizer.step()
train_loss.append(loss.item())
# evaluation part
model.eval()
for data, target in validloader:
output = model(data)
loss = loss_function(output, target)
valid_loss.append(loss.item())
Executing this I am getting the following error :
RuntimeError Traceback (most recent call last) in ()
----> 1 output = model(data) 2 3 ## 2. loss calculation 4 loss = loss_function(output, target) 5
/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in
call(self, *input, **kwargs) 487 result = self._slow_forward(*input,
**kwargs)
/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in
linear(input, weight, bias) 1352 ret =
torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t()) 1353
else:
-> 1354 output = input.matmul(weight.t()) 1355 if bias is not None: 1356 output += torch.jit._unwrap_optional(bias)
RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 128] at
/pytorch/aten/src/TH/generic/THTensorMath.cpp:940
Your input MNIST data has shape [256, 1, 28, 28] corresponding to [B, C, H, W]. You need to flatten the input images into a single 784 long vector before feeding it to the Linear layer Linear(784, 128) such that the input becomes [256, 784] corresponding to [B, N], where N is 1x28x28, your image size. This can be done as follows:
for data, target in trainloader:
# Flatten MNIST images into a 784 long vector
data = data.view(data.shape[0], -1)
optimizer.zero_grad()
...
The same is needed to be done in the validation loop.