VGG model not converging on CIFAR10 dataset using PyTorch

My code is available at
https://www.kaggle.com/aman0807/vgg-cifar10/execution
As you can see, the validation loss diverges right from the start of training. I have tried the Adam optimizer as well as the SGD optimizer, but I cannot figure out what I am doing incorrectly. Please point me in the right direction.
Edit: Included the entire code here.
# Importing Dependencies
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
from torchvision import transforms
from torch.utils.data import DataLoader
from tqdm import tqdm
from datetime import datetime
# Defining model
arch = [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
class VGGNet(nn.Module):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(arch)
        self.fcs = nn.Sequential(
            nn.Linear(in_features=512*1*1, out_features=4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        # print(x.shape)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x

    def create_conv_layers(self, arch):
        layers = []
        in_channels = self.in_channels
        for x in arch:
            if type(x) == int:
                out_channels = x
                layers += [nn.Conv2d(in_channels=in_channels, out_channels=out_channels,
                                     kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
                           nn.BatchNorm2d(x),
                           nn.ReLU(),
                           ]
                in_channels = x
            elif x == 'M':
                layers += [nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))]
        return nn.Sequential(*layers)
# Hyperparameters and settings
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE = 16
EPOCHS = 50
train_data = CIFAR10(root=".", train=True,
                     transform=transforms.Compose([transforms.ToTensor()]), download=True)
# print(len(train_data))
val_data = CIFAR10(root=".", train=False,
                   transform=transforms.Compose([transforms.ToTensor()]), download=True)
# print(len(val_data))
train_loader = DataLoader(train_data, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=8)
val_loader = DataLoader(val_data, batch_size=VAL_BATCH_SIZE, shuffle=True, num_workers=8)
# print(len(train_loader))
# print(len(val_loader))
num_train_batches = int(len(train_data)/TRAIN_BATCH_SIZE)
num_val_batches = int(len(val_data)/VAL_BATCH_SIZE)
# Training and Val Loop
model = VGGNet(3, 10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
# optim = torch.optim.Adam(model.parameters(), lr=0.01)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, verbose=True)
# save_path = os.path.join(r"trained_models", f'{datetime.now().strftime("%m%d_%H%M%S")}.pth')
def train_val():
    for epoch in range(1, EPOCHS+1):
        print(f"Epoch: {epoch}/20")
        model.train()
        total_loss = 0
        for data in train_loader:
            image, target = data[0], data[1]
            image, target = image.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(image)
            loss = criterion(output, target)
            total_loss += loss.item()
            loss.backward()
            optimizer.step()
        print(f"Loss : {total_loss / num_train_batches}")
        save_path = os.path.join(r"trained_models", f'{datetime.now().strftime("%m%d_%H%M%S")}_{epoch}.pth')
        if epoch % 5 == 0:
            torch.save(model.state_dict(), save_path)
        with torch.no_grad():
            model.eval()
            total_val_loss = 0
            for data in val_loader:
                image, target = data[0], data[1]
                image, target = image.to(device), target.to(device)
                output = model(image)
                val_loss = criterion(output, target)
                total_val_loss += val_loss
            total_val_loss = total_val_loss / num_val_batches
            print(f"Val Loss: {total_val_loss}")
            scheduler.step(total_val_loss)

# run the training and validation loop
train_val()
The output is:
> Epoch: 1/20 Loss : 1.3286100650795292 Val Loss: 1.3787670135498047
> Epoch: 2/20 Loss : 0.822020811685832 Val Loss: 0.948610246181488
> Epoch: 3/20 Loss : 0.6018326392113476 Val Loss: 0.9581698775291443
> Epoch: 4/20 Loss : 0.47134833609764004 Val Loss: 1.2446043491363525
> Epoch: 5/20 Loss : 0.35625831704114524 Val Loss: 0.8038020730018616
> Epoch: 6/20 Loss : 0.27602518926566605 Val Loss: 0.6090452075004578
> Epoch: 7/20 Loss : 0.21279048924686128 Val Loss: 0.6626076102256775
> Epoch: 8/20 Loss : 0.16782210255280214 Val Loss: 0.6386368870735168
> Epoch: 9/20 Loss : 0.12904227719518205 Val Loss: 0.8135524988174438
> Epoch: 10/20 Loss : 0.10961572862077902 Val Loss: 0.727300226688385
> Epoch: 11/20 Loss : 0.08377284912137456 Val Loss: 0.7346469163894653
> Epoch: 12/20 Loss : 0.07044737199237916 Val Loss: 0.8241418600082397
> Epoch: 13/20 Loss : 0.06040401630707726 Val Loss: 0.8411757349967957
> Epoch: 14/20 Loss : 0.05157513573171604 Val Loss: 0.9980310201644897
> Epoch: 15/20 Loss : 0.04703645325243019 Val Loss: 0.7441162467002869
> Epoch: 16/20 Loss : 0.039386494244257594 Val Loss: 0.7185537219047546
> Epoch: 17/20 Loss : 0.0361507039006692 Val Loss: 0.7251362800598145
> Epoch 17: reducing learning rate of group 0 to 1.0000e-03.
> Epoch: 18/20 Loss : 0.010131187833331622 Val Loss: 0.6911444067955017
> Epoch: 19/20 Loss : 0.004273188020082817 Val Loss: 0.6758599877357483
> Epoch: 20/20 Loss : 0.0023282255553611917 Val Loss: 0.6790934801101685
> Epoch: 21/20 Loss : 0.002249847758697408 Val Loss: 0.6877240538597107
> Epoch: 22/20 Loss : 0.0017385115527510854 Val Loss: 0.6955451369285583
> Epoch: 23/20 Loss : 0.0015288436127294692 Val Loss: 0.6895047426223755
> Epoch: 24/20 Loss : 0.0013521527944272392 Val Loss: 0.6986600160598755
> Epoch: 25/20 Loss : 0.001302041847793007 Val Loss: 0.7062056660652161
> Epoch: 26/20 Loss : 0.0009422088254753626 Val Loss: 0.721610426902771
> Epoch: 27/20 Loss : 0.00092220353266205 Val Loss: 0.7185065150260925
> Epoch: 28/20 Loss : 0.000855279816558849 Val Loss: 0.7262870669364929
> Epoch 28: reducing learning rate of group 0 to 1.0000e-04.
> Epoch: 29/20 Loss : 0.0009319903464029908 Val Loss: 0.7129291892051697
> Epoch: 30/20 Loss : 0.0009213638452758586 Val Loss: 0.7219230532646179
> Epoch: 31/20 Loss : 0.000790543920696094 Val Loss: 0.7180697321891785
> Epoch: 32/20 Loss : 0.0007001494628562334 Val Loss: 0.7132201194763184
> Epoch: 33/20 Loss : 0.000848956296528779 Val Loss: 0.720444917678833
> Epoch: 34/20 Loss : 0.0007309111221651813 Val Loss: 0.7152464389801025
> Epoch: 35/20 Loss : 0.0008760697004345624 Val Loss: 0.7213227152824402
> Epoch: 36/20 Loss : 0.0007820251893755433 Val Loss: 0.7167501449584961
> Epoch: 37/20 Loss : 0.0008780398232813891 Val Loss: 0.7176992297172546
> Epoch: 38/20 Loss : 0.0006976223276931253 Val Loss: 0.7317324280738831
> Epoch: 39/20 Loss : 0.0007082251550411605 Val Loss: 0.7190949320793152
> Epoch 39: reducing learning rate of group 0 to 1.0000e-05.
> Epoch: 40/20 Loss : 0.0007200245124846 Val Loss: 0.7223398089408875
> Epoch: 41/20 Loss : 0.0007270275670335591 Val Loss: 0.721064031124115
> Epoch: 42/20 Loss : 0.0008191512905424633 Val Loss: 0.7175146341323853
> Epoch: 43/20 Loss : 0.0007468872463225763 Val Loss: 0.7313894629478455
> Epoch: 44/20 Loss : 0.0007858102934361841 Val Loss: 0.7234065532684326
> Epoch: 45/20 Loss : 0.0008550570492202036 Val Loss: 0.7210699319839478
> Epoch: 46/20 Loss : 0.0007921795028490795 Val Loss: 0.7209402322769165
> Epoch: 47/20 Loss : 0.0007907233434224173 Val Loss: 0.7207257747650146
> Epoch: 48/20 Loss : 0.0008106642895507929 Val Loss: 0.7174465656280518
> Epoch: 49/20 Loss : 0.0008104693277185383 Val Loss: 0.7192382216453552
> Epoch: 50/20 Loss : 0.0007664988370204754 Val Loss: 0.722599446773529
> Epoch 50: reducing learning rate of group 0 to 1.0000e-06.
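Given that the training loss keeps shrinking while the validation loss bottoms out around epoch 6 and then drifts upward, the gap looks like overfitting rather than a broken architecture. Below is a minimal sketch of a more regularized CIFAR-10 input pipeline; it assumes the commonly quoted per-channel statistics and standard crop/flip augmentation, neither of which appears in the posted code.
# Sketch only: augmentation + per-channel normalization for CIFAR-10.
# The mean/std values are the commonly quoted CIFAR-10 statistics, not values
# computed from the code above.
from torchvision import transforms

cifar_mean = (0.4914, 0.4822, 0.4465)
cifar_std = (0.2470, 0.2435, 0.2616)

train_transform = transforms.Compose([
    transforms.RandomCrop(32, padding=4),      # random crops from a zero-padded image
    transforms.RandomHorizontalFlip(),         # random left-right flips
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),
])

val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(cifar_mean, cifar_std),  # no augmentation at validation time
])
With transforms like these in place of the bare ToTensor() pipeline, the train and validation curves usually track each other more closely.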

Related

CNN: accuracy and loss are increasing and decreasing

I'm a beginner in deep learning. I created a 3D CNN using PyTorch.
Input image: 120 x 120 x 120.
The problem is that the accuracy and loss keep decreasing and increasing within the same interval, [45, 56].
Can you help me, please?
import torch.nn as nn

class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv_layer1 = self._conv_layer_set(3, 32)
        self.conv_layer2 = self._conv_layer_set(32, 64)
        self.conv_layer3 = self._conv_layer_set(64, 128)
        self.conv_layer4 = self._conv_layer_set(128, 256)
        self.conv_layer5 = self._conv_layer_set(256, 512)
        self.fc1 = nn.Linear(512, 128)
        self.fc2 = nn.Linear(128, num_classes)  # num_classes is defined elsewhere in the original script
        self.relu = nn.LeakyReLU()
        self.batch = nn.BatchNorm1d(128)
        self.drop = nn.Dropout(p=0.5, inplace=True)

    def _conv_layer_set(self, in_c, out_c):
        conv_layer = nn.Sequential(
            nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
            nn.LeakyReLU(),
            nn.MaxPool3d((2, 2, 2)),
        )
        return conv_layer

    def forward(self, x):
        # Set 1
        out = self.conv_layer1(x)
        out = self.conv_layer2(out)
        out = self.conv_layer3(out)
        out = self.conv_layer4(out)
        out = self.conv_layer5(out)
        out = out.view(out.size(0), -1)
        out = self.fc1(out)
        out = self.relu(out)
        out = self.batch(out)
        out = self.drop(out)
        out = self.fc2(out)
        # out = F.softmax(out, dim=1)
        return out
Result:
Iteration: 1/10 Loss: 0.8040086030960083 Accuracy: 47.023809523809526 %
Iteration: 2/10 Loss: 0.8323351740837097 Accuracy: 45.23809523809524 %
Iteration: 3/10 Loss: 0.8008261322975159 Accuracy: 50.595238095238095 %
Iteration: 4/10 Loss: 0.7527135610580444 Accuracy: 55.95238095238095 %
Iteration: 5/10 Loss: 0.7785584330558777 Accuracy: 51.19047619047619 %
Iteration: 6/10 Loss: 0.7463465929031372 Accuracy: 56.25 %
Iteration: 7/10 Loss: 0.8021382093429565 Accuracy: 52.083333333333336 %
Iteration: 8/10 Loss: 0.7705538868904114 Accuracy: 50.595238095238095 %
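One detail worth checking in a stack like this is the flattened feature size feeding fc1 = nn.Linear(512, 128). A minimal sketch that verifies it with a dummy forward pass, assuming a single 3-channel 120x120x120 input as described above:
# Sketch: verify the flattened feature size of the conv stack with a dummy input.
import torch
import torch.nn as nn

def conv_block(in_c, out_c):
    # Same structure as _conv_layer_set in the question.
    return nn.Sequential(
        nn.Conv3d(in_c, out_c, kernel_size=(3, 3, 3), padding=0),
        nn.LeakyReLU(),
        nn.MaxPool3d((2, 2, 2)),
    )

conv_stack = nn.Sequential(
    conv_block(3, 32),
    conv_block(32, 64),
    conv_block(64, 128),
    conv_block(128, 256),
    conv_block(256, 512),
)

with torch.no_grad():
    dummy = torch.zeros(1, 3, 120, 120, 120)
    features = conv_stack(dummy)
    print(features.shape)              # torch.Size([1, 512, 1, 1, 1])
    print(features.view(1, -1).shape)  # torch.Size([1, 512]) -> matches fc1's in_features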

keras model does not learn anything after setting pretrained weights by layer

I am training a smaller VGG-like model, and I set the pretrained weights of VGG16 on the first conv layers, which are identical. My model behaves strangely, though, and does not learn anything at all: the loss stays the same and the accuracy stays the same. What is wrong, and how can I fix it?
from keras import applications
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras import backend as k
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping
from keras import layers
from keras import models
from keras import optimizers
from keras.layers import Dropout
from keras.regularizers import l2
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
import matplotlib.pyplot as plt
from keras.preprocessing.image import ImageDataGenerator
import os
base_model = models.Sequential()
base_model.add(layers.Conv2D(64, (3, 3), activation='relu', name='block1_conv1', input_shape=(224, 224, 3)))
base_model.add(layers.Conv2D(64, (3, 3), activation='relu', name='block1_conv2'))
base_model.add(layers.MaxPooling2D((2, 2)))
#model.add(Dropout(0.2))
base_model.add(layers.Conv2D(128, (3, 3), activation='relu', name='block2_conv1'))
base_model.add(layers.Conv2D(128, (3, 3), activation='relu', name='block2_conv2'))
base_model.add(layers.MaxPooling2D((2, 2), name='block2_pool'))
#model.add(Dropout(0.2))
base_model.summary()
"""
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
_________________________________________________________________
block1_conv1 (Conv2D) (None, 256, 256, 64) 1792
_________________________________________________________________
block1_conv2 (Conv2D) (None, 256, 256, 64) 36928
_________________________________________________________________
block1_pool (MaxPooling2D) (None, 128, 128, 64) 0
_________________________________________________________________
block2_conv1 (Conv2D) (None, 128, 128, 128) 73856
_________________________________________________________________
block2_conv2 (Conv2D) (None, 128, 128, 128) 147584
_________________________________________________________________
block2_pool (MaxPooling2D) (None, 64, 64, 128) 0
=================================================================
Total params: 260,160.0
Trainable params: 260,160.0
Non-trainable params: 0.0
"""
base_model.add(layers.Flatten())
#base_model.add(layers.Dropout(0.5)) #Dropout for regularization
base_model.add(layers.Dense(256, activation='relu'))
base_model.add(layers.Dense(1, activation='sigmoid')) #Sigmoid function at the end because we have just two classes
epochs = 50
callbacks = []
#schedule = None
decay = 0.0
#earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=0, mode='min')
#mcp_save = ModelCheckpoint('.mdl_wts.hdf5', save_best_only=True, monitor='val_loss', mode='min')
#reduce_lr_loss = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1, epsilon=1e-5, mode='min')
# compile the model with a SGD/momentum optimizer
# and a very slow learning rate.
base_model.compile(loss='binary_crossentropy',
optimizer=optimizers.SGD(lr=1e-4,decay=1e-6, momentum=0.9, nesterov=True),
metrics=['accuracy'])
vgg = applications.VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
print('Model loaded.')
"""
['block1_conv1',
'block1_conv2',
'block1_pool',
'block2_conv1',
'block2_conv2',
'block2_pool',
'block3_conv1',
'block3_conv2',
'block3_conv3',
'block3_conv4',
'block3_pool',
'block4_conv1',
'block4_conv2',
'block4_conv3',
'block4_conv4',
'block4_pool',
'block5_conv1',
'block5_conv2',
'block5_conv3',
'block5_conv4',
'block5_pool',
'dense_1',
'dense_2',
'dense_3',
'dropout_1',
'global_average_pooling2d_1',
'input_1']
"""
for layer in vgg.layers:
    if layer.name == 'block1_conv1':
        base_model.layers[0].set_weights(layer.get_weights())
    elif layer.name == 'block1_conv2':
        base_model.layers[1].set_weights(layer.get_weights())
    elif layer.name == 'block2_conv1':
        base_model.layers[3].set_weights(layer.get_weights())
    elif layer.name == 'block2_conv2':
        base_model.layers[4].set_weights(layer.get_weights())
os.environ["CUDA_VISIBLE_DEVICES"]="0"
train_dir = '/home/d/Desktop/s/data/train'
eval_dir = '/home/d/Desktop/s/data/eval'
test_dir = '/home/d/Desktop/s/data/test'
# create a data generator
train_datagen = ImageDataGenerator(rescale=1./255, #Scale the image between 0 and 1
rotation_range=40,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,)
val_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
test_datagen = ImageDataGenerator(rescale=1./255) #We do not augment validation data. we only perform rescale
# load and iterate training dataset
train_generator = train_datagen.flow_from_directory(train_dir, target_size=(224,224),class_mode='binary', batch_size=16, shuffle='True', seed=42)
# load and iterate validation dataset
val_generator = val_datagen.flow_from_directory(eval_dir, target_size=(224,224),class_mode='binary', batch_size=16, shuffle='True', seed=42)
# load and iterate test dataset
test_generator = test_datagen.flow_from_directory(test_dir, target_size=(224,224), class_mode=None, batch_size=1, shuffle='False', seed=42)
#The training part
#We train for 64 epochs with about 100 steps per epoch
history = base_model.fit_generator(train_generator,
steps_per_epoch=train_generator.n // train_generator.batch_size,
epochs=epochs,
validation_data=val_generator,
validation_steps=val_generator.n // val_generator.batch_size) #,
#callbacks=[earlyStopping, mcp_save, reduce_lr_loss])
#Save the model
#base_model.save_weights('/home/d/Desktop/s/base_model_weights.h5')
#base_model.save('/home/d/Desktop/s/base_model_keras.h5')
#lets plot the train and val curve
#get the details form the history object
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
#Train and validation accuracy
plt.plot(epochs, acc, 'b', label='Training accuracy')
plt.plot(epochs, val_acc, 'r', label='Validation accuracy')
plt.title('Training and Validation accurarcy')
plt.legend()
plt.figure()
#Train and validation loss
plt.plot(epochs, loss, 'b', label='Training loss')
plt.plot(epochs, val_loss, 'r', label='Validation loss')
plt.title('Training and Validation loss')
plt.legend()
plt.show()
and the training just goes on forever like this (no changes from epoch to epoch either):
2625/4002 [==================>...........] - ETA: 3:49 - loss: 7.9723 - acc: 0.5053
2626/4002 [==================>...........] - ETA: 3:49 - loss: 7.9720 - acc: 0.5053
2627/4002 [==================>...........] - ETA: 3:49 - loss: 7.9735 - acc: 0.5052
2628/4002 [==================>...........] - ETA: 3:48 - loss: 7.9732 - acc: 0.5052
2629/4002 [==================>...........] - ETA: 3:48 - loss: 7.9732 - acc: 0.5052
2630/4002 [==================>...........] - ETA: 3:48 - loss: 7.9729 - acc: 0.5052
2631/4002 [==================>...........] - ETA: 3:48 - loss: 7.9725 - acc: 0.5052
2632/4002 [==================>...........] - ETA: 3:48 - loss: 7.9729 - acc: 0.5052
2633/4002 [==================>...........] - ETA: 3:48 - loss: 7.9733 - acc: 0.5052
2634/4002 [==================>...........] - ETA: 3:47 - loss: 7.9722 - acc: 0.5053
2635/4002 [==================>...........] - ETA: 3:47 - loss: 7.9730 - acc: 0.5052
2636/4002 [==================>...........] - ETA: 3:47 - loss: 7.9719 - acc: 0.5053
2637/4002 [==================>...........] - ETA: 3:47 - loss: 7.9727 - acc: 0.5052
2638/4002 [==================>...........] - ETA: 3:47 - loss: 7.9731 - acc: 0.5052
2639/4002 [==================>...........] - ETA: 3:47 - loss: 7.9732 - acc: 0.5052
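Since everything hinges on the weight copy actually taking effect, one quick check is to compare the copied arrays against the VGG16 source, layer by layer. A minimal sketch, assuming base_model and vgg as defined above and using only get_weights() and np.allclose:
# Sketch: confirm that the block1/block2 conv weights in base_model match VGG16.
import numpy as np

pairs = [
    (0, 'block1_conv1'),
    (1, 'block1_conv2'),
    (3, 'block2_conv1'),
    (4, 'block2_conv2'),
]

for idx, name in pairs:
    copied = base_model.layers[idx].get_weights()   # [kernel, bias]
    original = vgg.get_layer(name).get_weights()
    same = all(np.allclose(c, o) for c, o in zip(copied, original))
    print(name, 'weights match:', same)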

Why does my neural network sequential model get low accuracy (below 0.0011)?

I am building a hashtag recommendation model for Twitter media posts, which takes tweet text as input, applies a 300-dimensional word embedding to it, and classifies it among 198 hashtag classes. When I run my model, I get an accuracy lower than 0.0011 that does not change later! What is wrong with my model?
import pickle
import numpy as np
from keras import initializers, regularizers
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.layers import LSTM, Activation, Dense, Dropout, Embedding
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, load_model
package = "2018_pickle"
with open(path1, "rb") as f:
    maxLen, l_h2i, l_w2i = pickle.load(f)
with open(path2, "rb") as f:
    X_train, X_test, X_train_indices, X_test_indices = pickle.load(f)
with open(path3, "rb") as f:
    Y_train, Y_test, Y_train_oh, Y_test_oh = pickle.load(f)
with open(path4, "rb") as f:
    emd_matrix = pickle.load(f)

if __name__ == "__main__":
    modelname = "model_1"
    train = False
    vocab_size = len(emd_matrix)
    emd_dim = emd_matrix.shape[1]
    if train:
        model = Sequential()
        model.add(
            Embedding(
                vocab_size,
                emd_dim,
                weights=[emd_matrix],
                input_length=maxLen,
                trainable=False,
            )
        )
        model.add(
            LSTM(
                256,
                return_sequences=True,
                activation="relu",
                kernel_regularizer=regularizers.l2(0.01),
                kernel_initializer=initializers.glorot_normal(seed=None),
            )
        )
        model.add(
            LSTM(
                256,
                return_sequences=True,
                activation="relu",
                kernel_regularizer=regularizers.l2(0.01),
                kernel_initializer=initializers.glorot_normal(seed=None),
            )
        )
        model.add(
            LSTM(
                256,
                return_sequences=False,
                activation="relu",
                kernel_regularizer=regularizers.l2(0.01),
                kernel_initializer=initializers.glorot_normal(seed=None),
            )
        )
        model.add(Dense(198, activation="softmax"))
        model.compile(
            loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
        )
        checkpoint = ModelCheckpoint(
            filepath, monitor="loss", verbose=1, save_best_only=True, mode="min"
        )
        reduce_lr = ReduceLROnPlateau(
            monitor="val_loss", factor=0.5, patience=2, min_lr=0.000001
        )
        history = model.fit(
            X_train_indices,
            Y_train_oh,
            batch_size=2048,
            epochs=5,
            validation_split=0.1,
            shuffle=True,
            callbacks=[checkpoint, reduce_lr],
        )
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_10 (Embedding) (None, 54, 300) 22592100
_________________________________________________________________
lstm_18 (LSTM) (None, 54, 256) 570368
_________________________________________________________________
lstm_19 (LSTM) (None, 54, 256) 525312
_________________________________________________________________
lstm_20 (LSTM) (None, 256) 525312
_________________________________________________________________
dense_7 (Dense) (None, 198) 50886
=================================================================
Total params: 24,263,978
Trainable params: 1,671,878
Non-trainable params: 22,592,100
_________________________________________________________________
None
Train on 177278 samples, validate on 19698 samples
Epoch 1/5
177278/177278 [==============================] - 70s 396us/step - loss: 3.3672 - acc: 8.7433e-04 - val_loss: 0.3103 - val_acc: 0.0000e+00
Epoch 00001: loss improved from inf to 3.36719, saving model to ./checkpoints/model_1/lstm-01-3.367-0.001-0.310-0.000.hdf5
Epoch 2/5
177278/177278 [==============================] - 66s 371us/step - loss: 0.1950 - acc: 2.4820e-04 - val_loss: 0.1616 - val_acc: 0.0016
Epoch 00002: loss improved from 3.36719 to 0.19496, saving model to ./checkpoints/model_1/lstm-02-0.195-0.000-0.162-0.002.hdf5
Epoch 3/5
177278/177278 [==============================] - 66s 370us/step - loss: 0.1583 - acc: 0.0011 - val_loss: 0.1570 - val_acc: 0.0016
Epoch 00003: loss improved from 0.19496 to 0.15826, saving model to ./checkpoints/model_1/lstm-03-0.158-0.001-0.157-0.002.hdf5
Epoch 4/5
177278/177278 [==============================] - 65s 369us/step - loss: 0.1566 - acc: 0.0011 - val_loss: 0.1573 - val_acc: 0.0016
Epoch 00004: loss improved from 0.15826 to 0.15660, saving model to ./checkpoints/model_1/lstm-04-0.157-0.001-0.157-0.002.hdf5
Epoch 5/5
177278/177278 [==============================] - 66s 374us/step - loss: 0.1561 - acc: 0.0011 - val_loss: 0.1607 - val_acc: 0.0016
Epoch 00005: loss improved from 0.15660 to 0.15610, saving model to ./checkpoints/model_1/lstm-05-0.156-0.001-0.161-0.002.hdf5
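A loss that settles near 0.16 while accuracy stays around 0.001 often points at the labels rather than the architecture, since categorical_crossentropy with a 198-way softmax expects one-hot targets. A small diagnostic sketch, assuming Y_train_oh is the NumPy array loaded from the pickle:
# Sketch: check that Y_train_oh really is one-hot over 198 classes.
import numpy as np

print(Y_train_oh.shape)                    # expected: (num_samples, 198)
print(Y_train_oh.min(), Y_train_oh.max())  # expected: 0.0 and 1.0
print(np.unique(Y_train_oh.sum(axis=1)))   # expected: a single value, 1.0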

Multilayer Neural Network - Loss Function is negative and Accuracy (low) remains unchanged

I built a neural network with two hidden layers. I use ReLU activation for the two hidden layers, and for the last one (out_layer) I use a linear activation function. The input, x, has 125 columns (features). The model is:
from collections import namedtuple
import tensorflow as tf  # TensorFlow 1.x API
# train_x, n_hidden_1 and n_hidden_2 are defined elsewhere in the original script

def multilayer_perceptron():
    tf.reset_default_graph()
    x = tf.placeholder(tf.float32, shape=[None, train_x.shape[1]])
    y = tf.placeholder(tf.float32, shape=[None, 1])
    weights = {
        'h1': tf.Variable(tf.random_normal([train_x.shape[1], n_hidden_1])),
        'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
        'out': tf.Variable(tf.random_normal([n_hidden_2, 1]))
    }
    biases = {
        'b1': tf.Variable(tf.random_normal([n_hidden_1])),
        'b2': tf.Variable(tf.random_normal([n_hidden_2])),
        'out': tf.Variable(tf.random_normal([1]))
    }
    # Hidden layer with ReLU
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)
    # Hidden layer with ReLU
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    layer_2 = tf.nn.relu(layer_2)
    # Output layer with linear activation
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    learning_rate = tf.placeholder(tf.float32)
    is_training = tf.Variable(True, dtype=tf.bool)
    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=out_layer, logits=y)
    cost = tf.reduce_mean(cross_entropy)
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)
    predicted = tf.nn.sigmoid(out_layer)
    correct_pred = tf.equal(tf.round(predicted), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    # Export the nodes
    export_nodes = ['x', 'y', 'learning_rate', 'is_training', 'out_layer',
                    'cost', 'optimizer', 'predicted', 'accuracy']
    Graph = namedtuple('Graph', export_nodes)
    local_dict = locals()
    graph = Graph(*[local_dict[each] for each in export_nodes])
    return graph

pred1 = multilayer_perceptron()
pred1 = multilayer_perceptron()
The output of the session, with the loss and accuracy for the train and validation sets, is:
Epoch: 1/25 Train Loss: -219676.2812 Train Acc: 0.1047
....
....
Epoch: 7/25 Train Loss: -46358476.0000 Train Acc: 0.1047
Epoch: 7/25 Validation Loss: -46845576.0000 Validation Acc: 0.1038
Epoch: 7/25 Train Loss: -53906272.0000 Train Acc: 0.1047
Epoch: 7/25 Validation Loss: -54461228.0000 Validation Acc: 0.1038
Epoch: 7/25 Train Loss: -62156116.0000 Train Acc: 0.1047
Epoch: 7/25 Validation Loss: -62784408.0000 Validation Acc: 0.1038
...
...
Epoch: 13/25 Train Loss: -304887584.0000 Train Acc: 0.1047
Epoch: 13/25 Validation Loss: -307502528.0000 Validation Acc: 0.1038
Epoch: 13/25 Train Loss: -327771616.0000 Train Acc: 0.1047
Epoch: 13/25 Validation Loss: -330566752.0000 Validation Acc: 0.1038
Epoch: 13/25 Train Loss: -351641632.0000 Train Acc: 0.1047
Epoch: 13/25 Validation Loss: -354622816.0000 Validation Acc: 0.1038
...
...
Epoch: 25/25 Train Loss: -1748839296.0000 Train Acc: 0.1047
Epoch: 25/25 Validation Loss: -1762260480.0000 Validation Acc: 0.1038
Epoch: 25/25 Train Loss: -1811036800.0000 Train Acc: 0.1047
Epoch: 25/25 Validation Loss: -1824913024.0000 Validation Acc: 0.1038
Epoch: 25/25 Train Loss: -1874472576.0000 Train Acc: 0.1047
Epoch: 25/25 Validation Loss: -1888812416.0000 Validation Acc: 0.1038
'./insurance2.ckpt'
I think that this model didn't learn.
You have inverted the labels and logits parameters of the cross entropy.
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=out_layer, logits=y)
should be
cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=out_layer)
Then your code should work fine.
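The swapped arguments also explain the large negative loss values in the log. The op computes max(x, 0) - x*z + log(1 + exp(-|x|)) with logits x and labels z, which is non-negative when the labels are in [0, 1] but can become arbitrarily negative when unbounded network outputs are passed in the labels slot. A small NumPy sketch of that documented formula, with illustrative values only:
# Sketch: numpy version of the sigmoid_cross_entropy_with_logits formula,
# showing why swapping labels and logits can drive the "loss" negative.
import numpy as np

def sigmoid_ce(labels, logits):
    x, z = logits, labels
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

y_true = np.array([0.0, 1.0, 1.0])          # labels in [0, 1]
raw_outputs = np.array([-8.0, 25.0, 3.0])   # unbounded network outputs (logits)

print(sigmoid_ce(labels=y_true, logits=raw_outputs))  # every entry >= 0
print(sigmoid_ce(labels=raw_outputs, logits=y_true))  # entries can be strongly negative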

Conv1D and batch_size questions

I face two problems when implementing a 1D convnet for multi-channel sequential data
(224 samples x 300 time steps x 19 channels).
1) I set batch_size to 7, but the progress counter jumps by several times that:
not 7, 14, 21, 28, but 7, 56, 105, 147, ... What is wrong with my setup?
2) When I look at the accuracy records, the model seems to learn NOTHING.
Is it impossible to implement a classifier for multi-channel sequential data with Conv1D?
If it is possible, can you give me some advice based on my code?
#result
x_train shape: (224, 300, 19)
224 train samples
28 test samples
Train on 224 samples, validate on 28 samples
Epoch 1/50
7/224 [..............................] - ETA: 68s - loss: 0.6945 - acc: 0.5714
56/224 [======>.......................] - ETA: 6s - loss: 0.6993 - acc: 0.4464
105/224 [=============>................] - ETA: 2s - loss: 0.6979 - acc: 0.4381
147/224 [==================>...........] - ETA: 1s - loss: 0.6968 - acc: 0.4422
189/224 [========================>.....] - ETA: 0s - loss: 0.6953 - acc: 0.4444
224/224 [==============================] - 2s - loss: 0.6953 - acc: 0.4420 - val_loss: 0.6956 - val_acc: 0.5000
Epoch 2/50
7/224 [..............................] - ETA: 0s - loss: 0.6759 - acc: 0.5714
63/224 [=======>......................] - ETA: 0s - loss: 0.6924 - acc: 0.5556
133/224 [================>.............] - ETA: 0s - loss: 0.6905 - acc: 0.5338
203/224 [==========================>...] - ETA: 0s - loss: 0.6903 - acc: 0.5567
224/224 [==============================] - 0s - loss: 0.6923 - acc: 0.5357 - val_loss: 0.6968 - val_acc: 0.5000
# code
from __future__ import print_function
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Activation
from keras.layers import Conv2D, MaxPooling2D, Conv1D, MaxPooling1D
import numpy as np
batch_size = 7
num_classes = 2
epochs = 50
# input data dimensions : 300 sequential x 19 channels
eeg_rows, num_ch = 300, 19
x_train = np.load('eeg_train.npy')
y_train = np.load('label_train.npy')
x_test = np.load('eeg_test.npy')
y_test = np.load('label_test.npy')
x_valid = np.load('eeg_valid.npy')
y_valid = np.load('label_valid.npy')
x_train = x_train.reshape(x_train.shape[0], eeg_rows, num_ch)
x_test = x_test.reshape(x_test.shape[0], eeg_rows,num_ch)
x_valid = x_valid.reshape(x_valid.shape[0], eeg_rows, num_ch)
input_shape = (eeg_rows, num_ch)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_valid = x_test.astype('float32')
x_train /= 100
x_test /= 100
x_valid /= 100
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# model
conv = Sequential()
conv.add(Conv1D(32, 3, input_shape=input_shape, activation='relu', padding='same'))
conv.add(Conv1D(32, 3, activation='relu', padding='same'))
conv.add(MaxPooling1D(pool_size=2, strides=2, padding='same'))
conv.add(Dropout(0.2))
conv.add(Flatten())
conv.add(Dense(16, activation='relu'))
conv.add(Dropout(0.5))
conv.add(Dense(2, activation='softmax'))
conv.compile(loss='categorical_crossentropy',
optimizer=keras.optimizers.Adam(),
metrics=['accuracy'])
# train
conv.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
verbose=1,
validation_data=(x_valid, y_valid))
score = conv.evaluate(x_valid, y_valid, verbose=0)
print(conv.summary())
print(conv.input_shape)
print(conv.output_shape)
print('Test loss:', score[0])
print('Test accuracy:', score[1])
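Regarding question 1), the numbers shown by the Keras progress bar are cumulative sample counts and the display only refreshes periodically, so the counter can appear to skip ahead by several batches even if each gradient step still uses batch_size samples. One way to check the actual per-step batch size is a tiny callback; this is only a sketch and assumes a Keras version whose batch logs include a 'size' entry (Keras 2.x fit does).
# Sketch: print the actual number of samples in each training batch.
import keras

class BatchSizeLogger(keras.callbacks.Callback):
    def on_batch_begin(self, batch, logs=None):
        logs = logs or {}
        print('batch', batch, 'size', logs.get('size'))

# usage with the model from the question:
# conv.fit(x_train, y_train, batch_size=batch_size, epochs=1,
#          verbose=0, callbacks=[BatchSizeLogger()])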
