I am new to transfer learning and I need some guidance, please.
I would like to use the pretrained model at this link:
(https://drive.google.com/file/d/1_yaxaXEINqAD-jlSqxtz1yIsor_0Lwam/view)
How can I load the pretrained model and fine-tune it by freezing the first layers, then train it again on different data (a small dataset of 1000 images)?
Update:
I loaded my models and got this size-mismatch error:
RuntimeError: Error(s) in loading state_dict for Generator:
size mismatch for crop_encoder.bn1.embed.weight: copying a param with shape torch.Size([6, 128]) from checkpoint, the shape in current model is torch.Size([7, 128]).
size mismatch for crop_encoder.bn2.embed.weight: copying a param with shape torch.Size([6, 256]) from checkpoint, the shape in current model is torch.Size([7, 256]).
size mismatch for crop_encoder.bn3.embed.weight: copying a param with shape torch.Size([6, 512]) from checkpoint, the shape in current model is torch.Size([7, 512]).
size mismatch for crop_encoder.bn4.embed.weight: copying a param with shape torch.Size([6, 1024]) from checkpoint, the shape in current model is torch.Size([7, 1024]).
size mismatch for crop_encoder.bn5.embed.weight: copying a param with shape torch.Size([6, 2048]) from checkpoint, the shape in current model is torch.Size([7, 2048]).
size mismatch for layout_encoder.embedding.weight: copying a param with shape torch.Size([6, 64]) from checkpoint, the shape in current model is torch.Size([7, 64]).
size mismatch for layout_encoder.bn1.embed.weight: copying a param with shape torch.Size([6, 128]) from checkpoint, the shape in current model is torch.Size([7, 128]).
size mismatch for layout_encoder.bn2.embed.weight: copying a param with shape torch.Size([6, 256]) from checkpoint, the shape in current model is torch.Size([7, 256]).
size mismatch for layout_encoder.bn3.embed.weight: copying a param with shape torch.Size([6, 512]) from checkpoint, the shape in current model is torch.Size([7, 512]).
size mismatch for layout_encoder.bn4.embed.weight: copying a param with shape torch.Size([6, 1024]) from checkpoint, the shape in current model is torch.Size([7, 1024]).
And this is my code:
import torch
import argparse
import torch.nn.functional as F
from tensorboardX import SummaryWriter
from models.generator_128 import Generator
from models.discriminator import ImageDiscriminator
from models.discriminator import ObjectDiscriminator
from models.discriminator import add_sn
from data.coco_custom_mask import get_dataloader as get_dataloader_coco
from data.vg_custom_mask import get_dataloader as get_dataloader_vg
from data.publaynet_custom_mask import get_dataloader as get_dataloader_publaynet
from utils.model_saver import load_model, save_model, prepare_dir
from utils.data import imagenet_deprocess_batch
from utils.miscs import str2bool
import torch.backends.cudnn as cudnn
def main(config):
cudnn.benchmark = True
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
log_save_dir, model_save_dir, sample_save_dir, result_save_dir = prepare_dir(config.exp_name)
if config.dataset == 'publaynet':
data_loader, _ = get_dataloader_publaynet(batch_size=config.batch_size, COCO_DIR=config.coco_dir)
vocab_num = data_loader.dataset.num_objects
assert config.clstm_layers > 0
netG = Generator(num_embeddings=vocab_num,
embedding_dim=config.embedding_dim,
z_dim=config.z_dim,
clstm_layers=config.clstm_layers).to(device)
netD_image = ImageDiscriminator(conv_dim=config.embedding_dim).to(device)
netD_object = ObjectDiscriminator(n_class=vocab_num).to(device)
netD_image = add_sn(netD_image)
netD_object = add_sn(netD_object)
netG_optimizer = torch.optim.Adam(netG.parameters(), config.learning_rate, [0.5, 0.999])
netD_image_optimizer = torch.optim.Adam(netD_image.parameters(), config.learning_rate, [0.5, 0.999])
netD_object_optimizer = torch.optim.Adam(netD_object.parameters(), config.learning_rate, [0.5, 0.999])
print('load model from: checkpoints/pretrained')
netG.load_state_dict(torch.load("/home/user/PycharmProjects/synth_doc_layout/layout2im/checkpoints/pretrained/iter-300000_netG.pkl"))
netD_image.load_state_dict(torch.load("/home/user/PycharmProjects/synth_doc_layout/layout2im/checkpoints/pretrained/iter-300000_netD_image.pkl"))
netD_object.load_state_dict(torch.load("/home/user/PycharmProjects/synth_doc_layout/layout2im/checkpoints/pretrained/iter-300000_netD_object.pkl"))
data_iter = iter(data_loader)
#if start_iter < config.niter:
if config.use_tensorboard: writer = SummaryWriter(log_save_dir)
for i in range(config.niter):
try:
batch = next(data_iter)
except:
data_iter = iter(data_loader)
batch = next(data_iter)
# =================================================================================== #
# 1. Preprocess input data #
# =================================================================================== #
imgs, objs, boxes, masks, obj_to_img = batch
z = torch.randn(objs.size(0), config.z_dim)
imgs, objs, boxes, masks, obj_to_img, z = imgs.to(device), objs.to(device), boxes.to(device), \
masks.to(device), obj_to_img, z.to(device)
# =================================================================================== #
# 2. Train the discriminator #
# =================================================================================== #
# Generate fake image
output = netG(imgs, objs, boxes, masks, obj_to_img, z)
crops_input, crops_input_rec, crops_rand, img_rec, img_rand, mu, logvar, z_rand_rec = output
# Compute image adv loss with fake images.
out_logits = netD_image(img_rec.detach())
d_image_adv_loss_fake_rec = F.binary_cross_entropy_with_logits(out_logits, torch.full_like(out_logits, 0))
out_logits = netD_image(img_rand.detach())
d_image_adv_loss_fake_rand = F.binary_cross_entropy_with_logits(out_logits, torch.full_like(out_logits, 0))
d_image_adv_loss_fake = 0.5 * d_image_adv_loss_fake_rec + 0.5 * d_image_adv_loss_fake_rand
# Compute image src loss with real images rec.
out_logits = netD_image(imgs)
d_image_adv_loss_real = F.binary_cross_entropy_with_logits(out_logits, torch.full_like(out_logits, 1))
# Compute object sn adv loss with fake rec crops
out_logits, _ = netD_object(crops_input_rec.detach(), objs)
g_object_adv_loss_rec = F.binary_cross_entropy_with_logits(out_logits, torch.full_like(out_logits, 0))
# Compute object sn adv loss with fake rand crops
out_logits, _ = netD_object(crops_rand.detach(), objs)
d_object_adv_loss_fake_rand = F.binary_cross_entropy_with_logits(out_logits, torch.full_like(out_logits, 0))
d_object_adv_loss_fake = 0.5 * g_object_adv_loss_rec + 0.5 * d_object_adv_loss_fake_rand
# Compute object sn adv loss with real crops.
out_logits_src, out_logits_cls = netD_object(crops_input.detach(), objs)
d_object_adv_loss_real = F.binary_cross_entropy_with_logits(out_logits_src,
torch.full_like(out_logits_src, 1))
d_object_cls_loss_real = F.cross_entropy(out_logits_cls, objs)
# Backward and optimize.
d_loss = 0
d_loss += config.lambda_img_adv * (d_image_adv_loss_fake + d_image_adv_loss_real)
d_loss += config.lambda_obj_adv * (d_object_adv_loss_fake + d_object_adv_loss_real)
d_loss += config.lambda_obj_cls * d_object_cls_loss_real
netD_image.zero_grad()
netD_object.zero_grad()
d_loss.backward()
netD_image_optimizer.step()
netD_object_optimizer.step()
# Logging.
loss = {}
loss['D/loss'] = d_loss.item()
loss['D/image_adv_loss_real'] = d_image_adv_loss_real.item()
loss['D/image_adv_loss_fake'] = d_image_adv_loss_fake.item()
loss['D/object_adv_loss_real'] = d_object_adv_loss_real.item()
loss['D/object_adv_loss_fake'] = d_object_adv_loss_fake.item()
loss['D/object_cls_loss_real'] = d_object_cls_loss_real.item()
# =================================================================================== #
# 3. Train the generator #
# =================================================================================== #
# Generate fake image
output = netG(imgs, objs, boxes, masks, obj_to_img, z)
crops_input, crops_input_rec, crops_rand, img_rec, img_rand, mu, logvar, z_rand_rec = output
# reconstruction loss of ae and img
# g_img_rec_loss = torch.abs(img_rec - imgs).view(imgs.shape[0], -1).mean(1)
g_img_rec_loss = torch.abs(img_rec - imgs).mean()
g_z_rec_loss = torch.abs(z_rand_rec - z).mean()
# kl loss
kl_element = mu.pow(2).add_(logvar.exp()).mul_(-1).add_(1).add_(logvar)
g_kl_loss = torch.sum(kl_element).mul_(-0.5)
# Compute image adv loss with fake images.
out_logits = netD_image(img_rec)
g_image_adv_loss_fake_rec = F.binary_cross_entropy_with_logits(out_logits, torch.full_like(out_logits, 1))
out_logits = netD_image(img_rand)
g_image_adv_loss_fake_rand = F.binary_cross_entropy_with_logits(out_logits, torch.full_like(out_logits, 1))
g_image_adv_loss_fake = 0.5 * g_image_adv_loss_fake_rec + 0.5 * g_image_adv_loss_fake_rand
# Compute object adv loss with fake images.
out_logits_src, out_logits_cls = netD_object(crops_input_rec, objs)
g_object_adv_loss_rec = F.binary_cross_entropy_with_logits(out_logits_src,
torch.full_like(out_logits_src, 1))
g_object_cls_loss_rec = F.cross_entropy(out_logits_cls, objs)
out_logits_src, out_logits_cls = netD_object(crops_rand, objs)
g_object_adv_loss_rand = F.binary_cross_entropy_with_logits(out_logits_src,
torch.full_like(out_logits_src, 1))
g_object_cls_loss_rand = F.cross_entropy(out_logits_cls, objs)
g_object_adv_loss = 0.5 * g_object_adv_loss_rec + 0.5 * g_object_adv_loss_rand
g_object_cls_loss = 0.5 * g_object_cls_loss_rec + 0.5 * g_object_cls_loss_rand
# Backward and optimize.
g_loss = 0
g_loss += config.lambda_img_rec * g_img_rec_loss
g_loss += config.lambda_z_rec * g_z_rec_loss
g_loss += config.lambda_img_adv * g_image_adv_loss_fake
g_loss += config.lambda_obj_adv * g_object_adv_loss
g_loss += config.lambda_obj_cls * g_object_cls_loss
g_loss += config.lambda_kl * g_kl_loss
netG.zero_grad()
g_loss.backward()
netG_optimizer.step()
loss['G/loss'] = g_loss.item()
loss['G/image_adv_loss'] = g_image_adv_loss_fake.item()
loss['G/object_adv_loss'] = g_object_adv_loss.item()
loss['G/object_cls_loss'] = g_object_cls_loss.item()
loss['G/rec_img'] = g_img_rec_loss.item()
loss['G/rec_z'] = g_z_rec_loss.item()
loss['G/kl'] = g_kl_loss.item()
# =================================================================================== #
# 4. Log #
# =================================================================================== #
if (i + 1) % config.log_step == 0:
log = 'iter [{:06d}/{:06d}]'.format(i + 1, config.niter)
for tag, roi_value in loss.items():
log += ", {}: {:.4f}".format(tag, roi_value)
print(log)
if (i + 1) % config.tensorboard_step == 0 and config.use_tensorboard:
for tag, roi_value in loss.items():
writer.add_scalar(tag, roi_value, i + 1)
writer.add_image('Result/crop_real', imagenet_deprocess_batch(crops_input).float() / 255, i + 1)
writer.add_image('Result/crop_real_rec', imagenet_deprocess_batch(crops_input_rec).float() / 255, i + 1)
writer.add_image('Result/crop_rand', imagenet_deprocess_batch(crops_rand).float() / 255, i + 1)
writer.add_image('Result/img_real', imagenet_deprocess_batch(imgs).float() / 255, i + 1)
writer.add_image('Result/img_real_rec', imagenet_deprocess_batch(img_rec).float() / 255, i + 1)
writer.add_image('Result/img_fake_rand', imagenet_deprocess_batch(img_rand).float() / 255, i + 1)
if (i + 1) % config.save_step == 0:
save_model(netG, model_dir=model_save_dir, appendix='netG', iter=i + 1, save_num=5,
save_step=config.save_step)
save_model(netD_image, model_dir=model_save_dir, appendix='netD_image', iter=i + 1, save_num=5,
save_step=config.save_step)
save_model(netD_object, model_dir=model_save_dir, appendix='netD_object', iter=i + 1, save_num=5,
save_step=config.save_step)
if config.use_tensorboard: writer.close()
if __name__ == '__main__':
parser = argparse.ArgumentParser()
# Training configuration
parser.add_argument('--dataset', type=str, default='publaynet')
#parser.add_argument('--vg_dir', type=str, default='datasets/vg')
parser.add_argument('--coco_dir', type=str, default='/home/user/PycharmProjects/synth_doc_layout/layout2im/datasets/annotations/')
parser.add_argument('--batch_size', type=int, default=8)
parser.add_argument('--niter', type=int, default=300000, help='number of training iteration')
parser.add_argument('--image_size', type=int, default=128, help='image size')
parser.add_argument('--object_size', type=int, default=64, help='object size')
parser.add_argument('--embedding_dim', type=int, default=64)
parser.add_argument('--z_dim', type=int, default=64)
parser.add_argument('--learning_rate', type=float, default=1e-4)
parser.add_argument('--resi_num', type=int, default=6)
parser.add_argument('--clstm_layers', type=int, default=3)
# Loss weight
parser.add_argument('--lambda_img_adv', type=float, default=1.0, help='weight of adv img')
parser.add_argument('--lambda_obj_adv', type=float, default=1.0, help='weight of adv obj')
parser.add_argument('--lambda_obj_cls', type=float, default=1.0, help='weight of aux obj')
parser.add_argument('--lambda_z_rec', type=float, default=10.0, help='weight of z rec')
parser.add_argument('--lambda_img_rec', type=float, default=1.0, help='weight of image rec')
parser.add_argument('--lambda_kl', type=float, default=0.01, help='weight of kl')
# Log setting
parser.add_argument('--resume_iter', type=str, default='l',
help='l: from latest; s: from scratch; xxx: from iteration xxx')
parser.add_argument('--log_step', type=int, default=10)
parser.add_argument('--tensorboard_step', type=int, default=100)
parser.add_argument('--save_step', type=int, default=1000)
parser.add_argument('--use_tensorboard', type=str2bool, default='true')
config = parser.parse_args()
config.exp_name = 'layout2im_{}'.format(config.dataset)
print(config)
main(config)
Please, how can I solve this issue?
Thanks.
It depends on your PyTorch model structure, but the key to freezing a layer in PyTorch is
param.requires_grad = False
If you want to freeze only the first few layers, use a counter and set requires_grad = False for those layers, as in the sketch below.
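For example, a minimal sketch (the cutoff of two child modules is arbitrary; adjust it to the layers you actually want frozen in your Generator):

# freeze the first N top-level child modules of the generator
N_FROZEN = 2  # arbitrary cutoff for illustration
for idx, child in enumerate(netG.children()):
    if idx < N_FROZEN:
        for param in child.parameters():
            param.requires_grad = False

# give the optimizer only the parameters that still require gradients
netG_optimizer = torch.optim.Adam(
    (p for p in netG.parameters() if p.requires_grad),
    config.learning_rate, [0.5, 0.999])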
Or you can do it explicitly, like this:
net.fc.weight.requires_grad = False
to freeze the fully connected layer of a model named net.
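As for the size-mismatch error in your update: the checkpoint was trained with 6 object classes (num_embeddings=6), while your current dataset reports 7, so every class-conditioned embedding differs in its first dimension. One common workaround (a sketch, not specific to this repo) is to copy only the parameters whose shapes match and leave the class-dependent ones at their fresh initialization:

checkpoint = torch.load("path/to/iter-300000_netG.pkl")
model_state = netG.state_dict()
# keep only entries whose name and shape both match the current model
compatible = {k: v for k, v in checkpoint.items()
              if k in model_state and v.shape == model_state[k].shape}
model_state.update(compatible)
netG.load_state_dict(model_state)
print('skipped %d mismatched tensors' % (len(checkpoint) - len(compatible)))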
Related
I was trying to run face recognition using a CNN, but I got the error message below:
File "<ipython-input-6-fdb29ac830ce>", line 1, in <module>
runfile('C:/Users/MDIC/Desktop/Face Recognition With CNN.py', wdir='C:/Users/MDIC/Desktop')
File "C:\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 786, in runfile
execfile(filename, namespace)
File "C:\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/MDIC/Desktop/Face Recognition With CNN.py", line 221, in <module>
plt.plot(epochs, val_acc)
File "C:\Anaconda3\lib\site-packages\matplotlib\pyplot.py", line 2811, in plot
is not None else {}), **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\__init__.py", line 1810, in inner
return func(ax, *args, **kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\axes\_axes.py", line 1611, in plot
for line in self._get_lines(*args, **kwargs):
File "C:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py", line 393, in _grab_next_args
yield from self._plot_args(this, kwargs)
File "C:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py", line 370, in _plot_args
x, y = self._xy_from_xy(x, y)
File "C:\Anaconda3\lib\site-packages\matplotlib\axes\_base.py", line 231, in _xy_from_xy
"have shapes {} and {}".format(x.shape, y.shape))
ValueError: x and y must have same first dimension, but have shapes (2,) and (1,)
This is my code:
# Importing libraries
from matplotlib import pyplot as plt
from tensorflow.keras.preprocessing.image import array_to_img, img_to_array, load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.image as mpimg
import numpy as np
import os
# Preparing dataset
# Setting names of the directories for both sets
base_dir = 'data'
seta ='Man_One'
setb ='Man_Two'
# Each of the sets has three sub directories train, validation and test
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
def prepare_data(base_dir, seta, setb):
# Take the directory names for the base directory and both the sets
# Returns the paths for train, validation for each of the sets
seta_train_dir = os.path.join(train_dir, seta)
setb_train_dir = os.path.join(train_dir, setb)
seta_valid_dir = os.path.join(validation_dir, seta)
setb_valid_dir = os.path.join(validation_dir, setb)
seta_train_fnames = os.listdir(seta_train_dir)
setb_train_fnames = os.listdir(setb_train_dir)
return seta_train_dir, setb_train_dir, seta_valid_dir, setb_valid_dir, seta_train_fnames, setb_train_fnames
seta_train_dir, setb_train_dir, seta_valid_dir, setb_valid_dir, seta_train_fnames, setb_train_fnames = prepare_data(base_dir, seta, setb)
seta_test_dir = os.path.join(test_dir, seta)
setb_test_dir = os.path.join(test_dir, setb)
test_fnames_seta = os.listdir(seta_test_dir)
test_fnames_setb = os.listdir(setb_test_dir)
datagen = ImageDataGenerator(
height_shift_range = 0.2,
width_shift_range = 0.2,
rotation_range = 40,
shear_range = 0.2,
zoom_range = 0.2,
horizontal_flip = True,
fill_mode = 'nearest')
img_path = os.path.join(seta_train_dir, seta_train_fnames[3])
img = load_img(img_path, target_size = (150, 150))
x = img_to_array(img)
x = x.reshape((1,) + x.shape)
i = 0
for batch in datagen.flow(x, batch_size = 1):
plt.figure(i)
imgplot = plt.imshow(array_to_img(batch[0]))
i += 1
if i % 5 == 0:
break
# Convolutional Neural Network model
# Import TensorFlow libraries
from tensorflow.keras import layers
from tensorflow.keras import Model
img_input = layers.Input(shape = (150, 150, 3))
# 2D Convolution layer with 64 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(64, 3, activation = 'relu')(img_input)
# 2D max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 128 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(128, 3, activation = 'relu')(x)
# 2D Max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 256 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(256, 3, activation = 'relu')(x)
# 2D Max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 512 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(512, 3, activation = 'relu')(x)
# 2D Max pooling layer
x = layers.MaxPooling2D(2)(x)
# 2D Convolution layer with 512 filters of dimension 3x3 and ReLU activation algorithm
x = layers.Conv2D(512, 3, activation = 'relu')(x)
# Flatten layer
x = layers.Flatten()(x)
# Fully connected layers and ReLU activation algorithm
x = layers.Dense(1024, activation = 'relu')(x)
x = layers.Dense(1024, activation = 'relu')(x)
x = layers.Dense(1000, activation = 'relu')(x)
# Dropout layers for optimisation
x = layers.Dropout(0.5)(x)
# Fully connected layers and sigmoid activation algorithm
output = layers.Dense(1, activation = 'sigmoid')(x)
model = Model(img_input, output)
model.summary()
import tensorflow as tf
# Using binary_crossentropy as the loss function and
# Adam optimizer as the optimizing function when training
model.compile(loss = 'binary_crossentropy',
optimizer = tf.optimizers.Adam(learning_rate = 0.0005),
metrics = ['acc'])
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# All images will be rescaled by 1./255
train_datagen = ImageDataGenerator(rescale = 1./255)
test_datagen = ImageDataGenerator(rescale = 1./255)
# Flow training images in batches of 20 using train_datagen generator
train_generator = train_datagen.flow_from_directory(
train_dir,
target_size = (150, 150),
batch_size = 20,
class_mode = 'binary')
validation_generator = test_datagen.flow_from_directory(
validation_dir,
target_size = (150, 150),
batch_size = 20,
class_mode = 'binary')
# 5x5 grid
ncols = 5
nrows = 5
pic_index = 0
# Set up matplotlib fig and size it to fit 5x5 pics
fig = plt.gcf()
fig.set_size_inches(ncols * 5, nrows * 5)
pic_index += 10
next_seta_pix = [os.path.join(seta_train_dir, fname)
for fname in seta_train_fnames[pic_index - 10:pic_index]]
next_setb_pix = [os.path.join(setb_train_dir, fname)
for fname in setb_train_fnames[pic_index - 10:pic_index]]
for i, img_path in enumerate(next_seta_pix + next_setb_pix):
# Set up subplot; subplot indices start at 1
sp = plt.subplot(nrows, ncols, i + 1)
sp.axis('Off')
img = mpimg.imread(img_path)
plt.imshow(img)
plt.show()
# Train the model
mymodel = model.fit_generator(
train_generator,
steps_per_epoch = 10,
epochs = 80,
validation_data = validation_generator,
validation_steps = 7,
verbose = 2)
import random
from tensorflow.keras.preprocessing.image import img_to_array, load_img
successive_outputs = [layer.output for layer in model.layers[1:]]
visualization_model = Model(img_input, successive_outputs)
a_img_files = [os.path.join(seta_train_dir, f) for f in seta_train_fnames]
b_img_files = [os.path.join(setb_train_dir, f) for f in setb_train_fnames]
img_path = random.choice(a_img_files + b_img_files)
img = load_img(img_path, target_size = (150, 150))
x = img_to_array(img)
x = x.reshape((1,) + x.shape)
x /= 255
successive_feature_maps = visualization_model.predict(x)
layer_names = [layer.name for layer in model.layers]
for layer_name, feature_map in zip(layer_names, successive_feature_maps):
if len(feature_map.shape) == 4:
# Just do this for the conv/maxpool layers
n_features = feature_map.shape[-1]
# The feature map has shape(1, size, size, n_features)
size = feature_map.shape[1]
# Will tile images in this matrix
display_grid = np.zeros((size, size * n_features))
for i in range(n_features):
# Postprocess the feature
x = feature_map[0, :, :, i]
x -= x.mean()
x *= 64
x += 128
x = np.clip(x, 0, 255).astype('float32')
# Will tile each filter into this big horizontal grid
display_grid[:, i * size : (i + 1) * size] = x
# Accuracy results for each training and validation epoch
acc = mymodel.history['acc']
val_acc = mymodel.history['val_acc']
# Loss results for each training and validation epoch
loss = mymodel.history['loss']
val_loss = mymodel.history['val_loss']
epochs = range(len(acc))
# Plot accuracy for each training and validation epoch
plt.plot(epochs, acc)
plt.plot(epochs, val_acc)
plt.title('Training and validation accuracy')
plt.legend(['train', 'val'], loc='center')
plt.figure()
# Plot loss for each training and validation epoch
plt.plot(epochs, loss)
plt.plot(epochs, val_loss)
plt.title('Training and validation loss')
plt.legend(['train', 'val'], loc='center')
plt.figure()
# Testing model on a random train image from set a
train_img = random.choice(seta_train_fnames)
train_image_path = os.path.join(seta_train_dir, train_img)
train_img = load_img(train_image_path, target_size = (150, 150))
plt.figure()
plt.imshow(train_img)
train_img = (np.expand_dims(train_img, 0))
train_img = tf.cast(train_img, tf.float32)
print(train_img.shape)
model.predict(train_img)
# Testing model on a random train image from set b
train_img = random.choice(setb_train_fnames)
train_image_path = os.path.join(setb_train_dir, train_img)
train_img = load_img(train_image_path, target_size = (150, 150))
plt.figure()
plt.imshow(train_img)
train_img = (np.expand_dims(train_img, 0))
train_img = tf.cast(train_img, tf.float32)
print(train_img.shape)
model.predict(train_img)
# Testing a random image from the test set a
cal_mo = 0
cal_mt = 0
cal_unconclusive = 0
alist = []
for fname in test_fnames_seta:
if fname.startswith('.'):
continue
file_path = os.path.join(seta_test_dir, fname)
load_file = load_img(file_path, target_size = (150, 150))
load_file = (np.expand_dims(load_file, 0))
load_file = tf.cast(load_file, tf.float32)
pred_img = model.predict(load_file)
if(pred_img[0]<0.5):
cal_mo+=1
elif(pred_img[0]>0.5):
cal_mt+=1
else:
print(pred_img[0], "\n")
cal_unconclusive+=1
alist.append(file_path)
print(alist)
print("Identified as: \n")
print("Man_One:", cal_mo)
print("Man_Two:", cal_mt)
print( "Inconclusive:", cal_unconclusive)
print( "Percentage:", (cal_mo/(cal_mo + cal_mt + cal_unconclusive)) * 100)
a = (cal_mo/(cal_mo + cal_mt + cal_unconclusive)) * 100
# Testing a random image from the test set b
cal_mo = 0
cal_mt = 0
cal_unconclusive = 0
alist = []
for fname in test_fnames_setb:
if fname.startswith('.'):
continue
file_path = os.path.join(setb_test_dir, fname)
load_file = load_img(file_path, target_size = (150, 150))
load_file = (np.expand_dims(load_file, 0))
load_file = tf.cast(load_file, tf.float32)
pred_img = model.predict(load_file)
if(pred_img[0]<0.5):
cal_mo+=1
elif(pred_img[0]>0.5):
cal_mt+=1
else:
print(pred_img[0], "\n")
cal_unconclusive+=1
alist.append(file_path)
print(alist)
print("Identified as: \n")
print("Man_One:", cal_mo)
print("Man_Two:", cal_mt)
print( "Inconclusive:", cal_unconclusive)
print( "Percentage:", (cal_mt/(cal_mo + cal_mt + cal_unconclusive)) * 100)
b = (cal_mt/(cal_mo + cal_mt + cal_unconclusive)) * 100
avg = (a+b)/2
print("Average Percentage:", avg)
Kindly look carefully at the above program, since it is a little long.
Please help me as soon as possible.
Thank you very much.
It could be that your validation generator runs out of data before reaching the 80 epochs of training. Check that you have at least 7 * 80 validation images.
Then check the number of elements in mymodel.history['val_acc']. It must match the training series if you use epochs = range(len(acc)) as the x values for both graphs. The problem is that your acc and val_acc have different numbers of elements.
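For example (a minimal sketch, assuming acc and val_acc come from mymodel.history as in your code), plot each series against its own x range so the lengths always match:

plt.plot(range(len(acc)), acc)
plt.plot(range(len(val_acc)), val_acc)
plt.title('Training and validation accuracy')
plt.legend(['train', 'val'], loc='center')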
I'm trying to create a modified MNIST model which takes 1x28x28 MNIST tensor images as input, branches into sub-models with different kernel sizes, and accumulates the results at the end, so as to give a multi-scale-kernel response in the spatial domain of the images. I'm worried about the model, since I'm unable to construct it.
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torchvision import datasets, transforms
import torch.nn.functional as F
import timeit
import unittest
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(0)
# check availability of GPU and set the device accordingly
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# define a transforms for preparing the dataset
transform = transforms.Compose([
transforms.ToTensor(), # convert the image to a pytorch tensor
transforms.Normalize((0.1307,), (0.3081,)) # normalise the images with mean and std of the dataset
])
# Load the MNIST training, test datasets using `torchvision.datasets.MNIST` using the transform defined above
train_dataset = datasets.MNIST('./data',train=True,transform=transform,download=True)
test_dataset = datasets.MNIST('./data',train=False,transform=transform,download=True)
# create dataloaders for training and test datasets
# use a batch size of 32 and set shuffle=True for the training set
train_dataloader = Data.DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_dataloader = Data.DataLoader(dataset=test_dataset, batch_size=32, shuffle=True)
# My Net
class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
# define a conv layer with output channels as 16, kernel size of 3 and stride of 1
self.conv11 = nn.Conv2d(1, 16, 3, 1) # Input = 1x28x28 Output = 16x26x26
self.conv12 = nn.Conv2d(1, 16, 5, 1) # Input = 1x28x28 Output = 16x24x24
self.conv13 = nn.Conv2d(1, 16, 7, 1) # Input = 1x28x28 Output = 16x22x22
# define a conv layer with output channels as 32, kernel size of 3 and stride of 1
self.conv21 = nn.Conv2d(16, 32, 3, 1) # Input = 16x26x26 Output = 32x24x24
self.conv22 = nn.Conv2d(16, 32, 5, 1) # Input = 16x24x24 Output = 32x20x20
self.conv23 = nn.Conv2d(16, 32, 7, 1) # Input = 16x22x22 Output = 32x16x16
# define a conv layer with output channels as 64, kernel size of 3 and stride of 1
self.conv31 = nn.Conv2d(32, 64, 3, 1) # Input = 32x24x24 Output = 64x22x22
self.conv32 = nn.Conv2d(32, 64, 5, 1) # Input = 32x20x20 Output = 64x16x16
self.conv33 = nn.Conv2d(32, 64, 7, 1) # Input = 32x16x16 Output = 64x10x10
# define a max pooling layer with kernel size 2
self.maxpool = nn.MaxPool2d(2), # Output = 64x11x11
# define dropout layer with a probability of 0.25
self.dropout1 = nn.Dropout(0.25)
# define dropout layer with a probability of 0.5
self.dropout2 = nn.Dropout(0.5)
# define a linear(dense) layer with 128 output features
self.fc11 = nn.Linear(64*11*11, 128)
self.fc12 = nn.Linear(64*8*8, 128) # after maxpooling 2x2
self.fc13 = nn.Linear(64*5*5, 128)
# define a linear(dense) layer with output features corresponding to the number of classes in the dataset
self.fc21 = nn.Linear(128, 10)
self.fc22 = nn.Linear(128, 10)
self.fc23 = nn.Linear(128, 10)
self.fc33 = nn.Linear(30,10)
def forward(self, x1):
# Use the layers defined above in a sequential way (folow the same as the layer definitions above) and
# write the forward pass, after each of conv1, conv2, conv3 and fc1 use a relu activation.
x = F.relu(self.conv11(x1))
x = F.relu(self.conv21(x))
x = F.relu(self.maxpool(self.conv31(x)))
#x = torch.flatten(x, 1)
x = x.view(-1,64*11*11)
x = self.dropout1(x)
x = F.relu(self.fc11(x))
x = self.dropout2(x)
x = self.fc21(x)
y = F.relu(self.conv12(x1))
y = F.relu(self.conv22(y))
y = F.relu(self.maxpool(self.conv32(y)))
#x = torch.flatten(x, 1)
y = y.view(-1,64*8*8)
y = self.dropout1(y)
y = F.relu(self.fc12(y))
y = self.dropout2(y)
y = self.fc22(y)
z = F.relu(self.conv13(x1))
z = F.relu(self.conv23(z))
z = F.relu(self.maxpool(self.conv33(z)))
#x = torch.flatten(x, 1)
z = z.view(-1,64*5*5)
z = self.dropout1(z)
z = F.relu(self.fc13(z))
z = self.dropout2(z)
z = self.fc23(z)
out = self.fc33(torch.cat((x, y, z), 0))
output = F.log_softmax(out, dim=1)
return output
import unittest
class TestImplementations(unittest.TestCase):
# Dataloading tests
def test_dataset(self):
self.dataset_classes = ['0 - zero',
'1 - one',
'2 - two',
'3 - three',
'4 - four',
'5 - five',
'6 - six',
'7 - seven',
'8 - eight',
'9 - nine']
self.assertTrue(train_dataset.classes == self.dataset_classes)
self.assertTrue(train_dataset.train == True)
def test_dataloader(self):
self.assertTrue(train_dataloader.batch_size == 32)
self.assertTrue(test_dataloader.batch_size == 32)
def test_total_parameters(self):
model = Net().to(device)
#self.assertTrue(sum(p.numel() for p in model.parameters()) == 1015946)
suite = unittest.TestLoader().loadTestsFromModule(TestImplementations())
unittest.TextTestRunner().run(suite)
def train(model, device, train_loader, optimizer, epoch):
model.train()
for batch_idx, (data, target) in enumerate(train_loader):
# send the image, target to the device
data, target = data.to(device), target.to(device)
# flush out the gradients stored in optimizer
optimizer.zero_grad()
# pass the image to the model and assign the output to variable named output
output = model(data)
# calculate the loss (use nll_loss in pytorch)
loss = F.nll_loss(output, target)
# do a backward pass
loss.backward()
# update the weights
optimizer.step()
if batch_idx % 100 == 0:
print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
epoch, batch_idx * len(data), len(train_loader.dataset),
100. * batch_idx / len(train_loader), loss.item()))
def test(model, device, test_loader):
model.eval()
test_loss = 0
correct = 0
with torch.no_grad():
for data, target in test_loader:
# send the image, target to the device
data, target = data.to(device), target.to(device)
# pass the image to the model and assign the output to variable named output
output = model(data)
test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
correct += pred.eq(target.view_as(pred)).sum().item()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
model = Net().to(device)
## Define Adam Optimiser with a learning rate of 0.01
optimizer = torch.optim.Adam(model.parameters(),lr=0.01)
start = timeit.default_timer()
for epoch in range(1, 11):
train(model, device, train_dataloader, optimizer, epoch)
test(model, device, test_dataloader)
stop = timeit.default_timer()
print('Total time taken: {} seconds'.format(int(stop - start)) )
Here is my full code. I can't understand what could possibly be going wrong.
It is giving this error:
<ipython-input-72-194680537dcc> in forward(self, x1)
46 x = F.relu(self.conv11(x1))
47 x = F.relu(self.conv21(x))
---> 48 x = F.relu(self.maxpool(self.conv31(x)))
49 #x = torch.flatten(x, 1)
50 x = x.view(-1,64*11*11)
TypeError: 'tuple' object is not callable
P.S.: PyTorch noob here.
You have mistakenly placed a comma at the end of the line where you define self.maxpool: self.maxpool = nn.MaxPool2d(2), # Output = 64x11x11 — see it?
This comma makes self.maxpool a tuple instead of a torch.nn.modules.pooling.MaxPool2d. Drop the comma at the end and this error is fixed.
I also see you haven't given the stride argument in your definition of self.maxpool = nn.MaxPool2d(2); note that stride defaults to the kernel size, so this already behaves as stride 2, but you can set it explicitly, as in the corrected line below.
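A minimal corrected definition:

self.maxpool = nn.MaxPool2d(2, stride=2)  # no trailing comma: a trailing comma wraps the module in a tuple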
I'm new to TensorFlow and I'm trying it out, so if one of you could help me I would really appreciate it.
I've created a CNN model and trained it to classify a series of images into 2 categories, for example FLOWERS and OTHERS, and I think I did a good job at that, but if you have any ideas on how to improve this model, please let me know.
My problem is: after I train this model, how can I use it to classify just one specific image? I don't want to use batches if possible. Could anyone give me some advice or examples about this, please?
My Code:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
import math
import numpy as np
from PIL import Image
import tensorflow as tf
import os
# Basic model parameters as external flags.
flags = tf.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 1e-4, 'Initial learning rate.')
flags.DEFINE_integer('max_steps', 10000, 'Number of steps to run trainer.')
flags.DEFINE_integer('hidden1', 256, 'Number of units in hidden layer 1.')
flags.DEFINE_integer('hidden2', 64, 'Number of units in hidden layer 2.')
flags.DEFINE_integer('batch_size', 32, 'Batch size. '
'Must divide evenly into the dataset sizes.')
flags.DEFINE_string('train_dir', "ModelData/data", 'Directory to put the training data.')
flags.DEFINE_boolean('fake_data', False, 'If true, uses fake data '
'for unit testing.')
NUM_CLASSES = 2
IMAGE_SIZE = 200
CHANNELS = 3
IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE * CHANNELS
# starter_learning_rate = 0.1
def inference(images, hidden1_units, hidden2_units):
# Hidden 1
with tf.name_scope('hidden1'):
weights = tf.Variable(
tf.truncated_normal([IMAGE_PIXELS, hidden1_units], stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
name='weights')
biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
# Hidden 2
with tf.name_scope('hidden2'):
weights = tf.Variable(
tf.truncated_normal([hidden1_units, hidden2_units], stddev=1.0 / math.sqrt(float(hidden1_units))),
name='weights')
biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
# Linear
with tf.name_scope('softmax_linear'):
weights = tf.Variable(
tf.truncated_normal([hidden2_units, NUM_CLASSES], stddev=1.0 / math.sqrt(float(hidden2_units))),
name='weights')
biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
logits = tf.matmul(hidden2, weights) + biases
return logits
def cal_loss(logits, labels):
labels = tf.to_int64(labels)
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels, name='xentropy')
loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
return loss
def training(loss, learning_rate):
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
global_step = tf.Variable(0, name='global_step', trainable=False)
train_op = optimizer.minimize(loss, global_step=global_step)
return train_op
def evaluation(logits, labels):
correct = tf.nn.in_top_k(logits, labels, 1)
return tf.reduce_sum(tf.cast(correct, tf.int32))
def placeholder_inputs(batch_size):
images_placeholder = tf.placeholder(tf.float32, shape=(batch_size, IMAGE_PIXELS))
labels_placeholder = tf.placeholder(tf.int32, shape=batch_size)
return images_placeholder, labels_placeholder
def fill_feed_dict(images_feed, labels_feed, images_pl, labels_pl):
feed_dict = {
images_pl: images_feed,
labels_pl: labels_feed,
}
return feed_dict
def do_eval(sess, eval_correct, images_placeholder, labels_placeholder, data_set):
# And run one epoch of eval.
true_count = 0 # Counts the number of correct predictions.
steps_per_epoch = 32 // FLAGS.batch_size
num_examples = steps_per_epoch * FLAGS.batch_size
for step in range(steps_per_epoch):
feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
true_count += sess.run(eval_correct, feed_dict=feed_dict)
precision = true_count / num_examples
print(' Num examples: %d Num correct: %d Precision # 1: %0.04f' % (num_examples, true_count, precision))
# Get the sets of images and labels for training, validation, and
def init_training_data_set(dir):
train_images = []
train_labels = []
def GetFoldersList():
mylist = []
filelist = os.listdir(dir)
for name in filelist:
if os.path.isdir(os.path.join(dir, name)):
mylist.append(name)
return mylist
def ReadImagesFromFolder(folder):
fin_dir = os.path.join(dir, folder)
images_name = os.listdir(fin_dir)
images = []
for img_name in images_name:
img_location = os.path.join(dir, folder)
final_loc = os.path.join(img_location, img_name)
try:
hash_folder = int(folder.split("_")[0])
images.append((np.array(Image.open(final_loc).convert('RGB')), hash_folder))
except:
pass
return images
folders = GetFoldersList()
for folder in folders:
for imgs in ReadImagesFromFolder(folder):
train_images.append(imgs[0])
train_labels.append(imgs[1])
return train_images, train_labels
train_images, train_labels = init_training_data_set(os.path.join("FetchData", "Image"))
train_images = np.array(train_images)
train_images = train_images.reshape(len(train_images), IMAGE_PIXELS)
train_labels = np.array(train_labels)
def restore_model_last_version(saver, sess):
def get_biggest_index(folder):
import re
index_vals = []
for file in os.listdir(folder):
split_data = file.split(".")
extension = split_data[len(split_data) - 1]
if extension == "meta":
index = int(re.findall(r"\d+", file)[0])
index_vals.append(index)
index_vals.sort(reverse=True)
if index_vals:
return index_vals[0]
else:
return ""
real_path = os.path.abspath(os.path.split(FLAGS.train_dir)[0])
index = get_biggest_index(real_path)
isdir = os.path.isdir(real_path)
is_empty = True
if isdir:
if os.listdir(real_path):
is_empty = False
if not is_empty:
saver.restore(sess, FLAGS.train_dir + "-" + str(index))
def run_training():
# Tell TensorFlow that the model will be built into the default Graph.
with tf.Graph().as_default():
# Generate placeholders for the images and labels.
images_placeholder, labels_placeholder = placeholder_inputs(len(train_images))
# Build a Graph that computes predictions from the inference model.
logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
# Add to the Graph the Ops for loss calculation.
loss = cal_loss(logits, labels_placeholder)
# Add to the Graph the Ops that calculate and apply gradients.
train_op = training(loss, FLAGS.learning_rate)
# Add the Op to compare the logits to the labels during evaluation.
eval_correct = evaluation(logits, labels_placeholder)
# Create a saver for writing training checkpoints.
saver = tf.train.Saver(save_relative_paths=True)
# Create a session for running Ops on the Graph.
# sess = tf.Session()
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.per_process_gpu_memory_fraction = 0.9
# gpu_options = tf.GPUOptions(allow_growth=True)
# sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
sess = tf.Session(config=config)
# Run the Op to initialize the variables.
# init = train_op.g
init = tf.global_variables_initializer()
sess.run(init)
restore_model_last_version(saver, sess)
# And then after everything is built, start the training loop.
for step in range(FLAGS.max_steps):
start_time = time.time()
feed_dict = fill_feed_dict(train_images, train_labels, images_placeholder, labels_placeholder)
_, loss_value = sess.run([train_op, loss], feed_dict=feed_dict)
duration = time.time() - start_time
if (step) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
print("Current step is: " + str(step))
print("Current los value: " + str(loss_value))
print("Current duration: " + str(duration))
print("\n")
saver.save(sess, save_path=FLAGS.train_dir, global_step=step)
print('Training Data Eval:')
do_eval(sess, eval_correct, images_placeholder, labels_placeholder, train_images)
def main(_):
run_training()
if __name__ == '__main__':
tf.app.run()
So if anyone knows how I can make that evaluation for just one picture, please help me.
Thanks :)
Pretty much every operation in TensorFlow expects you to pass a batched input, to make good use of the parallelization capacity of modern GPUs.
Now, if you want to infer on a single image, you simply need to consider this image as a batch of size 1. Here is a quick code snippet:
# Load image
img = np.array(Image.open(your_path).convert('RGB'))
# Expand dimensions to simulate a batch of size 1
img = np.expand_dims(img, 0)
# Flatten to match the (batch_size, IMAGE_PIXELS) placeholder used in your graph
img = img.reshape(1, -1)
...
# Get prediction
pred = sess.run(tf.nn.softmax(logits), {images_placeholder: img})
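Putting it together with your script (a sketch, untested: it assumes inference, FLAGS, IMAGE_PIXELS, and your restore_model_last_version helper are in scope, and that the image is already IMAGE_SIZE x IMAGE_SIZE with 3 channels):

with tf.Graph().as_default():
    # placeholder sized for one flattened image, i.e. a batch of size 1
    images_placeholder = tf.placeholder(tf.float32, shape=(1, IMAGE_PIXELS))
    logits = inference(images_placeholder, FLAGS.hidden1, FLAGS.hidden2)
    probs = tf.nn.softmax(logits)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        restore_model_last_version(saver, sess)  # your checkpoint-restore helper
        img = np.array(Image.open(your_path).convert('RGB'))
        img = img.reshape(1, IMAGE_PIXELS).astype(np.float32)
        print(sess.run(probs, {images_placeholder: img}))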
I am learning CNNs and am currently working on deconvolution of the layers. I have begun learning upsampling and observing how convolution layers see the world by generating feature maps from the filters, from the source Visualization of the filters of VGG16, with the source code. I have changed the input, and the code is as follows:
import imageio
import numpy as np
import time
from keras.applications import vgg16
from keras import backend as K
import cv2
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
# dimensions of the generated pictures for each filter.
img_width = 128
img_height = 128
# the name of the layer we want to visualize
# (see model definition at keras/applications/vgg16.py)
layer_name = 'block5_conv1'
# util function to convert a tensor into a valid image
def deprocess_image(x):
# normalize tensor: center on 0., ensure std is 0.1
x -= x.mean()
x /= (x.std() + K.epsilon())
x *= 0.1
# clip to [0, 1]
x += 0.5
x = np.clip(x, 0, 1)
# convert to RGB array
x *= 255
if K.image_data_format() == 'channels_first':
x = x.transpose((1, 2, 0))
x = np.clip(x, 0, 255).astype('uint8')
return x
# build the VGG16 network with ImageNet weights
model = vgg16.VGG16(weights='imagenet', include_top=False)
print('Model loaded.')
model.summary()
# this is the placeholder for the input images
input_img = model.input
# get the symbolic outputs of each "key" layer (we gave them unique names).
layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]])
def normalize(x):
# utility function to normalize a tensor by its L2 norm
return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon())
kept_filters = []
for filter_index in range(200):
# we only scan through the first 200 filters,
# but there are actually 512 of them
print('Processing filter %d' % filter_index)
start_time = time.time()
# we build a loss function that maximizes the activation
# of the nth filter of the layer considered
layer_output = layer_dict[layer_name].output
if K.image_data_format() == 'channels_first':
loss = K.mean(layer_output[:, filter_index, :, :])
else:
loss = K.mean(layer_output[:, :, :, filter_index])
# we compute the gradient of the input picture wrt this loss
grads = K.gradients(loss, input_img)[0]
# normalization trick: we normalize the gradient
grads = normalize(grads)
# this function returns the loss and grads given the input picture
iterate = K.function([input_img], [loss, grads])
# step size for gradient ascent
step = 1.
inpImgg = '/home/sanaalamgeer/Downloads/cat.jpeg'
inpImg = mpimg.imread(inpImgg)
inpImg = cv2.resize(inpImg, (img_width, img_height))
# we start from a gray image with some random noise
if K.image_data_format() == 'channels_first':
input_img_data = inpImg.reshape((1, 3, img_width, img_height))
else:
input_img_data = inpImg.reshape((1, img_width, img_height, 3))
input_img_data = (input_img_data - 0.5) * 20 + 128
# we run gradient ascent for 20 steps
for i in range(20):
loss_value, grads_value = iterate([input_img_data])
input_img_data += grads_value * step
print('Current loss value:', loss_value)
if loss_value <= 0.:
# some filters get stuck to 0, we can skip them
break
# decode the resulting input image
if loss_value > 0:
img = deprocess_image(input_img_data[0])
kept_filters.append((img, loss_value))
end_time = time.time()
print('Filter %d processed in %ds' % (filter_index, end_time - start_time))
# we will stitch the best 64 filters on an 8 x 8 grid.
n = 8
# the filters that have the highest loss are assumed to be better-looking.
# we will only keep the top 64 filters.
kept_filters.sort(key=lambda x: x[1], reverse=True)
kept_filters = kept_filters[:n * n]
# build a black picture with enough space for
# our 8 x 8 filters of size 128 x 128, with a 5px margin in between
margin = 5
width = n * img_width + (n - 1) * margin
height = n * img_height + (n - 1) * margin
stitched_filters = np.zeros((width, height, 3))
# fill the picture with our saved filters
for i in range(n):
for j in range(n):
img, loss = kept_filters[i * n + j]
stitched_filters[(img_width + margin) * i: (img_width + margin) * i + img_width,
(img_height + margin) * j: (img_height + margin) * j + img_height, :] = img
# save the result to disk
imageio.imwrite('stitched_filters_%dx%d.png' % (n, n), stitched_filters)
The input image I am using is a cat photo (image omitted here).
The code is supposed to generate an output with 64 feature maps stitched into one image, as shown in Visualization of the filters of VGG16, but it is generating the same input image for each filter (output image omitted).
I am confused about what's wrong or where I should make changes.
Please help.
That's quite complex code. I'd do this instead:
from keras.applications import vgg16
from keras.applications.vgg16 import preprocess_input
from keras.models import Model
layer_name = 'block5_conv1'
#create a section of the model to output the layer we want
model = vgg16.VGG16(weights='imagenet', include_top=False)
model = Model(model.input, model.get_layer(layer_name).output)
#open and preprocess the cat image (openTheCatImage is a placeholder for your own loading/resizing code)
catImage = openTheCatImage(catFile)
catImage = np.expand_dims(catImage,axis=0)
catImage = preprocess_input(catImage)
#get the layer outputs
features = model.predict(catImage)
#plot
for channel in range(features.shape[-1]): #or .shape[1], or up to a limit you like
featureMap = features[:,:,:,channel] #or features[:,channel]
featureMap = deprocess_image(featureMap)[0]
saveOrPlot(featureMap)
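saveOrPlot is also a placeholder; a minimal hypothetical version with matplotlib could be:

import matplotlib.pyplot as plt

def saveOrPlot(featureMap):
    # featureMap: the 2D array returned by deprocess_image(...)[0]
    plt.imshow(featureMap, cmap='viridis')
    plt.axis('off')
    plt.show()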
I'm working on creating an image classifier that can differentiate between cats and dogs. I have the following code:
import cv2
import os
from tqdm import tqdm
import numpy as np
import tensorflow as tf
img_height = 128
img_width = 128
path = "./train"
# class info
file = os.listdir(path)
index = []
images = []
# image size and channels
channels = 3
n_inputs = img_width * img_height * channels
# First convolutional layer
conv1_fmaps = 96 # Number of feature maps created by this layer
conv1_ksize = 4 # kernel size 4x4
conv1_stride = 2
conv1_pad = "SAME"
# Second convolutional layer
conv2_fmaps = 192
conv2_ksize = 4
conv2_stride = 4
conv2_pad = "SAME"
# Third layer is a pooling layer
pool3_fmaps = conv2_fmaps # Isn't it obvious?
n_fc1 = 192 # Total number of output features
n_outputs = 2
with tf.name_scope("inputs"):
X = tf.placeholder(tf.float32, shape=[None, img_width, img_height, channels], name="X")
X_reshaped = tf.reshape(X, shape=[-1, img_height, img_width, channels])
y = tf.placeholder(tf.int32, shape=[None, 2], name="y")
conv1 = tf.layers.conv2d(X_reshaped, filters=conv1_fmaps, kernel_size=conv1_ksize, strides=conv1_stride, padding=conv1_pad, activation=tf.nn.relu, name="conv1")
conv2 = tf.layers.conv2d(conv1, filters=conv2_fmaps, kernel_size=conv2_ksize, strides=conv2_stride, padding=conv2_pad, activation=tf.nn.relu, name="conv2")
n_epochs = 10
batch_size = 250
with tf.name_scope("pool3"):
pool3 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="VALID")
pool3_flat = tf.reshape(pool3, shape=[-1, pool3_fmaps * 8 * 8])
with tf.name_scope("fc1"):
fc1 = tf.layers.dense(pool3_flat, n_fc1, activation=tf.nn.relu, name="fc1")
with tf.name_scope("output"):
logits = tf.layers.dense(fc1, n_outputs, name="output")
Y_proba = tf.nn.softmax(logits, name="Y_proba")
with tf.name_scope("train"):
xentropy=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y)
loss = tf.reduce_mean(xentropy)
optimizer = tf.train.AdamOptimizer()
training_op = optimizer.minimize(loss)
with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
init = tf.global_variables_initializer()
with tf.name_scope("init_and_save"):
saver = tf.train.Saver()
def next_batch(num):
index = []
images = []
# Data set Creation
print("Creating batch dataset "+str(num+1)+"...")
for f in tqdm(range(num * batch_size, (num+1)*batch_size)):
if file[f].find("dog"):
index.append(np.array([0, 1]))
else:
index.append(np.array([1, 0]))
image = cv2.imread(path + "/" + file[f])
image = cv2.resize(image, (img_width, img_height), 0, 0, cv2.INTER_LINEAR)
# image = image.astype(np.float32)
images.append(image)
images = np.array(images, dtype=np.uint8)
images = images.astype('float32')
images = images / 255
print("\nBatch "+str(num+1)+" creation finished.")
# print([images, index])
return [images, index]
with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
for iteration in range(25000 // batch_size):
X_batch, y_batch = next_batch(iteration)
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})
print(epoch, "Train accuracy:", acc_train)
save_path = saver.save(sess, "./dogvscat_mnist_model.ckpt")
But I'm getting this error:
ValueError: Rank mismatch: Rank of labels (received 2) should equal rank of logits minus 1 (received 2).
Can anyone point out the problem and help me to solve it. I'm totally new to this.
For tf.nn.sparse_softmax_cross_entropy_with_logits, rank(labels) = rank(logits) - 1, so you need to redefine the labels placeholder as follows:
...
y = tf.placeholder(tf.int32, shape=[None], name="y")
...
xentropy=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=y)
...
X_batch, y_batch = next_batch(iteration)
y_batch = np.argmax(y_batch, axis=1)
OR you can just use tf.nn.softmax_cross_entropy_with_logits without changing the labels placeholder:
xentropy=tf.nn.softmax_cross_entropy_with_logits(logits=logits,labels=y)
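A side note beyond the original answer: the eval op in your code, tf.nn.in_top_k(logits, y, 1), also expects rank-1 integer class ids, so with the first option you can reuse the converted batch for both training and evaluation:

y_batch = np.argmax(y_batch, axis=1)  # one-hot -> class ids
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
acc_train = accuracy.eval(feed_dict={X: X_batch, y: y_batch})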