I'm loading a dataset of images with image_dataset_from_directory and it gives me a PrefetchDataset with my images and their associated labels, one-hot encoded.
In order to build a binary image classifier, I want to transform my PrefetchDataset labels so I know whether an image is a photo or something else.
Here's how I wrote it:
batch_size = 32
img_height = 250
img_width = 250
train_ds = image_dataset_from_directory(
    data_dir,
    validation_split=0.2,
    color_mode="rgb",
    subset="training",
    seed=69,
    crop_to_aspect_ratio=False,
    image_size=(img_height, img_width),
    batch_size=batch_size)
class_names = train_ds.class_names
# ['Painting', 'Photo', 'Schematics', 'Sketch', 'Text'] in my case
# Convert label to 1 if it is a photo, else 0
i = 1 # class_names.index('Photo')
def is_photo(batch):
    for images, labels in batch:
        bool_labels = tf.constant([int(l == 1) for l in labels],
                                  dtype=np.int32)
        labels = bool_labels
    return batch
new_train_ds = train_ds.apply(is_photo)
My problem is that new_train_ds doesn't differ from train_ds, which leads me to think there must be an issue with the apply method.
I also checked bool_labels and it works just fine.
Does anyone have an idea of how to solve this issue?
Maybe try something like this:
train_ds = train_ds.map(lambda x, y: (x, tf.cast(y == 1, dtype=tf.int64)))
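If you prefer not to hardcode the index, here is a minimal sketch of the same idea (assuming integer labels, i.e. the default label_mode='int'; with one-hot labels you would compare tf.argmax(labels, axis=-1) == photo_idx instead):

photo_idx = class_names.index('Photo')  # 1 for the class list above
new_train_ds = train_ds.map(
    lambda images, labels: (images, tf.cast(labels == photo_idx, tf.int64)))

Unlike the loop in is_photo, map builds a new dataset with the transformed labels, which is why new_train_ds actually differs from train_ds: rebinding the local variable labels inside a Python loop never changes the dataset itself.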
Related
I have no coding experience and am new to Python.
Task: use a CNN to do binary image classification.
Problem: memory error.
# Data is confidential; an image example is at [1].
# Two classes of images: 294 images for class 'e'; 5057 images for class 'l'. Since the datasets were imbalanced, the original plan was to set batch_size=500 in datagen.flow_from_directory for each class, so that every batch fed the whole dataset of class 'e' plus 500 images of class 'l' to the model. However, Google Colab keeps crashing from running out of RAM. batch_size was lowered to 50, and it still failed.
# x=image data; y=label; bs=batch_size
bs = 50
def generate_batch_data_random(x, y, bs):
    ylen = len(y)
    loopcount = ylen // bs
    while (True):
        i = random.randint(0, loopcount)
        yield x[i * bs:(i + 1) * bs], y[i * bs:(i + 1) * bs]

def train_and_validate_model(model, x, y):
    (trainX, testX, trainY, testY) = train_test_split(x, y, test_size=0.25, random_state=6)
    trainY = to_categorical(trainY, num_classes=2)
    testY = to_categorical(testY, num_classes=2)
    logger = CSVLogger(kfold_train_and_validate, append=True)
    H = model.fit_generator(generator=generate_batch_data_random(trainX, trainY, bs),
                            steps_per_epoch=len(trainX) / bs,
                            epochs=10,
                            validation_data=generate_batch_data_random(testX, testY, bs),
                            validation_steps=len(testX) / bs,
                            callbacks=[checkpoint])
    return H, testX, testY
# Use ImageDataGenerator to save memory. K-fold splits seem more appropriate than fixed training and validation groups, so the dataset structure was built around the image classes (one folder of images per class), not around training and validation groups. The plan was to use ImageDataGenerator to send images in batches, then use k-fold to split each batch into training and validation groups.
path = '/content/drive/MyDrive/er_lr/erlr_vs_er'
datagen = ImageDataGenerator(rescale=1./255)
data_e = datagen.flow_from_directory(directory=path,
                                     target_size=(128, 128),
                                     classes='e',
                                     batch_size=50,
                                     class_mode='categorical')
x_e, y_e = next(data_e)
data_l = datagen.flow_from_directory(directory=path,
                                     classes='l',
                                     target_size=(128, 128),
                                     batch_size=50,
                                     class_mode='categorical')
x_l, y_l = next(data_l)
for i in range(0, len(y_e)):
    y_e[i] = 0
for j in range(0, len(y_l)):
    y_l[j] = 1
x = []
y = []
x.extend(np.array(data_e)[0][0])
x.extend(np.array(data_l)[0][0])
y.extend(np.array(y_e))
y.extend(np.array(y_l))
seed = 10
np.random.seed(seed)
filepath = '/content/drive/MyDrive/er_lr/hdf5/my_best_model.epoch{epoch:02d}-loss{val_loss:.2f}.hdf5'
fold = 1
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
for train, test in kfold.split(x, y):
    model = create_model()
    checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                                 monitor='val_loss', save_weights_only=True, verbose=1,
                                                 save_best_only=True, save_freq='epoch', period=1)
    H, validationX, validationY = train_and_validate_model(model, x[train], y[train])
    training_ACCs.append(H.history['accuracy'])
    training_loses.append(H.history['loss'])
    val_ACCs.append(H.history['val_accuracy'])
    val_loses.append(H.history['val_loss'])
    labels_test_cat = to_categorical(y[test], num_classes=2)
    scores = model.evaluate(x[test], labels_test_cat, verbose=0)
    fold = fold + 1
This crashed in Google Colab repeatedly from running out of RAM. A batch_size of 50 and an image np.shape of (128, 128, 3) do not seem that large.
Any thoughts?
[1]: https://i.stack.imgur.com/Lp1H9.png
Basically, this code lets me randomly apply image augmentation to my training samples stored in TFRecords. The following code treats each batch (32 pics) the same way: either flip/rotation, cutout, or nothing. But I would like to apply image_aug within each batch such that each batch contains 25%, 25% and 50% of the above-mentioned transformed images.
Here are my image augmentation and parse functions for the TFRecords:
def decode_image(image_data, shape):
    image = tf.io.decode_png(image_data, channels=shape[-1])
    image = tf.cast(image, tf.float32) / 255.0  # convert image to floats in [0, 1] range
    image = tf.reshape(image, shape)  # explicit size needed for TPU
    return image
def image_aug(image):
    random_num = np.random.rand()
    if random_num < 0.25:
        data_augmentation = keras.Sequential([
            keras.layers.RandomFlip("horizontal_and_vertical"),
            keras.layers.RandomRotation(0.2),
        ])
        image = data_augmentation(tf.expand_dims(image, axis=0))
    elif random_num < 0.5:
        image = tfa.image.random_cutout(
            # image,
            tf.expand_dims(image, axis=0),
            mask_size=(100, 100),
            constant_values=1
        )
    return tf.squeeze(image)
def parse_example(serialized, shape, data_aug=False):
    features = {'image': tf.io.FixedLenFeature([], tf.string),
                'label': tf.io.FixedLenFeature([], tf.int64)
                }
    # Parse the serialized data so we get a dict with our data.
    parsed_example = tf.io.parse_single_example(serialized=serialized, features=features)
    image_raw = parsed_example['image']  # Get the image as raw bytes.
    image = decode_image(image_raw, shape)  # Decode the raw bytes so it becomes a tensor with type.
    # label = tf.io.decode_raw(parsed_example['label'], tf.uint8)
    # label = tf.cast(parsed_example['label'], tf.int64)
    if data_aug:
        # image = image.numpy()
        # for i in range(len(image)):
        image = image_aug(image)
    return image, tf.cast(parsed_example['label'], tf.float32)
The tf.data pipeline call looks like this:
train_dataset = tf.data.TFRecordDataset(np.asarray(train_val)[tr_idx])
train_dataset = train_dataset.map(partial(parse_example, data_aug = True, shape=IMAGE_SIZE)).cache().shuffle(2048).prefetch(AUTOTUNE).batch(BATCH_SIZE).repeat(NUM_EPOCHS)
Any ideas will be appreciated!
First, I made a custom dataset to load in images from my dataframe (containing the image filepath and corresponding int label):
class Dataset(torch.utils.data.Dataset):
    def __init__(self, dataframe, transform=None):
        self.frame = dataframe
        self.transform = transform

    def __len__(self):
        return len(self.frame)

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        filename = self.frame.iloc[idx, 0]
        image = torch.from_numpy(io.imread(filename).transpose((2, 0, 1))).float()
        label = self.frame.iloc[idx, 1]
        sample = {'image': image, 'label': label}
        if self.transform:
            sample = self.transform(sample)
        return sample
Then, I use a pre-existing model architecture like so:
model = models.densenet161()
num_ftrs = model.classifier.in_features
model.classifier = nn.Linear(num_ftrs, 10) # where 10 is my number of classes
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
Finally, for training, I do the following:
model.train()  # switch to train mode
for epoch in range(5):
    for i, sample in enumerate(train_set):  # where train_set is an instance of my Dataset class
        optimizer.zero_grad()
        image, label = sample['image'].unsqueeze(0), torch.Tensor(sample['label']).long()
        output = model(image)
        loss = criterion(output, label)
        loss.backward()
        optimizer.step()
However, I am experiencing errors with loss = criterion(output, label). It tells me: ValueError: Expected input batch_size (1) to match target batch_size (2). Can someone teach me how to properly use a custom dataset, especially with loading in batches of data? Also, why am I experiencing that ValueError? Thank you!
Please check the following lines:
label = self.frame.iloc[idx, 1] in the dataset definition: you may print this to re-check whether it returns two ints.
image, label = sample['image'].unsqueeze(0), torch.Tensor(sample['label']).long() in the training code: you need to check the shape of that tensor.
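For illustration, a minimal sketch of both fixes, assuming the Dataset and training loop shown above. Note that torch.Tensor(n) with a plain int n builds an uninitialized tensor of length n, so a label value of 2 produces a target of batch size 2, which matches the error message; torch.tensor keeps the value instead:

import torch
from torch.utils.data import DataLoader

# Per-sample fix: keep the label value and give it a batch dimension of 1.
label = torch.tensor([sample['label']]).long()  # shape (1,), matches the unsqueezed image

# Better: let a DataLoader do the batching instead of unsqueezing by hand (batch size 32 is just an example).
loader = DataLoader(train_set, batch_size=32, shuffle=True)
for batch in loader:
    images, labels = batch['image'], batch['label']  # shapes (B, C, H, W) and (B,)
    output = model(images)
    loss = criterion(output, labels)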
I want to create my own custom DataGenerator on my own dataset. I have read all the images and stored the locations and their labels in two variables named images and labels. I have written this custom generator:
def data_gen(img_folder, y, batch_size):
    c = 0
    n_image = list(np.arange(0, len(img_folder), 1))  # List of training images
    random.shuffle(n_image)
    while (True):
        img = np.zeros((batch_size, 224, 224, 3)).astype('float')  # Create zero arrays to store the batches of training images
        label = np.zeros((batch_size)).astype('float')  # Create zero arrays to store the batches of label images
        for i in range(c, c + batch_size):  # initially from 0 to 16, c = 0.
            train_img = imread(img_folder[n_image[i]])
            # row, col = train_img.shape
            train_img = cv2.resize(train_img, (224, 224), interpolation=cv2.INTER_LANCZOS4)
            train_img = train_img.reshape(224, 224, 3)
            # binary_img = binary_img[:, :128//2]
            img[i - c] = train_img  # add to array - img[0], img[1], and so on.
            label[i - c] = y[n_image[i]]
        c += batch_size
        if (c + batch_size >= len(img_folder)):
            c = 0
            random.shuffle(n_image)
            # print "randomizing again"
        yield img, label
What I want to know is: how can I add other augmentations like flip, crop, and rotate to this generator? Moreover, how should I yield these augmented images so that they stay linked with the correct labels?
Please let me know.
You can apply flip, crop, and rotate to train_img before putting it into img. That is:
# ....
while (True):
    # ....
    # add your data augmentation function here
    train_img = data_augmentor(train_img)
    img[i-c] = train_img
    # ....
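For example, here is a minimal sketch of what such a data_augmentor could look like (the function name comes from the snippet above; the specific augmentations and probabilities are just an illustration, built with OpenCV and NumPy):

import random
import cv2
import numpy as np

def data_augmentor(img):
    h, w = img.shape[:2]
    # Random horizontal flip.
    if random.random() < 0.5:
        img = cv2.flip(img, 1)
    # Random rotation by a multiple of 90 degrees.
    img = np.rot90(img, k=random.randint(0, 3)).copy()
    # Random crop of up to ~10% on each side, then resize back to the original size.
    rh, rw = img.shape[:2]
    top, left = random.randint(0, rh // 10), random.randint(0, rw // 10)
    bottom, right = rh - random.randint(0, rh // 10), rw - random.randint(0, rw // 10)
    img = cv2.resize(img[top:bottom, left:right], (w, h), interpolation=cv2.INTER_LANCZOS4)
    return img

Since only train_img is modified, label[i-c] = y[n_image[i]] stays untouched, so every augmented image is still yielded with its correct label.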
I'm working on a waste/garbage detector for a personal project. I rely on Tensorflow (in Python 3) to train my own dataset.
I have a script that creates and trains a model from scratch. Then, I freeze the checkpoints to get a PB file for detection.
The code I have for the detection (found here) requires two files to work: the previous PB file and a labelmap.txt.
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_CKPT = 'frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = 'label_map.pbtxt'
I know what a labelmap.txt looks like, and it is actually quite simple to write one myself, but I don't know how to generate it because it links each class to an ID, and the IDs are unknown to me.
I searched on the Internet; when people mention labelmap.txt, it usually involves TFRecords. However, I don't use TFRecords for my project: I extract each region of interest and save them in subfolders, one subfolder per class (can, bottle...).
As I am new to TensorFlow, I may have misunderstood something in the training process. Do you have any lead so I can test my model and see whether it is accurate? I can provide some code if you need it.
Thank you in advance.
The labelmap.pbtxt file maps the IDs used internally in the network to the label names. You cannot simply generate one after training; you need to make sure you use the same ID-to-label mapping that was used during training, or you might get incorrect results.
If you used the training instructions for the TensorFlow object_detection model, then you will have generated this labelmap file at some point and you can just re-use it.
Check the steps you used to train the network, or post them here.
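For reference, the label map used by the TensorFlow Object Detection API is just a text proto with one item per class, where id must match the integer assigned to that class during training (IDs start at 1; 0 is reserved for the background). A minimal sketch that writes one from a class list, assuming you already know the class-to-ID order used during training:

# Hypothetical class list; the order/IDs must match the mapping used during training.
classes = ['bottle', 'can']
with open('label_map.pbtxt', 'w') as f:
    for class_id, name in enumerate(classes, start=1):
        f.write("item {\n  id: %d\n  name: '%s'\n}\n" % (class_id, name))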
Before training, I gathered and labelled thousands of images, extracted each labelled area, resized each of them and, according to their classes, split them into different folders.
There are several files involved in the training step. I originally retrieved the code from this repository and added the possibility to resume training.
trainer.py
import os
import tensorflow as tf
import model_architecture
from utils import utils
from build_model import model_tools
# Images directory.
data_path = os.path.join('dataset' + os.sep)  # contains subfolders, one per item
all_classes = os.listdir(data_path)
number_of_classes = len(all_classes)
# Images dimensions.
height = 64
width = 64
# Checkpoints directory.
output_dir = os.path.join(os.pardir + os.sep, 'checkpoints' + os.sep)
model_pattern = 'model.ckpt'
model_base_path = os.path.join(output_dir, model_pattern)
meta_file_path = model_base_path + '.meta'
# Training params.
color_channels = 3
start = 0
epochs = 5
batch_size = 10
batch_counter = 0
# Create Placeholders for images and labels.
images_ph = tf.placeholder(tf.float32, shape=[None, height, width, color_channels])
labels_ph = tf.placeholder(tf.float32, shape=[None, number_of_classes])
def trainer(network, number_of_images):
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=network, labels=labels_ph)
    cost = tf.reduce_mean(cross_entropy)
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    tf.summary.scalar('cost', cost)
    tf.add_to_collection('optimizer', optimizer)
    global_step = tf.Variable(0, name='global_step', trainable=False)
    saver = tf.train.Saver()
    # Launch the graph in a session.
    with tf.Session() as sess:
        # Initialize all variables.
        tf.global_variables_initializer().run()
        # Read checkpoints directory.
        ckpt = tf.train.get_checkpoint_state(output_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print('Reloading existing model.')
        else:
            init = tf.global_variables_initializer()
            sess.run(init)
            print('Creating a new model.')
        # Get last epoch index.
        start = global_step.eval()
        writer = tf.summary.FileWriter(output_dir, graph=tf.get_default_graph())
        merged = tf.summary.merge_all()
        saver = tf.train.Saver(write_version=tf.train.SaverDef.V2, max_to_keep=5)
        counter = 0
        # Training.
        for epoch in range(start, epochs):
            tools = utils()
            for batch in range(int(number_of_images / batch_size)):
                counter += 1
                images, labels = tools.batch_dispatch()
                if images is None:
                    break
                loss, summary = sess.run([cost, merged], feed_dict={images_ph: images, labels_ph: labels})
                sess.run(optimizer, feed_dict={images_ph: images, labels_ph: labels})
                print('Epoch number {epoch} batch {batch} complete - loss {loss}'.format(
                    epoch=epoch, batch=batch, loss=loss))
                writer.add_summary(summary, counter)
            global_step.assign(epoch).eval()
            # Save progression.
            saver.save(sess, model_base_path, global_step=epoch)
# Main program.
if __name__ == '__main__':
    tools = utils()
    model = model_tools()
    network = model_architecture.generate_model(images_ph, number_of_classes)
    number_of_images = sum([len(files) for r, d, files in os.walk('dataset')])
    trainer(network, number_of_images)
model_tools.py
class model_tools:
    def add_weights(self, shape):
        return tf.Variable(tf.truncated_normal(shape=shape, stddev=0.05))

    def add_biases(self, shape):
        return tf.Variable(tf.constant(0.05, shape=shape))

    def conv_layer(self, layer, kernel, input_shape, output_shape, stride_size):
        weights = self.add_weights([kernel, kernel, input_shape, output_shape])
        biases = self.add_biases([output_shape])
        stride = [1, stride_size, stride_size, 1]
        layer = tf.nn.conv2d(layer, weights, strides=stride, padding='SAME') + biases
        return layer

    def pooling_layer(self, layer, kernel_size, stride_size):
        kernel = [1, kernel_size, kernel_size, 1]
        stride = [1, stride_size, stride_size, 1]
        return tf.nn.max_pool(layer, ksize=kernel, strides=stride, padding='SAME')

    def flattening_layer(self, layer):
        input_size = layer.get_shape().as_list()
        new_size = input_size[-1] * input_size[-2] * input_size[-3]
        return tf.reshape(layer, [-1, new_size]), new_size

    def fully_connected_layer(self, layer, input_shape, output_shape):
        weights = self.add_weights([input_shape, output_shape])
        biases = self.add_biases([output_shape])
        layer = tf.matmul(layer, weights) + biases
        return layer

    def activation_layer(self, layer):
        return tf.nn.relu(layer)
utils.py
import cv2
import random
class utils:
    image_count = []
    count_buffer = []
    class_buffer = all_classes[:]

    def __init__(self):
        self.image_count = []
        self.count_buffer = []
        for i in os.walk(data_path):
            if len(i[2]):
                self.image_count.append(len(i[2]))
        self.count_buffer = self.image_count[:]

    def batch_dispatch(self, batch_size=batch_size):
        global batch_counter
        if sum(self.count_buffer):
            class_name = random.choice(self.class_buffer)
            choice_index = all_classes.index(class_name)
            choice_count = self.count_buffer[choice_index]
            if choice_count == 0:
                class_name = all_classes[self.count_buffer.index(max(self.count_buffer))]
                choice_index = all_classes.index(class_name)
                choice_count = self.count_buffer[choice_index]
            slicer = batch_size if batch_size < choice_count else choice_count
            img_ind = self.image_count[choice_index] - choice_count
            indices = [img_ind, img_ind + slicer]
            images = self.generate_images(class_name, indices)
            labels = self.generate_labels(class_name, slicer)
            self.count_buffer[choice_index] = self.count_buffer[choice_index] - slicer
        else:
            images, labels = (None,) * 2
        return images, labels

    def generate_labels(self, class_name, number_of_samples):
        one_hot_labels = [0] * number_of_classes
        one_hot_labels[all_classes.index(class_name)] = 1
        one_hot_labels = [one_hot_labels] * number_of_samples
        return one_hot_labels

    def generate_images(self, class_name, indices):
        batch_images = []
        choice_folder = os.path.join(data_path, class_name)
        selected_images = os.listdir(choice_folder)[indices[0]:indices[1]]
        for image in selected_images:
            img = cv2.imread(os.path.join(choice_folder, image))
            batch_images.append(img)
        return batch_images
model_architecture.py contains the structure of the 3-layer image classifier.
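For context only, a hypothetical sketch of what generate_model in model_architecture.py could look like, built from the model_tools methods above (the real file is not shown here):

# Hypothetical illustration of model_architecture.py, not the actual file.
from build_model import model_tools

def generate_model(images_ph, number_of_classes):
    tools = model_tools()
    # Two conv -> ReLU -> max-pool blocks, then flatten and one fully connected output layer.
    net = tools.conv_layer(images_ph, kernel=3, input_shape=3, output_shape=32, stride_size=1)
    net = tools.activation_layer(net)
    net = tools.pooling_layer(net, kernel_size=2, stride_size=2)
    net = tools.conv_layer(net, 3, 32, 64, 1)
    net = tools.activation_layer(net)
    net = tools.pooling_layer(net, 2, 2)
    net, flat_size = tools.flattening_layer(net)
    net = tools.fully_connected_layer(net, flat_size, number_of_classes)
    return net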
When I run trainer.py, I get a checkpoints folder filled with meta and index files. It seems correct.
About exporting the model, I'm stuck because I don't know what to give as the pipeline config path parameter.
python3 export_inference_graph.py \
--input_type image_tensor \
--trained_checkpoint_prefix "/home/user/model/model.ckpt-4" \
--pipeline_config_path ???? \
--output_directory /home/user/exports/
To get the PB file, I used this:
checkpoint_location = 'checkpoints/model.ckpt-0'
export_dir = 'frozen/'
loaded_graph = tf.Graph()
with tf.Session(graph=loaded_graph) as sess:
    loader = tf.train.import_meta_graph(checkpoint_location + '.meta')
    loader.restore(sess, checkpoint_location)
    builder = tf.saved_model.builder.SavedModelBuilder(export_dir)
    builder.add_meta_graph_and_variables(sess,
                                         [tf.saved_model.tag_constants.TRAINING],
                                         strip_default_attrs=True)
    builder.add_meta_graph([tf.saved_model.tag_constants.SERVING], strip_default_attrs=True)
    builder.save()
It creates a saved_model.pb file but not a labelmap.pbtxt.
Should I completely change the way I train my model ?