I have a dataset with images and another dataset describing them:
There are a lot of pictures: people with and without sunglasses, with and without smiles, and other attributes. What I want to do is add smiles to photos where people are not smiling.
I've started like this:
smile_ids = attrs['Smiling'].sort_values(ascending=False).iloc[100:125].index.values
smile_data = data[smile_ids]
no_smile_ids = attrs['Smiling'].sort_values(ascending=True).head(5).index.values
no_smile_data = data[no_smile_ids]
eyeglasses_ids = attrs['Eyeglasses'].sort_values(ascending=False).head(25).index.values
eyeglasses_data = data[eyeglasses_ids]
sunglasses_ids = attrs['Sunglasses'].sort_values(ascending=False).head(5).index.values
sunglasses_data = data[sunglasses_ids]
When I plot them, they look fine:
plot_gallery(smile_data, IMAGE_H, IMAGE_W, n_row=5, n_col=5, with_title=True, titles=smile_ids)
The plot_gallery function looks like this:
def plot_gallery(images, h, w, n_row=3, n_col=6, with_title=False, titles=[]):
    plt.figure(figsize=(1.5 * n_col, 1.7 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    for i in range(n_row * n_col):
        plt.subplot(n_row, n_col, i + 1)
        try:
            plt.imshow(images[i].reshape((h, w, 3)), cmap=plt.cm.gray, vmin=-1, vmax=1, interpolation='nearest')
            if with_title:
                plt.title(titles[i])
            plt.xticks(())
            plt.yticks(())
        except:
            pass
Then I do:
def to_latent(pic):
    with torch.no_grad():
        inputs = torch.FloatTensor(pic.reshape(-1, 45*45*3))
        inputs = inputs.to('cpu')
        autoencoder.eval()
        output = autoencoder.encode(inputs)
    return output

def from_latent(vec):
    with torch.no_grad():
        inputs = vec.to('cpu')
        autoencoder.eval()
        output = autoencoder.decode(inputs)
    return output
After that:
smile_latent = to_latent(smile_data).mean(axis=0)
no_smile_latent = to_latent(no_smile_data).mean(axis=0)
sunglasses_latent = to_latent(sunglasses_data).mean(axis=0)
smile_vec = smile_latent - no_smile_latent
sunglasses_vec = sunglasses_latent - smile_latent
And finally:
def add_smile(ids):
    for id in ids:
        pic = data[id:id+1]
        latent_vec = to_latent(pic)
        latent_vec[0] += smile_vec
        pic_output = from_latent(latent_vec)
        pic_output = pic_output.view(-1, 45, 45, 3).cpu()
        plot_gallery([pic, pic_output], IMAGE_H, IMAGE_W, n_row=1, n_col=2)

def add_sunglasses(ids):
    for id in ids:
        pic = data[id:id+1]
        latent_vec = to_latent(pic)
        latent_vec[0] += sunglasses_vec
        pic_output = from_latent(latent_vec)
        pic_output = pic_output.view(-1, 45, 45, 3).cpu()
        plot_gallery([pic, pic_output], IMAGE_H, IMAGE_W, n_row=1, n_col=2)
But when I execute this line I don't get any faces:
add_smile(no_smile_ids)
The output (screenshot omitted) shows no recognizable faces.
Could someone please explain where my mistake is or why this can happen? Thanks for any help.
Update: I also checked the shape of pic_output (screenshot omitted).
Wild guess, but it seems you are reshaping your images with view instead of permuting the axes. The former has the undesired effect of mixing information across the batch/channel dimensions.
pic_output = pic_output.view(-1, 45, 45, 3).cpu()
should be replaced with
pic_output = pic_output.permute(0, 2, 3, 1).cpu()
Assuming tensor pic_output is already shaped like (-1, 3, 45, 45).
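As a minimal sketch of the difference (using a random tensor of the assumed shape; purely illustrative):
import torch

x = torch.randn(2, 3, 45, 45)           # (batch, channels, H, W)
reshaped = x.view(-1, 45, 45, 3)        # reinterprets the same memory order, scrambling channels and pixels
permuted = x.permute(0, 2, 3, 1)        # reorders the axes to (batch, H, W, channels), values stay put
print(reshaped.shape, permuted.shape)   # both torch.Size([2, 45, 45, 3])
print(torch.equal(reshaped, permuted))  # False: same shape, different images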
I was using the Albumentations library to perform some data augmentations on an object detection dataset that I intended to train a YOLOv5 model on.
I have to perform the augmentations separately and save the images locally to disk, but when I do, I notice that some of the returned bounding boxes aren't generated properly.
I have my augmentations set up in a separate aug.py file, shown below (augmentations purposefully removed during debugging attempts, see below):
import albumentations as A
import cv2
PROB = 0.5
bbp = A.BboxParams(format="yolo")
horizontal_flip_transform = A.Compose([
], bbox_params = bbp)
vertical_flip_transform = A.Compose([
], bbp)
pixel_dropout_transform = A.Compose([
], bbox_params = bbp)
random_rotate = A.Compose([
], bbox_params = bbp )
#NOTE: THIS METHOD IMPLIES THAT THE IMAGE WIDTHS MUST BE AT LEAST 50 PIXELS
#Remove this aug to remove this constraint
random_crop = A.Compose([
], bbox_params = bbp)
augs = [horizontal_flip_transform, vertical_flip_transform, pixel_dropout_transform, random_rotate, random_crop]
def get_augmentations():
    return augs
And the relevant parts of my implementation for performing the augmentations and saving them to disk is below:
def run_augments_on_image(img_name, bboxes, max_images_to_generate=500):
    ret = []
    img = np.array(Image.open(img_name), dtype=np.uint8)
    transforms = get_augmentations()
    for i in range(min(len(transforms), max_images_to_generate)):
        transformed = transforms[i](image=img, bboxes=bboxes)
        ret.append((transformed["image"], transformed["bboxes"]))
    return ret

def run_and_save_augments_on_image_sets(batch_img_names, bboxes_urls, max_images_to_generate, dataset_dir, trainval):
    num_images = 0
    for i in range(len(batch_img_names)):
        bboxes = []
        with open(os.path.join(dataset_dir, trainval, 'labels', bboxes_urls[i]), 'r') as f:
            for row in f:
                x = row.strip().split(' ')
                x.append(row[0])
                x.pop(0)
                x[0] = float(x[0])
                x[1] = float(x[1])
                x[2] = float(x[2])
                x[3] = float(x[3])
                bboxes.append(x)
        trans = run_augments_on_image(os.path.join(dataset_dir, trainval, 'images', batch_img_names[i]), bboxes)
        img_index = len(os.listdir(os.path.join(dataset_dir, 'train', 'images'))) + len(os.listdir(os.path.join(dataset_dir, 'valid', 'images'))) + 1
        for j in range(len(trans)):
            img_trans, bboxes_trans = trans[j]
            p = Image.fromarray(img_trans).save(os.path.join(dataset_dir, trainval, 'images', f'image-{img_index}.{batch_img_names[j].split(".")[-1]}'))
            with open(os.path.join(dataset_dir, trainval, 'labels', f'image-{img_index}.txt'), 'w') as f:
                for boxs in bboxes_trans:
                    print(f'{boxs[-1]} {boxs[0]} {boxs[1]} {boxs[2]} {boxs[3]}', file=f)
            num_images += 1
            img_index += 1
            if num_images >= max_images_to_generate:
                break
        if num_images >= max_images_to_generate:
            break
For testing purposes (some of the bounding boxes were off), I removed all the actual augmentations, expecting the original label to equal the augmented label since no augmentations were applied (one example is shown below). But, as you can see, the two labels are different.
img-original.txt
0 0.5662285714285714 0.2740066225165563 0.5297714285714286 0.4837913907284769
img-augmented.txt
0 0.51488 0.47173333333333334 0.6405099999999999 0.6527333333333334
(The labels above are in normalized xywh YOLO format)
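For reference, normalized YOLO xywh labels relate to pixel coordinates roughly as in the sketch below (illustrative only; img_w and img_h stand for hypothetical image dimensions):
def yolo_to_pixel_corners(xc, yc, w, h, img_w, img_h):
    # normalized (x_center, y_center, width, height) -> pixel (x1, y1, x2, y2)
    x1 = (xc - w / 2) * img_w
    y1 = (yc - h / 2) * img_h
    x2 = (xc + w / 2) * img_w
    y2 = (yc + h / 2) * img_h
    return x1, y1, x2, y2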
Why is Albumentations altering the labels? None of the pipelines in aug.py contain any transforms.
Any help will be highly appreciated
I'm trying to load two lists containing image paths and their corresponding labels. Something like this:
p0 = ['a','b',....] #paths to images .tif format
p1 = [1,2,3,......] #paths to images .tif format
labels = [0,1,1,...] #corresponding labels w.r.t both the lists
I used tf.data in the following way:
def TFData(p_0, p_1, batch_size, labels=None, is_train=True):
    dset = tf.data.Dataset.from_tensor_slices((p_0, p_1))
    if labels is not None:
        label = tf.data.Dataset.from_tensor_slices(labels)
    AUTO = tf.data.experimental.AUTOTUNE
    final_dset = tf.data.Dataset.zip((dset, label))
    final_dset = final_dset.batch(batch_size, drop_remainder=is_train).prefetch(AUTO)
    return final_dset
This returns:
<PrefetchDataset shapes: (((64,), (64,)), (64,)), types: ((tf.string, tf.string), tf.int32)>
My question is: how do I apply a function that converts the paths to arrays using cv2 (since the images are .tif files), such that the result will be:
<PrefetchDataset shapes: (((64,256,256,3), (64,256,256,3)), (64,)), types: ((tf.float64, tf.float64), tf.int32)>
I'm using dataset.map. However, it's throwing an error:
def to_array(p_0):
    im_1 = cv2.imread(p_0, 1)
    #im = tfio.experimental.image.decode_tiff(paths)
    im_1 = cv2.resize(im_1, (img_w, img_h))  #img_w=img_h=256
    im_1 = np.asarray(im_1, dtype=np.float64)
    im_1 /= 255
    return im_1

def parse_fn(p_0):
    [p_0,] = tf.py_function(to_array, [p_0], [tf.float64])
    return p_0

def TFData(p_0, p_1, batch_size, labels=None, is_train=True):
    dset_1 = tf.data.Dataset.from_tensor_slices(p_0)
    dset_1 = dset_1.map(parse_fn)
    dset_2 = tf.data.Dataset.from_tensor_slices(p_1)
    dset_2 = dset_2.map(parse_fn)
    if labels is not None:
        label = tf.data.Dataset.from_tensor_slices(labels)
    AUTO = tf.data.experimental.AUTOTUNE
    final_dset = tf.data.Dataset.zip((dset_1, dset_2, label))
    final_dset = final_dset.batch(batch_size, drop_remainder=is_train).prefetch(AUTO)
    return final_dset
print(train_data) #where train_data is defined as TFData()
<PrefetchDataset shapes: ((<unknown>, <unknown>), (64,)), types: ((tf.float64, tf.float64), tf.int32)>
This throws an error:
for (t, p), l in train_data.as_numpy_iterator():
    print(t)
    print(p)
    print(l)
    print(type(t))
    break
SystemError: <built-in function imread> returned NULL without setting an error
[[{{node EagerPyFunc}}]] [Op:IteratorGetNext]
Any help will be highly appreciated
I think your problem is in cv2.imread.
Have you checked, outside of these functions, whether it reads and plots the data correctly?
Please try with -1 instead:
im_1 = cv2.imread(p_0,-1)
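If it helps, here is a minimal sketch of that suggestion applied to the question's to_array. Note that inside tf.py_function, p_0 arrives as an eager string tensor, so this sketch (an assumption about the surrounding setup) also converts it to a plain Python string before calling cv2.imread, with img_w and img_h taken to be 256 as in the question:
import cv2
import numpy as np

def to_array(p_0):
    path = p_0.numpy().decode("utf-8")       # eager string tensor -> plain str for cv2
    im_1 = cv2.imread(path, -1)              # -1: load the .tif unchanged
    if im_1 is None:
        raise FileNotFoundError("cv2 could not read " + path)
    im_1 = cv2.resize(im_1, (img_w, img_h))  # img_w = img_h = 256
    return np.asarray(im_1, dtype=np.float64) / 255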
I am using the im2rec.py tool to first generate .lst files and then to generate .rec and .idx files, as follows:
BASE_DIR = './'
IMAGES_DIR = os.path.join(BASE_DIR,'IMAGES')
DATASET_DIR = os.path.join(BASE_DIR,'Dataset')
TRAIN_RATIO = 0.8
TEST_DATA_RATIO = 0.1
Dataset_lst_file = os.path.join(DATASET_DIR,"dataset")
!python $BASE_DIR/tools/im2rec.py --list --recursive --test-ratio=$TEST_DATA_RATIO --train-ratio=$TRAIN_RATIO $Dataset_lst_file $IMAGES_DIR
!python $BASE_DIR/tools/im2rec.py --resize 224 --center-crop --num-thread 4 $Dataset_lst_file $IMAGES_DIR
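For reference, a rough summary of what those im2rec.py flags do (based on MXNet's im2rec.py; worth double-checking against the version in your tools directory):
# --list                         generate the .lst listing files instead of packing records
# --recursive                    walk IMAGES_DIR recursively and derive labels from subfolder names
# --train-ratio / --test-ratio   fractions of the list used for the train and test splits
# --resize 224                   resize the shorter edge of each image to 224 pixels
# --center-crop                  center-crop each image to a square
# --num-thread 4                 number of worker threads used for encoding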
I am successfully generating the .lst, .rec, and .idx files. However, my question is: how can I read a specific image from the .rec file and plot it? For instance, to check whether the images were recorded correctly, or just to explore my dataset.
------------Update----------
I was able to plot them as follows:
# https://mxnet.apache.org/versions/1.5.0/tutorials/basic/data.html
data_iter = mx.image.ImageIter(batch_size=4, data_shape=(3, 224, 224),
                               path_imgrec=Dataset_lst_file + '_train.rec',
                               path_imgidx=Dataset_lst_file + '_train.idx')
data_iter.reset()
for j in range(4):
    batch = data_iter.next()
    data = batch.data[0]
    #print(batch)
    label = batch.label[0].asnumpy()
    for i in range(4):
        ax = plt.subplot(1, 4, i + 1)
        plt.imshow(data[i].asnumpy().astype(np.uint8).transpose((1, 2, 0)))
        ax.set_title('class: ' + str(label[i]))
        plt.axis('off')
    plt.show()
This tutorial includes an example of image visualization from a .rec file: https://gluon-cv.mxnet.io/build/examples_detection/finetune_detection.html
dataset = gcv.data.RecordFileDetection('pikachu_train.rec')
classes = ['pikachu'] # only one foreground class here
image, label = dataset[0]
print('label:', label)
# display image and label
ax = viz.plot_bbox(image, bboxes=label[:, :4], labels=label[:, 4:5], class_names=classes)
plt.show()
To complement the previous answer: this will display images on screen using plot_bbox, or render them to a folder.
Usage:
dumpRecordFileDetection('./data/val.rec', False, True, classes, ctx)
def dumpRecordFileDetection(record_filename, display_ui, output_to_directory, classes, ctx):
    """Dump RecordFileDetection to screen or a directory"""
    if isinstance(ctx, mx.Context):
        ctx = [ctx]
    dataset = gcv.data.RecordFileDetection(record_filename)
    print('images:', len(dataset))
    image, label = dataset[0]
    bboxes = label[:, :4]
    labels = label[:, 4:5]
    print(image.shape, label.shape)
    print('labeldata:', label)
    print('bboxes:', bboxes)
    print('labels:', labels)
    image_dump_dir = os.path.join("./dump")
    if not os.path.exists(image_dump_dir):
        os.makedirs(image_dump_dir)
    for i, batch in enumerate(dataset):
        size = len(batch)
        image, label = batch
        print(image.shape, label.shape)
        bboxes = label[:, :4]
        labels = label[:, 4:5].astype(np.uint8)
        if output_to_directory:
            file_path = os.path.join("./dump", "{0}_.png".format(i))
            # Format (C x H x W)
            img = image.asnumpy().astype(np.uint8)
            for box, lbl in zip(bboxes, labels):
                cv2.rectangle(img, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
                txt = "{0}".format(classes[lbl[0]])
                cv2.putText(img, txt, (box[0], box[1]), cv2.FONT_HERSHEY_PLAIN, 1, (0, 255, 0), 1, cv2.LINE_AA, False)
            cv2.imwrite(file_path, img)
        if display_ui:
            ax = viz.plot_bbox(image, bboxes=bboxes, labels=labels, class_names=classes)
            plt.show()
I have implemented a sequence-to-sequence model with an attention layer. If I use 300000 data points I don't get any error, but if I use all of my data points, model.fit raises the following error:
TypeError: Expected int32, got None of type 'NoneType' instead.
What would be the reason for this?
The code before model.fit is:
class encoder_decoder(tf.keras.Model):
    def __init__(self, embedding_size, encoder_inputs_length, output_length, vocab_size, output_vocab_size, score_fun, units):
        super(encoder_decoder, self).__init__()
        self.vocab_size = vocab_size
        self.enc_units = units
        self.embedding_size = embedding_size
        self.encoder_inputs_length = encoder_inputs_length
        self.output_length = output_length
        self.lstm_output = 0
        self.state_h = 0
        self.state_c = 0
        self.output_vocab_size = output_vocab_size
        self.dec_units = units
        self.score_fun = score_fun
        self.att_units = units
        self.encoder = Encoder(self.vocab_size, self.embedding_size, self.enc_units, self.encoder_inputs_length)
        self.decoder = Decoder(self.output_vocab_size, self.embedding_size, self.output_length, self.dec_units, self.score_fun, self.att_units)
        # self.dense = Dense(self.output_vocab_size, activation="softmax")

    def call(self, data):
        input, output = data[0], data[1]
        encoder_hidden = self.encoder.initialize_states(input.shape[0])
        encoder_output, encoder_hidden, encoder_cell = self.encoder(input, encoder_hidden)
        decoder_hidden = encoder_hidden
        decoder_cell = encoder_cell
        decoder_output = self.decoder(output, encoder_output, decoder_hidden, decoder_cell)
        return decoder_output
Inside the call function I'm initializing states for the encoder, where I get the number of rows from input using the following line of code:
encoder_hidden = self.encoder.initialize_states(input.shape[0])
If I print input, I get its shape as (None, 55), and that's the reason I'm getting this error.
My total number of data points is 330614. When I use all my data I get this error, and when I use only 330000 data points I also get this error. If I print a batch inside the method, I get its shape as (64, 55).
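For what it's worth, here is a small, self-contained illustration of why the static batch dimension can show up as None even though each runtime batch is (64, 55); this is only to clarify that observation, not code from the original post:
import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec(shape=(None, 55), dtype=tf.float32)])
def show_shapes(x):
    print("static shape:", x.shape)                  # (None, 55): batch size unknown when the graph is traced
    tf.print("dynamic batch size:", tf.shape(x)[0])  # 64 at run time for a (64, 55) batch
    return x

show_shapes(tf.zeros((64, 55)))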
Please find below my code for creating the dataset for my sequence-to-sequence model: the function to preprocess the data, the function to create the dataset, and the function to load the dataset.
def preprocess_sentence(w):
    # w = unicode_to_ascii(w.lower().strip())
    w = re.sub(r"([?.!,¿])", r" \1 ", w)
    w = re.sub(r'[" "]+', " ", w)
    w = re.sub(r"[^a-zA-Z?.!,¿]+", " ", w)
    w = w.strip()
    w = '<start> ' + w + ' <end>'
    return w
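As a quick usage example (traced by hand from the regexes above), this is what the preprocessing produces:
print(preprocess_sentence("How are you?"))
# <start> How are you ? <end>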
def create_dataset(path, num_examples):
    lines = io.open(path, encoding='UTF-8').read().strip().split('\n')
    # lines1 = lines[330000:]
    # lines = lines[0:323386]+lines1
    word_pairs = [[preprocess_sentence(w) for w in l.split('\t')] for l in lines[:num_examples]]
    word_pairs = [[i[0], i[1]] for i in word_pairs]
    return zip(*word_pairs)

def tokenize(lang):
    lang_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
    lang_tokenizer.fit_on_texts(lang)
    tensor = lang_tokenizer.texts_to_sequences(lang)
    tensor = tf.keras.preprocessing.sequence.pad_sequences(tensor, padding='post')
    return tensor, lang_tokenizer

def load_dataset(path, num_examples=None):
    # creating cleaned input, output pairs
    targ_lang, inp_lang = create_dataset(path, num_examples)
    input_tensor, inp_lang_tokenizer = tokenize(inp_lang)
    target_tensor, targ_lang_tokenizer = tokenize(targ_lang)
    return input_tensor, target_tensor, inp_lang_tokenizer, targ_lang_tokenizer, targ_lang, inp_lang

# Try experimenting with the size of that dataset
num_examples = None
input_tensor, target_tensor, inp_lang, targ_lang, targ_lang_text, inp_lang_text = load_dataset(path, num_examples)

# Calculate max_length of the target tensors
max_length_targ, max_length_inp = target_tensor.shape[1], input_tensor.shape[1]
max_length_targ, max_length_inp

input_tensor_train, input_tensor_val, target_tensor_train, target_tensor_val = train_test_split(input_tensor, target_tensor, test_size=0.2)
The shapes of the datasets are as follows:
shape of input train (269291, 55)
shape of target train (269291, 53)
shape of input test (67323, 55)
shape of target test (67323, 53)
Could you share the code block before model.fit?
The NoneType error indicates that the final array passed to the model is, for some reason, empty. You can add print statements at the previous steps to understand where along the way your array becomes empty.
Compare that scenario to the case where you take all your data points, so that you can understand where the array changes and how it is handled prior to passing it to model.fit.
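As a rough sketch of those checks (the tensor names come from the question's own code; this only shows where to put the print statements, it is not a fix):
import numpy as np

for name, arr in [("input_tensor_train", input_tensor_train),
                  ("target_tensor_train", target_tensor_train)]:
    arr = np.asarray(arr)
    print(name, arr.shape, arr.dtype)        # watch for a zero-length dimension or dtype=object
    assert arr.size > 0, name + " is empty"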
I'm having issues with the following code (I've cut out large pieces, but I can add them back in; these seemed like the important parts). In my main code, I set up a plot ("sectionizePlot") which is a simple variation on another whiskered plot, and I'm looking to update it on the fly. In the same script, I'm using a heatmap ("ModifiedGenericHeatMap") which updates fine.
Any ideas how I might update my whiskered plot? Updating the ColumnDataSource doesn't seem to work (which makes sense). I'm guessing that I'm running into issues because I add each circle/point individually onto the plot.
One idea would be to clear the plot each time and manually re-add the points, but I'm unsure how to clear the plot on each update.
Any help would be appreciated. I’m just a lowly Scientist trying to utilize Bokeh in Pharma research.
def ModifiedgenericHeatMap(source, maxPct):
    colors = ["#75968f", "#a5bab7", "#c9d9d3", "#e2e2e2", "#dfccce", "#ddb7b1", "#cc7878", "#933b41", "#550b1d"]
    # mapper = LinearColorMapper(palette=colors, low=0, high=data['count'].max())
    mapper = LinearColorMapper(palette=colors, low=0, high=maxPct)
    TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom"
    globalDist = figure(title="derp",
                        x_range=cols, y_range=list(reversed(rows)),
                        x_axis_location="above", plot_width=1000, plot_height=400,
                        tools=TOOLS, toolbar_location='below')
    globalDist.grid.grid_line_color = None
    globalDist.axis.axis_line_color = None
    globalDist.axis.major_tick_line_color = None
    globalDist.axis.major_label_text_font_size = "5pt"
    globalDist.axis.major_label_standoff = 0
    globalDist.xaxis.major_label_orientation = pi / 3
    globalDist.rect(x="cols", y="rows", width=1, height=1,
                    source=source,
                    fill_color={'field': 'count', 'transform': mapper},
                    line_color=None)
    color_bar = ColorBar(color_mapper=mapper, major_label_text_font_size="5pt",
                         ticker=BasicTicker(desired_num_ticks=len(colors)),
                         # fix this via using a formatter which accounts for
                         formatter=PrintfTickFormatter(format="%d%%"),
                         label_standoff=6, border_line_color=None, location=(0, 0))
    text_props = {"source": source, "text_align": "left", "text_baseline": "middle"}
    x = dodge("cols", -0.4, range=globalDist.x_range)
    r = globalDist.text(x=x, y=dodge("rows", 0.3, range=globalDist.y_range), text="count", **text_props)
    r.glyph.text_font_size = "8pt"
    globalDist.add_layout(color_bar, 'right')
    globalDist.select_one(HoverTool).tooltips = [
        ('Well:', '@rows @cols'),
        ('Count:', '@count'),
    ]
    return globalDist
def sectionizePlot(source, source_error, type, base):
    print("sectionize plot created with typ: " + type)
    colors = []
    for x in range(0, len(base)):
        colors.append(getRandomColor())
    title = type + "-wise Intensity Distribution"
    p = figure(plot_width=600, plot_height=300, title=title)
    p.add_layout(
        Whisker(source=source_error, base="base", upper="upper", lower="lower"))
    for i, sec in enumerate(source.data['base']):
        p.circle(x=source_error.data["base"][i], y=sec, color=colors[i])
    p.xaxis.axis_label = type
    p.yaxis.axis_label = "Intensity"
    if (type.split()[-1] == "Row"):
        print("hit a row")
        conv = dict(enumerate(list("nABCDEFGHIJKLMNOP")))
        conv.pop(0)
        p.xaxis.major_label_overrides = conv
        p.xaxis.ticker = SingleIntervalTicker(interval=1)
    return p
famData = dict()
e1FractSource = ColumnDataSource(dict(count=[], cols=[], rows=[], index=[]))
e1Fract = ModifiedgenericHeatMap(e1FractSource, 100)
rowSectTotSource = ColumnDataSource(data=dict(base=[]))
rowSectTotSource_error = ColumnDataSource(data=dict(base=[], lower=[], upper=[]))
rowSectPlot_tot = sectionizePlot(rowSectTotSource,rowSectTotSource_error, "eSum Row", rowBase)
def update(selected=None):
    global famData
    famData = getFAMData(file_source_dt1, True)
    global e1Stack
    e1Fract = (famData['e1Sub'] / famData['eSum']) * 100
    e1Stack = e1Fract.stack(dropna=False).reset_index()
    e1Stack.columns = ["rows", "cols", "count"]
    e1Stack['count'] = e1Stack['count'].apply(lambda x: round(x, 1))
    e1FractSource.data = dict(cols=e1Stack["cols"], count=(e1Stack["count"]),
                              rows=e1Stack["rows"], index=e1Stack.index.values, codon=wells, )
    rowData, colData = sectionize(famData['eSum'], rows, cols)
    rowData_lower, rowData_upper = getLowerUpper(rowData)
    rowBase = list(range(1, 17))
    rowSectTotSource_error.data = dict(base=rowBase, lower=rowData_lower, upper=rowData_upper, )
    rowSectTotSource.data = dict(base=rowData)
    rowSectPlot_tot.title.text = "plot changed in update"
layout = column(e1FractSource, rowSectPlot_tot)
update()
curdoc().add_root(layout)
curdoc().title = "Specs"
print("ok")