I'm trying to generate a three-channel IUV image from Detectron2 DensePose output, like this:
IUV image needed
But instead, I receive this image:
Final UV map
I've used the code from "Is there a way to obtain IUV map from image in tensorflow?" and modified it into the following code:
from PIL import Image
import pickle
import numpy as np
import copy
from sys import argv
import sys
# Build a 3-channel image from a pickled DensePose result and paste it onto
# a blank 1080x1920 canvas at the detected bounding box.
pklpath = '/home/haiho/Dropbox/vid2vid_folder/pose_datase/train/densepose-pkl/frame0.pkl'
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only open dumps that come from a trusted source.
with open(pklpath, "rb") as hFile:
    dp_frame = pickle.load(hFile)[0]
if len(dp_frame['scores']) == 0:
    # Without any detection the indexing below would fail, so stop here
    # with the same message instead of carrying on.
    raise SystemExit(f'{pklpath} does not contain persons')
instance_id = 0
bbox = np.array(dp_frame['pred_boxes_XYXY'][instance_id])
result = dp_frame['pred_densepose'][instance_id]
# Scale the UV maps by 255 and store them (and the labels) as 8-bit arrays.
uv = np.array(result.uv.detach().cpu() * 255, dtype=np.uint8)
labels = np.array(result.labels.detach().cpu(), dtype=np.uint8)
c, h, w = uv.shape
# Derive the far corner from the already-rounded near corner: rounding
# bbox + w separately can be off by one pixel, and then the canvas slice
# would no longer match the (h, w) shape of the IUV patch.
x1, y1 = int(round(bbox[0])), int(round(bbox[1]))
x2, y2 = x1 + w, y1 + h
print(x1, y1, x2, y2)
canvas_size = (1080, 1920, 3)
canvas = np.zeros(canvas_size, dtype=np.uint8)
# Channels are stacked as (uv[1], uv[0], labels), so index 2 holds the labels.
# NOTE(review): the labels are not rescaled, unlike the UV channels — confirm
# this is the intended encoding for the target IUV format.
iuv = np.stack((uv[1, :, :], uv[0, :, :], labels))
# Move the channel axis last: (3, h, w) -> (h, w, 3).
iuv = np.transpose(iuv, (1, 2, 0))
# NOTE(review): assumes the patch lies fully inside the canvas — confirm.
canvas[y1:y2, x1:x2, :] = iuv
import cv2
import numpy as np
from boxdetect import config
from boxdetect.pipelines import get_boxes
import matplotlib.pyplot as plt
# Read the scanned form and convert it to a single grayscale channel.
image1 = cv2.imread('kyc_sample1.jpg')
img = cv2.cvtColor(image1, cv2.COLOR_BGR2GRAY)
# THRESH_TOZERO keeps pixels above the threshold (120) and zeroes the rest.
ret, thresh4 = cv2.threshold(img, 120, 255, cv2.THRESH_TOZERO)
cv2.imshow('Kyc', thresh4)

# Configure the boxdetect pipeline; every option is set on a fresh config.
cfg = config.PipelinesConfig()
for option, value in (
    ('width_range', (30, 55)),
    ('height_range', (25, 40)),
    ('scaling_factors', [0.7]),
    ('wh_ratio_range', (0.5, 1.7)),
    ('group_size_range', (2, 100)),
    ('dilation_iterations', 0),
):
    setattr(cfg, option, value)

# Run the detection on the thresholded image without plotting.
rects, grouping_rects, image, output_image = get_boxes(thresh4, cfg=cfg, plot=False)
Required solution:
Name: Mr William Smith Jons , PAN: 577634563744
Is there any way to detect the boxes and then extract the text inside each detected box? I have used the boxdetect package in Python to detect the boxes in the form.
I've been trying to save the final transform of the PyWavelets transform, but it only saves the whole figure. Does anyone have any recommendations?
I get 4 transforms, and the end goal is to convert the transforms into bytes, which I have already solved; I just need a way to save each of these images directly as a JPEG or PNG.
import numpy as np
import matplotlib.pyplot as plt
import cv2
from matplotlib.pyplot import imread
import pywt
import colorspacious
from matplotlib import cm
from colorspacious import cspace_converter
import os
import sys
from PIL import Image
## Load images and average the last (colour) axis to get one grayscale plane
A = imread('gabieureka.jpg')
originalA = np.mean(A, -1)
B = imread('votetemi.jpg')
originalB = np.mean(B, -1)

titles = ['Approximation', ' Horizontal detail',
          'Vertical detail', 'Diagonal detail']


def _plot_and_save_subbands(coeffs, stem):
    """Plot the four sub-bands of a single-level 2-D DWT and save each one.

    coeffs is the (LL, (LH, HL, HH)) tuple returned by pywt.dwt2. Each
    sub-band is drawn into one row of four subplots and is also written to
    its own PNG named 'Wavelett_Compressed_<stem>_<band>.png', so the
    individual transforms are saved rather than the whole figure.
    Returns the created figure.
    """
    approx, (horizontal, vertical, diagonal) = coeffs
    bands = zip(('LL', 'LH', 'HL', 'HH'),
                (approx, horizontal, vertical, diagonal))
    figure = plt.figure(figsize=(12, 3))
    for i, (tag, band) in enumerate(bands):
        ax = figure.add_subplot(1, 4, i + 1)
        ax.imshow(band, interpolation="nearest", cmap=plt.cm.gray)
        ax.set_title(titles[i], fontsize=10)
        # imsave writes the raw array through the colormap, with no axes,
        # titles or figure padding around it.
        plt.imsave(f"Wavelett_Compressed_{stem}_{tag}.png", band,
                   cmap=plt.cm.gray)
    return figure


## Transform A into wavelets
coeffs1 = pywt.dwt2(originalA, 'bior1.3')
fig = _plot_and_save_subbands(coeffs1, 'gabieureka')

## Transform B into wavelets
coeffs2 = pywt.dwt2(originalB, 'bior1.3')
fig = _plot_and_save_subbands(coeffs2, 'votetemi')
# Keep the sub-band names of the last transform available, as before.
LL, (LH, HL, HH) = coeffs2
I would like to plot a heatmap where the input data is not in the typical rectangularly spaced grid. Here is some sample data:
import numpy as np
# Build sample data on a non-rectangular grid: 100 x positions, each with its
# own 100 y positions spanning [ymin, x**2], plus one random intensity per
# point.
xmin = 6
xmax = 12
ymin = 0
x = np.linspace(xmin, xmax, 100)
ymax = x**2
final = []
for xi, y_top in zip(x, ymax):
    # The y spacing grows with x because the upper bound is x**2.
    for yj in np.linspace(ymin, y_top, 100):
        final.append([xi, yj, np.random.rand()])
data_for_plotting = np.asarray(final)  # (10000, 3) shaped array
I would like to plot intensity (in the colorbar) as a function of (x,y) which represents the position and I would like to do this without interpolation.
Here is my solution which uses matplotlib's griddata and linear interpolation.
import matplotlib.pyplot as plt
from matplotlib.mlab import griddata
# Resample the scattered samples onto a regular total_length x total_length
# grid with linear interpolation and draw the result as a colour mesh.
# NOTE(review): this is the matplotlib.mlab.griddata call signature, which
# newer Matplotlib releases no longer provide — confirm the installed version.
total_length = 100
# Split the three columns (x, y, intensity) once instead of re-slicing.
col_x, col_y, col_z = data_for_plotting.T
x1 = np.linspace(col_x.min(), col_x.max(), total_length)
y1 = np.linspace(col_y.min(), col_y.max(), total_length)
z1 = griddata(col_x, col_y, col_z, x1, y1, interp='linear')
p = plt.pcolormesh(x1, y1, z1, vmin=0., vmax=1.0, cmap='viridis')
clb = plt.colorbar(p)
I am looking for an alternate solution without interpolation as I would like to see the smallest unit of measurement in my x and y position (pixel size/rectangle). Based on the sample data given above I expect the height of the pixel to increase for large values of x.
I'm unsure what matplotlib.mlab.griddata is about. Maybe some very old version?
You could use scipy.interpolate.griddata which needs its parameters in a slightly different format. method='nearest' switches off the interpolation (default method='linear').
Here is how it could look with your test data (see griddata's documentation for more explanation and examples):
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
import numpy as np
# Recreate the sample data: 100 x positions, each with 100 y positions
# spanning [ymin, x**2] and one random intensity per point.
xmin = 6
xmax = 12
ymin = 0
x = np.linspace(xmin, xmax, 100)
ymax = x ** 2
final = []
for xi, y_top in zip(x, ymax):
    for yj in np.linspace(ymin, y_top, 100):
        final.append([xi, yj, np.random.rand()])
data_for_plotting = np.asarray(final)  # (10000, 3) shaped array

# Regular target grid covering the full x and y extent of the samples.
total_length = 100
x1 = np.linspace(min(data_for_plotting[:, 0]), max(data_for_plotting[:, 0]), total_length)
y1 = np.linspace(min(data_for_plotting[:, 1]), max(data_for_plotting[:, 1]), total_length)
grid_x, grid_y = np.meshgrid(x1, y1)
# method='nearest' gives every grid cell the value of its closest sample,
# so no new intensities are interpolated.
z1 = griddata(data_for_plotting[:, :2], data_for_plotting[:, 2], (grid_x, grid_y), method='nearest')
# origin='lower' puts the first grid row (smallest y) at the bottom.
img = plt.imshow(z1, extent=[x1[0], x1[-1], y1[0], y1[-1]], origin='lower',
                 vmin=0, vmax=1, cmap='inferno', aspect='auto')
cbar = plt.colorbar(img)
An alternative is to create one rectangle for each of the elongated pixels. Beware that this can be a rather slow operation. If really needed, one could create a pcolormesh for each column.
import matplotlib.pyplot as plt
from matplotlib.cm import ScalarMappable
import numpy as np
# ... create x and data_for_plotting as before
# Draw one coloured rectangle per sample so that every measurement keeps its
# own width (x_step) and height (y_step) without any resampling.
fig, ax = plt.subplots()
cmap = plt.get_cmap('inferno')
norm = plt.Normalize(0, 1)
x_step = x[1] - x[0]
y_step = 0
for i, (xi, yi, intensity_i) in enumerate(data_for_plotting):
    # The height is the distance to the next sample in the same column;
    # for the last sample of a column the previous y_step is reused.
    if i + 1 < len(data_for_plotting) and data_for_plotting[i + 1, 0] == xi:
        y_step = data_for_plotting[i + 1, 1] - yi
    ax.add_artist(plt.Rectangle((xi, yi), x_step, y_step, color=cmap(norm(intensity_i))))
# The ScalarMappable is not attached to any Axes, so tell colorbar explicitly
# which Axes to take the space from.
cbar = plt.colorbar(ScalarMappable(cmap=cmap, norm=norm), ax=ax)
# Artists added with add_artist do not update the data limits; set them here.
ax.set_xlim(x[0], x[-1])
ax.set_ylim(0, data_for_plotting[:, 1].max())
After applying mask original image
import cv2
import dlib
import numpy as np
# Detect faces, fill the convex hull of each face's 68 landmarks into a
# single-channel mask, and keep only the masked pixels of the image.
img = cv2.imread("Aayush.jpg")
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
msk = np.zeros_like(img_gray)
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")
faces = detector(img_gray)
for face in faces:
    landmarks = predictor(img_gray, face)
    # Collect every landmark as an (x, y) pair; the list was previously left
    # empty, so convexHull received no points.
    lp = [(landmarks.part(n).x, landmarks.part(n).y) for n in range(0, 68)]
    p = np.array(lp, np.int32)
    convexhull = cv2.convexHull(p)
    # Mark the face region with 255 in the mask.
    cv2.fillConvexPoly(msk, convexhull, 255)
# Pixels outside the mask become 0; the output keeps the size of img.
img1 = cv2.bitwise_and(img, img, mask=msk)
img1 contains a completely black image with only the face cut out from img. I just need the pixel values of the face portion, not the complete image.
As the original image and mask have not been provided in the question itself, I am assuming a simple input image and a mask image with a circular cavity:
The mask here is a single channel matrix with a value of 255 in the central cavity. To get the pixel info inside the cavity only you can use following numpy operation:
# Boolean selector that is True wherever the mask holds 255.
inside_cavity = mask == 255
# Indexing with the selector keeps only the image pixels inside the cavity.
pixel_info = original_image[inside_cavity]
# You may need to convert the numpy array to Python list.
pixel_info_list = pixel_info.tolist()
Now you may serialize the list to any format you want (csv in this case.)
Full code:
import cv2
import numpy as np
# Load the image and build a mask with the same height, width and dtype.
original_image = cv2.imread("/path/to/lena.png")
image_hw = original_image.shape[:2]
mask = np.zeros(image_hw, dtype=original_image.dtype)

# Draw a filled circle (thickness -1) with value 255 as the cavity.
cavity_center = (256, 256)
cavity_radius = 100
mask = cv2.circle(mask, cavity_center, cavity_radius, [255], -1)

# Keep only the pixels where the mask is 255 and convert them to a list.
inside_cavity = mask == 255
pixel_info = original_image[inside_cavity]
pixel_info_list = pixel_info.tolist()
I have been working on an ML project in which we want to build an offline application, so we are not using any APIs for this project; instead we use two models: one for object classification and the other for gender classification and emotion recognition. Now I have a problem integrating the two models into one. Both models use OpenCV.
Code for deep_learning_object_detection
# python deep_learning_object_detection.py --image images/example_01.jpg \
#--prototxt MobileNetSSD_deploy.prototxt.txt --model MobileNetSSD_deploy.caffemodel
# import the necessary packages
import numpy as np
import argparse
import cv2
# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
                help="path to input image")
ap.add_argument("-p", "--prototxt", required=True,
                help="path to Caffe 'deploy' prototxt file")
ap.add_argument("-m", "--model", required=True,
                help="path to Caffe pre-trained model")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
                help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# initialize the list of class labels MobileNet SSD was trained to
# detect, then generate a set of bounding box colors for each class
CLASSES = ["background", "aeroplane", "bicycle", "bird", "boat",
           "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
           "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
           "sofa", "train", "tvmonitor"]
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# load the input image and construct an input blob for the image
# by resizing to a fixed 300x300 pixels and then normalizing it
# (note: normalization is done via the authors of the MobileNet SSD
# implementation)
image = cv2.imread(args["image"])
if image is None:
    # cv2.imread returns None instead of raising when the file is unreadable
    raise SystemExit("could not read image: {}".format(args["image"]))
(h, w) = image.shape[:2]
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 0.007843, (300, 300), 127.5)

# pass the blob through the network and obtain the detections and
# predictions
print("[INFO] computing object detections...")
# the blob has to be set as the network input before the forward pass
net.setInput(blob)
detections = net.forward()

# loop over the detections
for i in np.arange(0, detections.shape[2]):
    # extract the confidence (i.e., probability) associated with the
    # prediction
    confidence = detections[0, 0, i, 2]

    # filter out weak detections by ensuring the `confidence` is
    # greater than the minimum confidence
    if confidence > args["confidence"]:
        # extract the index of the class label from the `detections`,
        # then compute the (x, y)-coordinates of the bounding box for
        # the object (the box is scaled by the original image size)
        idx = int(detections[0, 0, i, 1])
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # display the prediction
        label = "{}: {:.2f}%".format(CLASSES[idx], confidence * 100)
        print("[INFO] {}".format(label))
        cv2.rectangle(image, (startX, startY), (endX, endY),
                      COLORS[idx], 2)
        # put the label above the box unless that would leave the image
        y = startY - 15 if startY - 15 > 15 else startY + 15
        cv2.putText(image, label, (startX, y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, COLORS[idx], 2)

# show the output image and keep the window open until a key is pressed
cv2.imshow("Output", image)
cv2.waitKey(0)
Code for Gender and emotion recognition
import sys
import argparse
import cv2
from keras.models import load_model
import numpy as np
from utils.datasets import get_labels
from utils.inference import detect_faces
from utils.inference import draw_text
from utils.inference import draw_bounding_box
from utils.inference import apply_offsets
from utils.inference import load_detection_model
from utils.inference import load_image
from utils.preprocessor import preprocess_input
# parameters for loading data and images
image_path = sys.argv[1]
detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
gender_model_path = '../trained_models/gender_models/simple_CNN.81-0.96.hdf5'
emotion_labels = get_labels('fer2013')
gender_labels = get_labels('imdb')

# hyper-parameters for bounding boxes shape
# (the earlier (30, 60) / (20, 40) values were immediately overwritten, so
# only the effective values are kept)
gender_offsets = (10, 10)
emotion_offsets = (0, 0)

# loading models
face_detection = load_detection_model(detection_model_path)
emotion_classifier = load_model(emotion_model_path, compile=False)
gender_classifier = load_model(gender_model_path, compile=False)

# getting input model shapes for inference
emotion_target_size = emotion_classifier.input_shape[1:3]
gender_target_size = gender_classifier.input_shape[1:3]

# loading images
rgb_image = load_image(image_path, grayscale=False)
gray_image = load_image(image_path, grayscale=True)
gray_image = np.squeeze(gray_image)
gray_image = gray_image.astype('uint8')

faces = detect_faces(face_detection, gray_image)
for face_coordinates in faces:
    # crop the face twice: an RGB crop for gender, a gray crop for emotion
    x1, x2, y1, y2 = apply_offsets(face_coordinates, gender_offsets)
    rgb_face = rgb_image[y1:y2, x1:x2]

    x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
    gray_face = gray_image[y1:y2, x1:x2]

    # an empty crop cannot be resized; skip this face
    if rgb_face.size == 0 or gray_face.size == 0:
        continue
    rgb_face = cv2.resize(rgb_face, (gender_target_size))
    gray_face = cv2.resize(gray_face, (emotion_target_size))

    # gender: add a batch axis, then take the most probable label
    rgb_face = preprocess_input(rgb_face, False)
    rgb_face = np.expand_dims(rgb_face, 0)
    gender_prediction = gender_classifier.predict(rgb_face)
    gender_label_arg = np.argmax(gender_prediction)
    gender_text = gender_labels[gender_label_arg]

    # emotion: add batch and channel axes, then take the most probable label
    gray_face = preprocess_input(gray_face, True)
    gray_face = np.expand_dims(gray_face, 0)
    gray_face = np.expand_dims(gray_face, -1)
    emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
    emotion_text = emotion_labels[emotion_label_arg]

    # one colour per gender label; previously the second assignment always
    # overwrote the first because the else branch was missing
    color = (0, 0, 255) if gender_text == gender_labels[0] else (255, 0, 0)

    draw_bounding_box(face_coordinates, rgb_image, color)
    draw_text(face_coordinates, rgb_image, gender_text, color, 0, -20, 1, 2)
    draw_text(face_coordinates, rgb_image, emotion_text, color, 0, -50, 1, 2)

# OpenCV writes BGR, so convert the annotated RGB image before saving
bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
cv2.imwrite('../images/predicted_test_image.png', bgr_image)
How can I integrate these two models into a single application?
Thanks in advance.