I'm trying to paste a picture on the face detected through a haarcascade.
While I was writing the code, I came across this error..
error: (-215:Assertion failed) (mtype == CV_8U || mtype == CV_8S) && _mask.sameSize(*psrc1) in function 'cv::binary_op'
At first I thought the problem was that the size of the mask and the roi weren't the same, so I referred to a similar problem on Stack Overflow, tried implementing that solution in my code, and then changed it to fit my requirements. My aim is to paste the desired picture onto the roi (which is the face in my case), but the problem still persists.
Here is the code..
import cv2
import numpy as np
import imutils

cap=cv2.VideoCapture(0)
classifier= cv2.CascadeClassifier('D:\\Downloads\\Computer-Vision-Tutorial-master\\Computer-Vision-Tutorial-master\\Haarcascades\\haarcascade_frontalface_default.xml')
img=cv2.imread("D:\\Downloads\\anonymous_face_mask.jpg")
rows,cols,channels=img.shape

while(cap.isOpened()):
    img=cv2.imread("D:\\Downloads\\anonymous_face_mask.jpg")
    _,frame=cap.read()
    gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
    img_gray=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    ret,original_mask=cv2.threshold(img_gray,100,255,cv2.THRESH_BINARY_INV)
    original_mask_inv=cv2.bitwise_not(original_mask)
    #cv2.imshow("mask_inv",mask_inv)
    face=classifier.detectMultiScale(gray,1.4,5)
    for x,y,w,h in face:
        face_w = w
        face_h = h
        face_x1 = x
        face_x2 = face_x1 + face_h
        face_y1 = y
        face_y2 = face_y1 + face_h
        img_width=3*face_w
        img_height=int(img_width*rows/cols)
        img_x1=face_x2-int(face_w/2)-int(img_width/2)
        img_x2=face_x1+img_width
        img_y1=face_y2+5
        img_y2=img_y1+img_height
        if img_x1<0:
            img_x1=0
        if img_y1<0:
            img_y1=0
        if img_x2<cols:
            img_x2=cols
        if img_y2<rows:
            img_y2=rows
        imgwidth=img_x2-img_x1
        imgheight=img_y2-img_y1
        if imgwidth<0 or imgheight<0:
            continue
        image=cv2.resize(img,(imgwidth,imgheight),cv2.INTER_AREA)
        mask=cv2.resize(original_mask,(imgwidth,imgheight),cv2.INTER_AREA)
        mask_inv=cv2.resize(original_mask_inv,(imgwidth,imgheight),cv2.INTER_AREA)
        roi=frame[img_y1:img_y2,img_x1:img_x2]
        frame_bg=cv2.bitwise_and(roi,roi,mask=mask)
        img_fg=cv2.bitwise_and(image,image,mask=mask_inv)
        dst=cv2.add(frame_bg,img_fg)
        frame[img_y1:img_y2,img_x1:img_x2]=dst
        cv2.rectangle(frame,(x,y),(x+w,y+h),(0,255,255),2)
    cv2.imshow("framee",frame)
    k = cv2.waitKey(1) & 0xff
    if k == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
Please point out where I went wrong.
Thanks in advance.
I had to run it to find the problem. The roi is in fact not always the same size as the mask: the computed coordinates go "out of bounds" of the frame when you slice roi, and numpy silently clips the slice to the edges of the array, so roi comes out smaller than you expect.
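You can see that clipping with a tiny numpy sketch (the shapes here are made up, purely to show why roi can end up smaller than the mask you resized):
import numpy as np

frame = np.zeros((480, 640, 3), np.uint8)  # stand-in for a camera frame
roi = frame[400:700, 500:900]              # slice indices run past the frame edges
print(roi.shape)                           # (80, 140, 3), not the (300, 400, 3) you asked for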
It is hard to know for sure what is wrong, since the code (in my opinion) is a mess. For each face found you calculate several widths and heights that are not needed if all you want is to replace the detected face with an image and a mask created from that image.
Try something like this:
import cv2
import numpy as np

# initialize camera, classifier and load the new image
cap=cv2.VideoCapture(0)
img=cv2.imread("image.png")
classifier= cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# create masks to be used later
img_gray=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
ret,original_mask=cv2.threshold(img_gray,100,255,cv2.THRESH_BINARY_INV)
original_mask_inv=cv2.bitwise_not(original_mask)

while(cap.isOpened()):
    # get image and find the face(s)
    _,frame=cap.read()
    gray=cv2.cvtColor(frame,cv2.COLOR_BGR2GRAY)
    face = classifier.detectMultiScale(gray,1.4,5)
    for x,y,w,h in face:
        # resize images and mask to size of the face
        newFace = cv2.resize(img,(w,h),cv2.INTER_AREA)
        mask = cv2.resize(original_mask,(w,h),cv2.INTER_AREA)
        mask_inv = cv2.resize(original_mask_inv,(w,h),cv2.INTER_AREA)
        # obtain the foreground of the image and the background of the camera frame
        roi=frame[y:y+h,x:x+w]
        frame_bg=cv2.bitwise_and(roi,roi,mask=mask)
        img_fg=cv2.bitwise_and(newFace,newFace,mask=mask_inv)
        # replace the face with the image data and draw a rectangle
        frame[y:y+h,x:x+w]= frame_bg + img_fg
        cv2.rectangle(frame,(x,y),(x+w,y+h),(0,255,255),2)
    # show image and wait parse key
    cv2.imshow("framee",frame)
    k = cv2.waitKey(1) & 0xff
    if k == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
I hope this helps, if not, just leave a comment
Make sure your mask is encoded in uint8 format:
mask = mask.astype("uint8")
Also try printing the shapes of the two images you are AND-ing: if they match, it's an encoding issue; otherwise it's a shape issue.
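A minimal sketch of that check (the names roi and mask are just placeholders for whatever arrays you pass to cv2.bitwise_and):
import numpy as np

def mask_matches(roi, mask):
    # bitwise_and wants a single-channel 8-bit mask with the same width/height as roi
    print("roi:", roi.shape, roi.dtype, "| mask:", mask.shape, mask.dtype)
    return mask.dtype == np.uint8 and mask.shape[:2] == roi.shape[:2]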
I am working on a project where I am required to use classes and objects to manipulate an image in Python using PIL.
I have ruled out the file path (it is formatted correctly), so the problem must be in the code itself.
from PIL import Image

class image_play(object):
    def __init__(self,im_name):
        self.im_name = im_name

    def rgb_to_gray_image(self):
        im = Image.open(self.im_name)
        im = im.convert('LA')
        return im

    # editing pixels of image to white
    def loop_over_image(self):
        im = Image.open(self.im_name)
        width, height = im.size
        # nested loop over all pixels of image
        temp = []
        for i in range(width):
            for j in range(height):
                temp.append((255,255,255)) # append tuple for the RGB values for each pixel
        image_out = Image.new(im.mode,im.size) # create new image using PIL
        image_out.putdata(temp) # use the temp list to create the image
        return image_out

pic = image_play('test.png')
picGray = pic.rgb_to_gray_image()
picWhite = pic.loop_over_image()
I simply added picGray.show() and picWhite.show() and now I have viewable output. Hmmmm...
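For reference, a minimal sketch of that change (both methods return PIL Image objects, so nothing appears on screen until you call show() or save() on them):
pic = image_play('test.png')
picGray = pic.rgb_to_gray_image()
picWhite = pic.loop_over_image()
picGray.show()   # opens the grayscale ('LA') image in the default viewer
picWhite.show()  # opens the all-white image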
I am trying to detect logos in invoices. I am able to get some results, but they are not good enough to process further: while detecting logos, unwanted text is also getting detected.
The following is from the actual invoice: original Image
and the following results are what I am getting: Image after operations
I am using the following code, which I have written:
gray=cv2.imread("Image",0)
ret,thresh1 = cv2.threshold(gray,180,255,cv2.THRESH_BINARY)
kernel_logo = np.ones((10,10),np.uint8)
closing_logo = cv2.morphologyEx(thresh1, cv2.MORPH_CLOSE, kernel_logo, iterations=1)
n=3
noise_removed_logo = cv2.medianBlur(closing_logo, n)
eroded_logo = cv2.erode(noise_removed_logo,kernel_logo, iterations = 8)
dilated_logo=cv2.dilate(eroded_logo,kernel_logo, iterations=3)
Could you please tell me what changes I should make to remove the noise from my document image? I am new to computer vision.
A few more samples: Original document
The result I am getting: Result after operations on document
Hello Mohd Anas Khan.
Your approach to defining the logo is too simple, so it couldn't work. If you want a product-level approach, use some machine learning or deep learning. If you just want a toy, then a simple contour finder with fixed rules should work.
For example, in the following approach I defined "logo" as "the contour with the biggest area". You'll need more rules later, so good luck.
import numpy as np
import cv2
im = cv2.imread('contours_1.jpg')
imgray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
ret,thresh = cv2.threshold(imgray,127,255, cv2.THRESH_BINARY_INV)
rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
threshed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, rect_kernel)
cv2.imwrite("contours_1_thres.jpg", threshed)
im2, contours, hierarchy = cv2.findContours(threshed,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)
ws = []
hs = []
areas = []
for contour in contours:
    area = cv2.contourArea(contour)
    x, y, w, h = cv2.boundingRect(contour)
    print("w: {}, h: {}, area: {}".format(w, h, area))
    ws.append(w)
    hs.append(h)
    areas.append(area)
max_idx = np.argmax(areas)
cv2.drawContours(im, [contours[max_idx]], -1, (0, 255, 0), 3)
# cv2.drawContours(im, contours, -1, (0, 255, 0), 3)
cv2.imwrite("contours_1_test.jpg", im)
The output images are as follows (the detected logo is outlined with a green box):
I recently started to learn how to work with Pillow in Python, and this is what I have so far (code below).
If I run this I get the following error:
ValueError: cannot determine region size; use 4-item box
I've tried removing the base.paste line; this removes the error but doesn't show any text.
I hope someone here can help me fix this so the text shows up in the image.
@app.route("/imgtest/")
def imgtest():
    f_text = textwrap.fill(request.args.get('text'), 10)
    base = Image.open(config.assetsfolder+'/'+'facts.bmp').convert("RGBA")
    txtO = Image.new("RGBA", base.size, (255, 255, 255, 0))
    font = ImageFont.truetype(config.assetsfolder+'/'+'fonts'+'/'+'Roboto-Bold.ttf', 15)
    canv = ImageDraw.Draw(txtO)
    canv.text((95, 283), f_text, font=font, fill="Black")
    base.paste(f_text)
    base.save(config.assetsfolder+'/'+'done'+'/'+'boop.png')
    return send_file(config.assetsfolder+'/'+'done'+'/'+'boop.png')
How do I show the text in the image?
If you are using Pillow 4.0, I believe this could be an issue with the Pillow version. Try the following in your cmd:
pip uninstall pillow
pip install Pillow==3.4.2
I played around a bit by adding some code and reading the docs a bit more carefully.
@app.route("/fact/")
def fact():
    filename=(config.assetsfolder+'/'+'facts.bmp')
    args=request.args.get('text')
    font = ImageFont.truetype(config.assetsfolder+'/'+'fonts'+'/'+'Roboto-Bold.ttf', size=20)
    text = wrap(font, args, 340)
    im = Image.open(filename)
    text_layer = Image.new('RGBA', im.size)
    d = ImageDraw.Draw(text_layer)
    location = (90, 600)
    text_color = (20, 20, 20)
    d.text(location, text, font=font, fill=text_color)
    text_layer = text_layer.rotate(-13, resample=Image.BICUBIC)
    im.paste(text_layer, (0, 0), text_layer)
    im.save(config.assetsfolder+'/'+'done'+'/'+'fact.png')
    return send_file(config.assetsfolder+'/'+'done'+'/'+'fact.png')
This did the job for me.
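If it helps to see why this works: Image.paste needs either an image, or a colour together with a 4-item box, so the original base.paste(f_text) call with a bare string and no box is, I believe, what raised "cannot determine region size; use 4-item box". A small self-contained sketch of the working pattern (placeholder sizes, text and font):
from PIL import Image, ImageDraw, ImageFont

base = Image.new("RGBA", (400, 200), (255, 255, 255, 255))  # placeholder background image
layer = Image.new("RGBA", base.size, (255, 255, 255, 0))    # fully transparent text layer
draw = ImageDraw.Draw(layer)
draw.text((20, 80), "hello", fill=(20, 20, 20, 255), font=ImageFont.load_default())
base.paste(layer, (0, 0), layer)  # image + corner + mask: the region size comes from the layer
# base.paste("hello")             # a bare string has no size, hence the original ValueError
base.save("out.png")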
This question already has answers here: OpenCV: Choosing HSV thresholds for color filtering.
Can anyone please tell me the name of a website or any other place where I can get the upper and lower HSV ranges of basic colours like
yellow, green, red, blue, black, white, orange?
I am making a bot that first follows a black line; in the middle of that line there is another colour, from which three lines of different colours branch off, and the bot needs to decide which line to follow.
For that I need the proper ranges of the HSV colours.
Inspired by the answer at the answers.opencv link.
According to the docs here,
the HSV ranges in OpenCV are H from 0-179, and S and V from 0-255,
so for your lower and upper ranges you can, for any given [h, s, v], use
[h-10, s-40, v-40] for lower
and
[h+10, s+10, v+40] for upper
for the yellow, green, red, blue, black, white and orange RGB values.
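For instance, a minimal sketch for yellow (assuming a saturated yellow sits around H ≈ 30 on OpenCV's 0-179 hue scale; the offsets and the file name line.png are placeholders you would tune for your own lighting):
import cv2
import numpy as np

lower_yellow = np.array([20, 100, 100])  # H-10, with generous S/V floors
upper_yellow = np.array([40, 255, 255])  # H+10, S/V capped at 255
hsv = cv2.cvtColor(cv2.imread("line.png"), cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, lower_yellow, upper_yellow)  # white where the colour matches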
Copied code from the example:
import cv2
import numpy as np

image_hsv = None   # global ;(
pixel = (20,60,80) # some stupid default

# mouse callback function
def pick_color(event,x,y,flags,param):
    if event == cv2.EVENT_LBUTTONDOWN:
        pixel = image_hsv[y,x]

        #you might want to adjust the ranges(+-10, etc):
        upper = np.array([pixel[0] + 10, pixel[1] + 10, pixel[2] + 40])
        lower = np.array([pixel[0] - 10, pixel[1] - 10, pixel[2] - 40])
        print(pixel, lower, upper)

        image_mask = cv2.inRange(image_hsv,lower,upper)
        cv2.imshow("mask",image_mask)

def main():
    import sys
    global image_hsv, pixel # so we can use it in mouse callback

    image_src = cv2.imread(sys.argv[1]) # pick.py my.png
    if image_src is None:
        print ("the image read is None............")
        return
    cv2.imshow("bgr",image_src)

    ## NEW ##
    cv2.namedWindow('hsv')
    cv2.setMouseCallback('hsv', pick_color)

    # now click into the hsv img , and look at values:
    image_hsv = cv2.cvtColor(image_src,cv2.COLOR_BGR2HSV)
    cv2.imshow("hsv",image_hsv)

    cv2.waitKey(0)
    cv2.destroyAllWindows()

if __name__=='__main__':
    main()
The above code is for when you want to select the HSV range directly from the image or video you are capturing, by clicking on the desired colour.
If you want to predefine your ranges, you can write a simple code snippet using the built-in Python library colorsys to convert RGB to HSV with the colorsys.rgb_to_hsv function.
example in docs
Note that this function only accepts RGB values in the range 0 to 1 and also returns HSV values in the 0 to 1 range, so to use the same values you will need to normalize them for OpenCV.
code snippet
import colorsys

'''
convert given rgb to hsv opencv format
'''
def rgb_hsv_converter(rgb):
    (r,g,b) = rgb_normalizer(rgb)
    hsv = colorsys.rgb_to_hsv(r,g,b)
    (h,s,v) = hsv_normalizer(hsv)
    upper_band = [h+10, s+40, v+40]
    lower_band = [h-10, s-40, v-40]
    return {
        'upper_band': upper_band,
        'lower_band': lower_band
    }

def rgb_normalizer(rgb):
    (r,g,b) = rgb
    return (r/255, g/255, b/255)

def hsv_normalizer(hsv):
    (h,s,v) = hsv
    return (h*360, s*255, v*255)
rgb_hsv_converter((255, 165, 0))
will return
{'upper_band': [48.82352941176471, 295.0, 295.0], 'lower_band': [28.82352941176471, 215.0, 215.0]}
which are your orange HSV bands.
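As a rough usage sketch (assuming a hypothetical frame.png, and clipping the bands into OpenCV's H 0-179 and S/V 0-255 ranges, since the values above can exceed them):
import cv2
import numpy as np

bands = rgb_hsv_converter((255, 165, 0))  # orange, using the function defined above
lower = np.clip(bands['lower_band'], 0, [179, 255, 255]).astype(np.uint8)
upper = np.clip(bands['upper_band'], 0, [179, 255, 255]).astype(np.uint8)
hsv = cv2.cvtColor(cv2.imread("frame.png"), cv2.COLOR_BGR2HSV)
mask = cv2.inRange(hsv, lower, upper)  # white where the frame matches the orange band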