OpenCV in Python - python-3.x

Code runs but instead of posting my name during the detector.py it says unknown. Anyone have any solutions?
import cv2
import numpy as np
recognizer = cv2.face.LBPHFaceRecognizer_create()
recognizer.read('trainer/trainer.yml')
cascadePath = "haarcascade_frontalface_default.xml"
faceCascade = cv2.CascadeClassifier(cascadePath);
font = cv2.FONT_HERSHEY_SIMPLEX
cam = cv2.VideoCapture(0)
while True:
ret, im =cam.read()
gray = cv2.cvtColor(im,cv2.COLOR_BGR2GRAY)
faces = faceCascade.detectMultiScale(gray, 1.2,5)
for(x,y,w,h) in faces:
cv2.rectangle(im, (x-20,y-20), (x+w+20,y+h+20), (0,255,0), 4)
Id = recognizer.predict(gray[y:y+h,x:x+w])
if(Id == 1):
Id = "Alec"
elif(Id == 2):
Id = "Chase"
else:
Id = "Unknown"
cv2.rectangle(im, (x-22,y-90), (x+w+22, y-22), (0,255,0), -1)
cv2.putText(im, str(Id), (x,y-40), font, 2, (255,255,255), 3)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
cam.release()
cv2.destroyAllWindows()
It is not showing any types of error. I have commented out the If(id==X) code just to see what it would print on the screen. The program printed (1, 30-40). So I'm guessing the 1 is my ID. I have the DataSet and trainer program if I need to provide it.

recognizer.predict returns both the Id and confidence score.
Id, conf = recognizer.predict(gray[y:y+h,x:x+w])
if(conf<50):
if(Id==1):
Id="asciime"
elif(Id==2):
Id="Something"
else:
Id="Unknown"
OpenCV's Python API documentation is very poor. I often use the C++ reference. In this case the predict method is
void cv::face::FaceRecognizer::predict(InputArray src, int& label, double& confidence) const
See https://docs.opencv.org/3.2.0/dd/d65/classcv_1_1face_1_1FaceRecognizer.html#ab0d593e53ebd9a0f350c989fcac7f251 .

Confidence is standard set to 50.
However the Id has 2 values in it. the int(ID) and double(Conf).
https://docs.opencv.org/3.0.0/dd/d65/classcv_1_1face_1_1FaceRecognizer.html#aede3fa2ec7a4ee35e67bc998df23883b
getting the first value with Id[1] would work
for(x,y,w,h) in faces:
cv2.rectangle(im, (x-20,y-20), (x+w+20,y+h+20), (0,255,0), 4)
Id = recognizer.predict(gray[y:y+h,x:x+w])
if(Id[1] == 1):
Id = "Alec"
elif(Id[1] == 2):
Id = "Chase"
else:
Id = "Unknown"
cv2.rectangle(im, (x-22,y-90), (x+w+22, y-22), (0,255,0), -1)
cv2.putText(im, str(Id), (x,y-40), font, 2, (255,255,255), 3)
if cv2.waitKey(10) & 0xFF == ord('q'):
break
or
for(x,y,w,h) in faces:
cv2.rectangle(im, (x-20,y-20), (x+w+20,y+h+20), (0,255,0), 4)
Id,conf = recognizer.predict(gray[y:y+h,x:x+w])
if(Id == 1):
Id = "Alec"
elif(Id == 2):
Id = "Chase"
else:
Id = "Unknown"
cv2.rectangle(im, (x-22,y-90), (x+w+22, y-22), (0,255,0), -1)
cv2.putText(im, str(Id), (x,y-40), font, 2, (255,255,255), 3)
if cv2.waitKey(10) & 0xFF == ord('q'):
break

Related

Delay in drawing an element with mouse click if user input is requested in open cv python

I wanted to draw points with labels at mouse click on an image. In the following code it draws blue points for first 3 left mouse clicks, thereafter red points. It works fine so far. However, if I enable the user input requesting part marked by <--- it doesn't show the 3rd blue point until I give the user input. What I was expecting was the user input request comes after it draws the 3rd blue point. Any help is appreciated. Thanks !!
Here's the code. Please comment out <--- marked line #26 which calls for the user input and it works fine.
import cv2
img_path = r'test.jpg'
count = 0
red_count = 0
user_inp = None
def click_event(event, x, y, flags, params):
global count, red_count, user_inp
font_scale, point_radius = 0.4, 2
font = cv2.FONT_HERSHEY_SIMPLEX
if event == cv2.EVENT_LBUTTONDOWN:
if count < 3:
text = f"blue{count}"
cv2.putText(img, text, (x + 2, y), font, font_scale, (255, 0, 0), 1)
cv2.circle(img, (x, y), radius=point_radius, color=(255, 0, 0), thickness=-1)
cv2.imshow('image', img)
if count == 2:
print("blue index 2")
user_inp = input('input anything: ') # <------------ check here
if count >= 3:
text = f"red{red_count}"
cv2.putText(img, text, (x + 2, y), font, font_scale, (0, 0, 255), 1)
cv2.circle(img, (x, y), radius=point_radius, color=(0, 0, 255), thickness=-1)
cv2.imshow('image', img)
red_count += 1
count += 1
if __name__ == "__main__":
img = cv2.imread(img_path, 1)
cv2.imshow('image', img)
cv2.setMouseCallback('image', click_event)
cv2.waitKey(0)
cv2.destroyAllWindows()
print(user_inp)

vtk: how to obtain the image pixel index from a world point

If I pick a world point from a image, How can I convert the world coordinate to image index?
import vtk
import numpy as np
from vtk.util.numpy_support import numpy_to_vtk
def numpyToVTK(data, multi_component=False, type='float'):
if type == 'float':
data_type = vtk.VTK_FLOAT
elif type == 'char':
data_type = vtk.VTK_UNSIGNED_CHAR
else:
raise RuntimeError('unknown type')
if multi_component == False:
if len(data.shape) == 2:
data = data[:, :, np.newaxis]
flat_data_array = data.transpose(2,1,0).flatten()
vtk_data = numpy_to_vtk(num_array=flat_data_array, deep=True, array_type=data_type)
shape = data.shape
else:
assert len(data.shape) == 3, 'only test for 2D RGB'
flat_data_array = data.transpose(1, 0, 2)
flat_data_array = np.reshape(flat_data_array, newshape=[-1, data.shape[2]])
vtk_data = numpy_to_vtk(num_array=flat_data_array, deep=True, array_type=data_type)
shape = [data.shape[0], data.shape[1], 1]
img = vtk.vtkImageData()
img.GetPointData().SetScalars(vtk_data)
img.SetDimensions(shape[0], shape[1], shape[2])
return img
global sphereActor, textActor
sphereActor = None
textActor = None
def mouseMoveEvent(iren, event):
x, y = iren.GetEventPosition()
picker = vtk.vtkWorldPointPicker()
picker.Pick(x, y, 0, render)
worldPoint = picker.GetPickPosition()
##############################################
## convert world point to image index
##############################################
sphere = vtk.vtkSphereSource()
sphere.SetCenter(worldPoint[0], worldPoint[1], worldPoint[2])
sphere.SetRadius(2)
sphere.Update()
sphereMapper = vtk.vtkPolyDataMapper()
sphereMapper.SetInputData(sphere.GetOutput())
global sphereActor, textActor
if sphereActor != None:
render.RemoveActor(sphereActor)
sphereActor = vtk.vtkActor()
sphereActor.SetMapper(sphereMapper)
sphereActor.GetProperty().SetColor(255, 0, 0)
render.AddActor(sphereActor)
render.Render()
if textActor != None:
render.RemoveActor(textActor)
textActor = vtk.vtkTextActor()
textActor.SetInput('world coordinate: (%.2f, %.2f, %.2f)'%(worldPoint[0], worldPoint[1], worldPoint[2]))
textActor.GetTextProperty().SetColor(1, 0, 0)
textActor.GetTextProperty().SetFontSize(15)
render.AddActor(textActor)
img = np.zeros(shape=[128, 128])
for i in range(128):
for j in range(128):
img[i, j] = i+j
vtkImg = numpyToVTK(img)
imgActor = vtk.vtkImageActor()
imgActor.SetInputData(vtkImg)
render = vtk.vtkRenderer()
render.AddActor(imgActor)
# render.Render()
renWin = vtk.vtkRenderWindow()
renWin.AddRenderer(render)
renWin.Render()
iren = vtk.vtkRenderWindowInteractor()
iren.SetRenderWindow(renWin)
iren.SetInteractorStyle(vtk.vtkInteractorStyleTrackballCamera())
iren.Initialize()
iren.AddObserver('MouseMoveEvent', mouseMoveEvent)
iren.Start()
In the above code, if I don't rotate the image, the world point is (x, y, 0):
And it is agree with what I know. For the world point (x, y, z) and the image index (i, j, k), the conversion should be:
worldPoint (x,y,z) = i*spacingX*directionX + j*spacingY*directionY + k*spacingZ*directionZ + originPoint
In the above code, the image is converted from numpy, thus:
directionX = [1, 0, 0]
directionY = [0, 1, 0]
directionZ = [0, 0, 1]
originPoint=[0, 0, 0]
spacingX=1
spacingY=1
spacingZ=1
In this way, x=i, y=j, z=k. Since this image is a 2D image, the k should be 0 and 'z' should also be 0.
Then, I rotate the image, z is not 0. Like the following picture.
I don't know why z is -0.24.
It means the following conversion is wrong. And how can I obtain the image index by the world point?
worldPoint (x,y,z) = i*spacingX*directionX + j*spacingY*directionY + k*spacingZ*directionZ + originPoint
Any suggestion is appreciated!
vtkImageData has the method TransformPhysicalPointToContinuousIndex for going from world space to image space and TransformIndexToPhysicalPoint to go the other way.
I don't think the computation you're doing is right, since direction is 3x3 rotation matrix.

How to get rid of image noise for improving results with PyTesseract?

I'm trying to get the text "P1" and "P2" from the upper left corner of the video.
P1
P2
I take a frame and crop them down to following images and then apply the image processing found here:
P1 Crop
P2 Crop
use pytesseract to recognize text from image
and while it works on cropped still images I edited manually using an image editor, it doesn't work when taking frames from the video using cv2.
I'm not sure why this is but I suspect it has something to do with the black and white background like in the picture below, but I don't know how to get rid of it without also removing the text.
P1 post image manipulation
and here's my code
import cv2
import pytesseract
import re
from difflib import SequenceMatcher
def determineWinner(video):
winnerRect = [(70,95),(146,152)]
cap = cv2.VideoCapture(video)
if(cap.isOpened() == False):
print("No dice")
return
fps = cap.get(cv2.CAP_PROP_FPS)
frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
print(fps)
print(frames)
desiredSeek = frames - int(fps * 9)
print(desiredSeek)
seconds = desiredSeek/fps
print(seconds)
minutes = seconds/60
print(minutes)
partial = minutes - int(minutes)
print(partial)
seconds = partial * 60
print(seconds)
print(str(int(minutes)) +":"+ str(seconds))
cap.set(cv2.CAP_PROP_POS_FRAMES,(desiredSeek))
ret,img = cap.read()
winTxt = []
p1Count = 0
p2Count = 0
cv2.namedWindow("",cv2.WINDOW_NORMAL)
ret,img = cap.read()
while ret:
key = cv2.waitKey(1)
if key == ord('q'):
break
if key == ord('e'):
ret,img = cap.read()
if ret:
winROI = img[winnerRect[0][1]:winnerRect[1][1],winnerRect[0][0]:winnerRect[1][0]]
gray = cv2.cvtColor(winROI, cv2.COLOR_BGR2GRAY)
blur = cv2.GaussianBlur(gray, (3,3), 0)
thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Morph open to remove noise and invert image
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=1)
invert = 255-opening
invert=cv2.resize(invert,None,fx=2,fy=2)
wConfig='-l eng --oem 1 --psm 10 -c tessedit_char_whitelist=P12'
winTxt = pytesseract.image_to_string(invert,config=wConfig)
cv2.rectangle(img,winnerRect[0],winnerRect[1],(255,0,0),2)
cv2.imshow("winroi",invert)
cv2.imshow("",img)
cv2.resizeWindow("",800,600)
print(winTxt)
desiredSeek+=1
seconds = desiredSeek/fps
minutes = seconds/60
partial = minutes - int(minutes)
seconds = partial * 60
print(str(int(minutes)) +":"+ str(seconds))
else:
break
cap.release()
cv2.destroyAllWindows()
This code works as a testing script. I only extracted the parameters for the image containing P1. For applying a filter on a new image, just erase the predefined thresholds values as following:
From:
low_blue, low_green, low_red, upper_blue, upper_green, upper_red = (115, 0, 0, 255, 178, 255)
To:
low_blue, low_green, low_red, upper_blue, upper_green, upper_red = (0, 0, 0, 255, 255, 255)
And start modifying the parameters as described below. After determining the parameters, press esc to exit the program, take the parameters shown in the console and paste them in the thresholds tuple.
How to use it:
Very important. For this to work properly, you have to select with left click of the mouse, the window from cv2.imshow() , in this case Original image or Binary image
q increases and w decreases the lower blue threshold
a increases and s decreases the lower green threshold
... so on and so forth for both lower and upper colors (BGR) thresholds
import numpy as np
import cv2
low_blue, low_green, low_red, upper_blue, upper_green, upper_red = (115, 0, 0, 255, 178, 255)
# Get picture
path = "C:\\Users\\asd\\asd\\P1.png"
frame = cv2.imread(path)
while 1:
lower_color = np.array((low_blue, low_green, low_red))
upper_color = np.array((upper_blue, upper_green, upper_red))
# extract binary image with active blue regions
binary_image = cv2.inRange(frame, lower_color, upper_color)
cv2.imshow('Original image', binary_image)
#erode for the little white contour to dissapear
binary_image = cv2.erode(binary_image, cv2.getStructuringElement(cv2.MORPH_RECT,(3,3)))
binary_image = cv2.dilate(binary_image, cv2.getStructuringElement(cv2.MORPH_RECT,(3,3)))
cv2.imshow('Binary image ', binary_image)
k = cv2.waitKey(5) & 0xFF
if k == 27:
break
if k == ord('q'):
low_blue += 1
if k == ord('w'):
low_blue -= 1
if k == ord('a'):
low_green += 1
if k == ord('s'):
low_green -= 1
if k == ord('z'):
low_red += 1
if k == ord('x'):
low_red -= 1
if k == ord('e'):
upper_blue += 1
if k == ord('r'):
upper_blue -= 1
if k == ord('d'):
upper_green += 1
if k == ord('f'):
upper_green -= 1
if k == ord('c'):
upper_red += 1
if k == ord('v'):
upper_red -= 1
print("low_blue=", low_blue, "low_green=", low_green, "low_red=",low_red, "upper_blue", upper_blue, "upper_green=",
upper_green, "upper_red=",upper_red)
cv2.destroyAllWindows()
The results
From:
To:

if function in python. getting an error in break statement

getting an error in the last if statement. show an syntax error as break is outside loop. The code is unable to break the statement when q is pressed.The code is from the github sorry for not providing the github link. ooking forward for some simple solutions. some packages have been imported like cv2, numpy and pandas
first_frame = None
status_list = [None, None]
times = []
df = pandas.DataFrame(columns=["start", "End"])
video = cv2.VideoCapture(0)
while True:
check, frame = video.read()
status = 0
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
gray = cv2.GaussianBlur(gray, (21, 21), 0)
if first_frame is None:
first_frame = gray
continue
delta_frame = cv2.absdiff(first_frame, gray)
thresh_delta = cv2.threshold(delta_frame, 30, 255, cv2.THRESH_BINARY)[1]
thresh_delta = cv2.dilate(thresh_delta, None, iterations=0)
countours, hierarchy = cv2.findContours(
thresh_delta.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for contour in countours:
if cv2.contourArea(contour) < 1000:
continue
status = 1
(x, y, w, h) = cv2.boundingRect(contour)
cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 3)
status_list.append(status)
status_list = status_list[-2:]
if status_list[-1] == 1 and status_list[-2] == 0:
times.append(datetime.now())
if status_list[-1] == 0 and status_list[-2] == 1:
times.append(datetime.now())
cv2.imshow("frame", frame)
cv2.imshow("capturing", gray)
cv2.imshow("delta", delta_frame)
cv2.imshow("thresh", thresh_delta)
for i in range(0, len(times), 2):
df = df.append({"start": times[i], "End": times[i + 1]}, ignore_index=true)
df.to_csv("Times.csv")
if cv2.waitKey(1) & 0xFF == ord('q'): # press q to quit
break
video.release()
cv2.destroyAllWindows()
if cv2.waitKey(1) & 0xFF == ord('q'): # press q to quit
break

Why is no picture being saved?

import cv2
cam = cv2.VideoCapture(0)
detector = cv2.CascadeClassifier('/usr/local/Cellar/opencv/3.4.2/share/OpenCV/haarcascades/haarcascade_frontalface_default.xml')
Id = input('enter your id')
sampleNum = 0
while(True):
ret, img = cam.read()
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
faces = detector.detectMultiScale(gray, 1.3, 5)
for (x, y, w, h) in faces:
cv2.rectangle(img, (x, y), (x + w, y + h), (255, 0, 0), 2)
# incrementing sample number
sampleNum = sampleNum + 1
# saving the captured face in the dataset folder
cv2.imwrite("dataSet/User." + Id + '.' + str(sampleNum) + ".jpg", gray[y:y + h, x:x + w])
cv2.imshow('frame', img)
# wait for 100 miliseconds
if cv2.waitKey(100) & 0xFF == ord('q'):
break
# break if the sample number is morethan 20
elif sampleNum > 20:
break
cam.release()
cv2.destroyAllWindows()
cv2.imwrite should save the captured images in a folder called dataSet but nothing gets captured. what is the problem here? Thank you

Resources