How to remove noise after Otsu Binarization using OpenCV in Python? - python-3.x

I am using a Raspberry Pi and its camera to run an image-processing algorithm. I perform background subtraction on successive frames of the captured stream to find out whether any object is present in the image and, if so, print out its area. The algorithm works as expected, but there is a problem.
The thresholding function, which uses cv2.THRESH_OTSU, produces a grainy image whenever there is no object present, i.e. when the background and the foreground images are the same. However, the noise/grain disappears when there is an object present in the foreground image. The two cases are as follows -
Same Background Image and Foreground Image with noise
Different Background and Foreground Image without any noise
As you can see ,if the images are almost same , the noise is present and if any object is introduced in the frame , then the noise vanishes .
I have tried the following to remove the noise but it didn't work .
Tried using only cv2.THRESH_BINARY / cv2.THRESH_BINARY_INV without Otsu binarization.
I have tried increasing the brightness/contrast/saturation of the captured image to see if the performance varies , but no change .
I have tried to increase/decrease the amount of erosion/dilation preceding the Thresholding step , but this did not make any change either .
This is my code -
from time import sleep
from picamera import PiCamera
from picamera.array import PiRGBArray
import cv2,os
import numpy as np
import threading
def imageSubtract(img):
    """Smooth a BGR frame and return it as a single-channel grayscale image.

    The frame is passed through a bilateral filter (diameter 9, sigmaColor 170,
    sigmaSpace 170) and the filtered result is converted from BGR to gray.
    """
    smoothed = cv2.bilateralFilter(img, 9, 170, 170)
    return cv2.cvtColor(smoothed, cv2.COLOR_BGR2GRAY)
def _largest_contour(binary_img):
    """Return the largest-area contour found in *binary_img*, or None if there is none."""
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x; the contour list is always at [-2].
    contours = cv2.findContours(binary_img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not contours:
        return None
    return max(contours, key=cv2.contourArea)


def imageProcessing():
    """Run background subtraction on the Pi camera stream and report large objects.

    The first 10 frames are rejected; the next frame becomes the reference
    (background). Every frame from then on is filtered with imageSubtract(),
    differenced against the reference, dilated, and binarised with Otsu's
    method. The bounding box of the largest contour is drawn on the binary
    image and its area is printed when it exceeds 500. Press 'q' in an OpenCV
    window to stop.

    The camera and all OpenCV windows are released on exit, including when an
    error interrupts the loop.
    """
    camera = PiCamera()
    try:
        camera.resolution = (512, 512)
        camera.awb_mode = "fluorescent"
        camera.iso = 800
        camera.contrast = 33
        camera.brightness = 75
        camera.sharpness = 100
        rawCapture = PiRGBArray(camera, size=(512, 512))
        camera.start_preview()
        sleep(2)  # presumably gives the sensor time to settle -- confirm

        kernel = np.ones((5, 5), np.uint8)  # loop-invariant dilation kernel
        frame_buffer = 0
        refImg = None
        refThresh = None

        for frame in camera.capture_continuous(rawCapture, format="bgr", use_video_port=True):
            image = frame.array
            # Reset the capture buffer so the next frame can be written into it.
            rawCapture.truncate(0)

            if refImg is None:
                if frame_buffer < 10:
                    print("Frame rejected -", str(frame_buffer))
                    frame_buffer += 1
                    continue
                os.system("clear")
                refImg = image
                refThresh = imageSubtract(refImg)

            cv2.imshow("Foreground", image)
            key = cv2.waitKey(1)

            newThresh = imageSubtract(image)
            diff = cv2.absdiff(refThresh, newThresh)
            diff = cv2.dilate(diff, kernel, iterations=3)
            cv2.imshow("Background", refImg)

            # NOTE(review): THRESH_OTSU always selects some threshold from the
            # histogram, so when diff is almost uniformly dark it presumably
            # splits residual sensor noise -- behaviour left unchanged here.
            _, thresholded = cv2.threshold(diff, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

            c = _largest_contour(thresholded)
            if c is not None:
                x, y, w, h = cv2.boundingRect(c)
                cv2.rectangle(thresholded, (x, y), (x + w, y + h), (125, 125, 125), 2)
                area = cv2.contourArea(c)
                if area > 500:
                    print("Object detected with area = ", area)

            # Shown, and the quit key honoured, even when no contour was found.
            cv2.imshow("Threshold", thresholded)
            if key == ord('q'):
                break
    finally:
        camera.close()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    imageProcessing()
Please help me to remove the noise when the background and foreground Images are same .
Thank You !

Related

Difficulty reading text with pytesseract

I need to read the highest temperature on thermographic images, as shown below:
IR_1544_INFRA.jpg
IR_1546_INFRA.jpg
IR_1560_INFRA.jpg
IR_1564_INFRA.jpg
I used the following code, this was the best result.
I also tried several other ways, such as: blur, gray scale, binarization, and others but they all failed.
import cv2
import pytesseract
# Point pytesseract at the local Tesseract executable.
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\User\AppData\Local\Tesseract-OCR\tesseract.exe"

# Load the thermogram and keep only the region holding the temperature value.
entrada = cv2.imread('IR_1546_INFRA.jpg')
image = entrada[40:65, 277:319]

# Grayscale, then inverted Otsu threshold, then invert the result again.
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
_, inverted = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
thresh = 255 - inverted

# Light blur before handing the image to Tesseract.
thresh = cv2.GaussianBlur(thresh, (3, 3), 0)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)

cv2.imshow('thresh', thresh)
cv2.waitKey()
In the first image, I found
this
In the second image, I found this.
The image layout is always the same, that is, the temperature is always in the same place, so I cropped the image to isolate only the number. I would like to get 97.7 for the first image and 85.2 for the second.
My code needs to find from these images to always detect this temperature and generate a list indicating from highest to lowest.
What do you recommend to improve the accuracy of pytesseract on these images?
Note 1: When I analyze the entire image (without cropping), it returns data that is not even present.
Note 2: In some images even with the binary number, pytesseract (image_to_string) does not return any data.
Thank you all and sorry for the typos, writing in english is still a challenge for me.
Because you have same images, you can crop the area you want and then do processing there. The processing is also simple. Change to gray, get threshold, invert, resize, and then do the OCR. You can see it in my code below. It works on all your attached images.
import cv2
import pytesseract
import os
# Folder holding the thermographic images; every entry in it is processed.
image_path = "temperature"

for nama_file in sorted(os.listdir(image_path)):
    print(nama_file)
    img = cv2.imread(os.path.join(image_path, nama_file))
    if img is None:
        # cv2.imread returns None for entries it cannot decode (sub-folders,
        # non-image files); slicing None below would raise a TypeError.
        print("skipped (not a readable image): " + nama_file)
        continue

    # The temperature read-out sits at a fixed position in every image.
    crop = img[43:62, 278:319]
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.bitwise_not(thresh)
    # Upscale 2x before OCR.
    double = cv2.resize(thresh, None, fx=2, fy=2)

    # Single text line (--psm 7), restricted to digits and the decimal point.
    custom_config = r'-l eng --oem 3 --psm 7 -c tessedit_char_whitelist="1234567890." '
    text = pytesseract.image_to_string(double, config=custom_config)
    print("detected: " + text)

    cv2.imshow("img", img)
    cv2.imshow("double", double)
    cv2.waitKey(0)

cv2.destroyAllWindows()

Background removal from webcam OPENCV PYTHON

I'm creating a script that will read the state of a supermarket and tell me if there is products missing.
for example in the image below there is some places where there is products missing. I'm using FAST method to find all the corners in the frame. but sometimes the scripts detects the floor corners. What I want to do is remove the floor from the frame before I find the corners.
import cv2
import numpy as np
# Load the shelf picture and build the grayscale copy fed to the detector.
image = cv2.imread('gondola_imagem.jpeg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# FAST corner detector created with its default settings.
# NOTE(review): the original comment said non-max suppression is on by default
# and suggested fast.setBool('nonmaxSuppression', False) to disable it --
# verify the setter name against the installed OpenCV version.
fast = cv2.FastFeatureDetector_create()
keypoints = fast.detect(gray, None)
print ("Number of keypoints Detected: ", len(keypoints))

# Draw the detected keypoints and display the result until a key is pressed.
rich_flag = cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
image = cv2.drawKeypoints(image, keypoints, None, flags=rich_flag)
cv2.imshow('Feature Method - FAST', image)
cv2.waitKey()
cv2.destroyAllWindows()
You can use a mask to remove the areas you are not interested in. For example, with the following image as a mask you get the results below.
Mask
Result
Code is as follow:
import numpy as np
import cv2
# Load the picture and the mask; flag 0 reads the mask as a grayscale image.
image = cv2.imread('test.jpg')
mask = cv2.imread('mask.jpg', 0)
cv2.imshow('Original', image)
cv2.imshow('Mask', mask)

# Keep only the pixels selected by the mask, then convert to grayscale.
res = cv2.bitwise_and(image, image, mask=mask)
gray = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY)

# Run FAST (default settings) on the masked grayscale image.
fast = cv2.FastFeatureDetector_create()
keypoints = fast.detect(gray, None)
print ("Number of keypoints Detected: ", len(keypoints))

# Keypoints are drawn on the original, unmasked picture.
rich_flag = cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS
image = cv2.drawKeypoints(image, keypoints, None, flags=rich_flag)
cv2.imwrite('result.jpg', image)
cv2.imshow('Feature Method - FAST', image)
cv2.waitKey()
cv2.destroyAllWindows()
Edit:
If you want to do this in realtime (video from webcam) you just need to do it for every frame you get from the video camera. As long as the camera is not moving you should be able to use the same mask for all the frames. You could make the code above a function and then call it with an image as a parameter, as per the following code:
import numpy as np
import cv2
# Open the default camera (device index 0).
cap = cv2.VideoCapture(0)
try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # read() reports failure (camera unplugged, stream ended) through
            # ret; frame is not usable in that case, so stop.
            break
        # The following function has to be created from the previous code
        # listing; it is not defined in this snippet.
        CallFunctionToPreviewsCode(frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # When everything is done (or an error occurred), release the capture.
    cap.release()
    cv2.destroyAllWindows()
The code above was taken from OpenCV Python-Tutorials It is a good place for learning OpenCV for Python programming language.

Error feeding numpy array d.type uint8 into adaptivethreshold function

I'm trying to feed a numpy array into the Process_img (adaptivethreshold) function. The numpy array has a data type of uint8 and 3 dimensions, which should be accepted by the function.
I am getting the following error message. I've tried converting it to grayscale but doesn't seem to work and i've tried numpy.ndarray.flatten (1 dimension), which gets it functioning but doesn't display it back correctly.
I end up getting a long gray bar. I'm not sure of what else i should do. Any help is appreciated.
error: OpenCV(3.4.4)
C:\projects\opencv-python\opencv\modules\imgproc\src\thresh.cpp:1524:
error: (-215:Assertion failed) src.type() == CV_8UC1 in function
'cv::adaptiveThreshold'
import time
import cv2
import mss
import numpy
# Attempts to change the image to black and white relative to a general area
def process_img(image):
    """Return *image* binarised with Gaussian adaptive thresholding.

    Uses max value 255, THRESH_BINARY, block size 11 and constant 2.
    NOTE(review): cv2.adaptiveThreshold asserts src.type() == CV_8UC1, so the
    caller must pass a single-channel 8-bit image; no conversion happens here.
    """
    return cv2.adaptiveThreshold(
        image,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
# Grab the screen region, threshold it and display it until 'q' is pressed.
while True:
    last_time = time.time()
    # A new mss instance is opened for every captured frame.
    with mss.mss() as sct:
        # Screen region to capture.
        monitor = {"top": 40, "left": 0, "width": 960, "height": 540}
        # Snapshot converted to a numpy array; its dtype is printed each frame.
        npm = numpy.array(sct.grab(monitor))
        print (npm.dtype)
        # Threshold the snapshot and show the result.
        new_screen = process_img(npm)
        cv2.imshow('Window', new_screen)
        # waitKey(1) keeps the window refreshing and polls the keyboard.
        pressed = cv2.waitKey(1) & 0xFF
        if pressed == ord('q'):
            cv2.destroyAllWindows()
            break
Change your process_img() function to convert the image to grayscale:
def process_img(image):
    """Convert a BGRA image to grayscale and binarise it adaptively.

    Gaussian adaptive threshold with max value 255, THRESH_BINARY,
    block size 11 and constant 2.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    return binary
Also, you should move "with mss.mss() as sct:" outside the while loop so that the capture object is created only once, which keeps the loop fast:
import time
import cv2
import mss
import numpy
# Attempts to change the image to black and white relative to a general area
def process_img(image):
    """Convert a BGRA image to grayscale and binarise it adaptively.

    Gaussian adaptive threshold with max value 255, THRESH_BINARY,
    block size 11 and constant 2.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    return binary
# One mss instance is opened once and reused for every captured frame.
with mss.mss() as sct:
    # Screen region to capture.
    monitor = {"top": 40, "left": 0, "width": 960, "height": 540}
    while True:
        last_time = time.time()
        # Snapshot converted to a numpy array; its dtype is printed each frame.
        npm = numpy.array(sct.grab(monitor))
        print(npm.dtype)
        # Threshold the snapshot and show the result.
        new_screen = process_img(npm)
        cv2.imshow("Window", new_screen)
        # waitKey(1) keeps the window refreshing and polls the keyboard.
        pressed = cv2.waitKey(1) & 0xFF
        if pressed == ord("q"):
            cv2.destroyAllWindows()
            break

Perform face recognition on two cameras

I have a face recognition project using a camera without any problems.
I now want to do this simultaneously from two cameras.
This is my code for one camera, I haven't any idea on how to employ two cameras for this purpose.
import face_recognition
import cv2
import numpy as np
# Open the camera stream.
video_capture = cv2.VideoCapture('rtsp://admin:11111#192.168.1.13:554/mode=real&idc=1&ids=2')

# Load the sample pictures and learn how to recognize them.
# [0] takes the first face found; it raises IndexError when the picture
# contains no detectable face.
farid_image = face_recognition.load_image_file("farid.jpg")
farid_face_encoding = face_recognition.face_encodings(farid_image)[0]
roice_image = face_recognition.load_image_file("roice.jpg")
roice_face_encoding = face_recognition.face_encodings(roice_image)[0]

# Parallel lists: known_face_names[i] labels known_face_encodings[i].
known_face_encodings = [
    farid_face_encoding,
    roice_face_encoding,
]
known_face_names = [
    "farid",
    "roice",
]

font = cv2.FONT_HERSHEY_DUPLEX

try:
    while True:
        ret, frame = video_capture.read()
        if not ret:
            # A failed read (stream dropped or ended) yields no usable frame;
            # stop instead of crashing on the colour conversion below.
            break

        # OpenCV delivers BGR, face_recognition expects RGB. cvtColor returns a
        # new contiguous array, unlike the negative-stride view frame[:, :, ::-1].
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        face_locations = face_recognition.face_locations(rgb_frame)
        face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

        # Loop through each face in this frame of video
        for (top, right, bottom, left), face_encoding in zip(face_locations, face_encodings):
            # See if the face is a match for the known face(s)
            matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
            name = "Unknown"
            if True in matches:
                # Label the face with the closest known encoding.
                face_distance = face_recognition.face_distance(known_face_encodings, face_encoding)
                name = known_face_names[int(np.argmin(face_distance))]

            # Draw a box around the face
            cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2)
            # Draw a label with a name below the face
            cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), cv2.FILLED)
            cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, (255, 255, 255), 1)

        # Display the resulting image
        cv2.imshow('Video', frame)
        # Hit 'q' on the keyboard to quit!
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the handle to the stream even if an error interrupted the loop.
    video_capture.release()
    cv2.destroyAllWindows()
I can Simply add more cameras using cv2.VideoCapture() module, but how can I make changes to face_recognition in order to work with two cameras?
You could try making this multithreaded: one thread for each camera, each doing its own facial recognition on the images that it sees.
They would be acting independently on their respective streams, but you can then get results from both threads to combine the information for improved detection and/or recognition.

python opencv threshold red image

I am trying to threshold a BGR image after I separate the red channel, but
my code always return "Segmentation fault".
import numpy as np
import cv2
def mostrarVentana(titulo, imagen):
    """Show *imagen* in a window titled *titulo* and block until a key press.

    All OpenCV windows are destroyed only when the pressed key is ESC (27);
    any other key just returns.
    """
    tecla_esc = 27
    print('Mostrando imagen')
    cv2.imshow(titulo, imagen)
    if cv2.waitKey(0) != tecla_esc:
        return
    cv2.destroyAllWindows()
# Load the picture as a 3-channel BGR image (flag 1).
img = cv2.imread('RepoImagenes/640x480/P5.jpg', 1)
# In BGR order the red channel is index 2.
redImg = img[:, :, 2]
# Binary threshold at 58: pixels above it become 255, the rest 0.
threshold_result = cv2.threshold(redImg, 58, 255, cv2.THRESH_BINARY)
rbin, threshImg = threshold_result
mostrarVentana('Binary image', threshImg)
I have read the documentation on how to use the threshold() function and I can not figure out what's wrong. I only need to work on the red channel, how can I get this done?
I am using python 3.4 and opencv 3.1.0
First of all opencv provides a simple API to split n-channel image, using cv2.split() which would return a list of various channels in the image.
There is also a bug in your mostrarVentana method, you have never created a cv2.namedWindow() and you are directly referencing to cv2.imshow(), but you cannot simply cv2.imshow(), without creating a cv2.namedWindow().
Also you must be sure that the image is properly loaded and then access the desired channel, otherwise it would lead to weird errors. Your code with some scenario handling would look like this:
import numpy as np
import cv2
def mostrarVentana(titulo, imagen):
    """Show *imagen* in a resizable window titled *titulo* until a key press.

    The window is created explicitly with WINDOW_NORMAL before the image is
    shown. All OpenCV windows are destroyed only when the pressed key is
    ESC (27); any other key just returns.
    """
    tecla_esc = 27
    print('Mostrando imagen')
    cv2.namedWindow(titulo, cv2.WINDOW_NORMAL)
    cv2.imshow(titulo, imagen)
    if cv2.waitKey(0) != tecla_esc:
        return
    cv2.destroyAllWindows()
img = cv2.imread('RepoImagenes/640x480/P5.jpg', 1)  # loading image in BGR

# cv2.imread returns None when the file cannot be read, so check that first:
# touching img.shape on None would raise AttributeError.
if img is None:
    print ("Sorry the image path was not valid")
elif len(img.shape) != 3 or img.shape[2] != 3:
    print ("Sorry the Image was not loaded in BGR; 3-channel format")
else:
    print (img.shape)
    # Split into the three channels (BGR order); only red is used below.
    blue_img, green_img, red_img = cv2.split(img)
    rbin, threshImg = cv2.threshold(red_img, 58, 255, cv2.THRESH_BINARY)  # thresholding
    mostrarVentana('Binary image', threshImg)

Resources