I am trying to add batching to an OpenCV Python script I am making, and I cannot for the life of me see what I am doing wrong. I am a beginner at this, so it's probably something stupid. In the end I want the script to read every image file in its current working directory, crop each one based on OpenCV's face detection, and output the cropped images, under the same names, to a folder inside the CWD. Right now, though, all it does is output the last image in the folder into the output folder. Any ideas from those who know what they are doing?
import cv2
import sys
import os.path
import glob

# Cascade path
cascPath = 'haarcascade_frontalface_default.xml'

# Create the haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)

# Read images
images = glob.glob('*.jpg')
for i in images:
    image = cv2.imread(i, 1)

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

# Find face(s) using cascade
faces = faceCascade.detectMultiScale(
    gray,
    scaleFactor=1.1,    # size of groups
    minNeighbors=5,     # how many neighbouring detections are needed for a valid face
    minSize=(300, 300)  # min size in pixels for a face
)

# Output number of faces found in image
print('Found {0} faces!'.format(len(faces)))

# Place a rectangle on each face (for debugging, won't be in the crop version)
for (x, y, w, h) in faces:
    cv2.rectangle(image, (x, y), (x+w, y+h), (255, 255, 255), 4)

# Resize image to fit monitor and display it
imOut = cv2.resize(image, (750, 1142))
#cv2.imshow("Faces found", imS)
#cv2.waitKey(0)

# Save image to output folder, creating the folder if it doesn't exist
if not os.path.exists('output'):
    os.makedirs('output')
os.chdir('output')
cv2.imwrite(i, imOut)
There are multiple corrections I have made to the code.

You need to give the full path of haarcascade_frontalface_default.xml. For instance, on a Unix system:

cascPath = 'opencv/data/haarcascades/haarcascade_frontalface_default.xml'
You shouldn't create the output directory inside the loop; create it once, before the loop:

if not os.path.exists('output'):
    os.makedirs('output')
You don't need to change the working directory to save the images; just prefix the filename with the output path:

img_name = "output/out_{}.png".format(c)  # c is the counter
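If you would rather keep the original filenames, as the question asks, a hedged alternative (using the same loop variable i as above) is:

img_name = os.path.join('output', os.path.basename(i))  # keeps the source filename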
Indentation is important; otherwise, you might have difficulties.
Example code:
import cv2
import os.path
import glob

# Cascade path
cascPath = '/opencv/data/haarcascades/haarcascade_frontalface_default.xml'

# Create the haar cascade
faceCascade = cv2.CascadeClassifier(cascPath)

if not os.path.exists('output'):
    os.makedirs('output')

# Read images
images = glob.glob('images/*.jpg')
for c, i in enumerate(images):
    image = cv2.imread(i, 1)

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Find face(s) using cascade
    faces = faceCascade.detectMultiScale(
        gray,
        scaleFactor=1.1,    # size of groups
        minNeighbors=5,     # how many neighbouring detections are needed for a valid face
        minSize=(300, 300)  # min size in pixels for a face
    )

    # Output number of faces found in image
    print('Found {0} faces!'.format(len(faces)))

    # Place a rectangle on each face (for debugging, won't be in the crop version)
    for (x, y, w, h) in faces:
        cv2.rectangle(image, (x, y), (x + w, y + h), (255, 255, 255), 4)

    if len(faces) > 0:
        # Resize image to fit monitor and display it
        imOut = cv2.resize(image, (750, 1142))
        # cv2.imshow("Faces found", imS)
        # cv2.waitKey(0)

        # Save image to the output folder
        # os.chdir('output')
        img_name = "output/out_{}.png".format(c)
        cv2.imwrite(img_name, imOut)
Example output:
images = glob.glob('*.jpg')
for i in images:
    image = cv2.imread(i, 1)

# Convert to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
...
What you are doing is: open every image one by one, and only after you reach the last image, apply the operations to that last image.

This is easily fixed by putting all the operations you want to apply to each image under the first for loop. Watch the indentation; that's basically what you are doing wrong here.
images = glob.glob('*.jpg')
for i in images:
    image = cv2.imread(i, 1)
    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # do all your operations here
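Since the end goal is cropping to the detected face rather than drawing a rectangle, here is a minimal sketch of that cropping step, assuming the faces list from detectMultiScale above, the same imports as the question, and one face per image (with several faces, each write would overwrite the previous one):

for (x, y, w, h) in faces:
    crop = image[y:y + h, x:x + w]  # numpy indexing is [rows, cols], i.e. [y, x]
    cv2.imwrite(os.path.join('output', os.path.basename(i)), crop)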
I have two images, each with a given point (one point per image), that need to be aligned so that the result is a combination of both images, with image 2 pasted onto image 1 at 40% opacity. I have taken this question into consideration, but our case does not exactly match, since the image coordinates are supplied by the user and the images can have a wide range of sizes.

Image 1:

Image 2:

Final result (desired output):

For this I have tried the img.paste() function of PIL and replacing values in the numpy arrays of the images in cv2, both giving results that are far from desired.
I made two input images with ImageMagick like this:
magick -size 300x400 xc:"rgb(1,204,255)" -fill red -draw "point 280,250" 1.png
magick -size 250x80 xc:"rgb(150,203,0)" -fill red -draw "point 12,25" 2.png
Then ran the following code:
#!/usr/bin/env python3
"""
Paste one image on top of another such that given points in each are coincident.
"""
from PIL import Image
# Open images and ensure RGB
im1 = Image.open('1.png').convert('RGB')
im2 = Image.open('2.png').convert('RGB')
# x,y coordinates of point in each image
p1x, p1y = 280, 250
p2x, p2y = 12, 25
# Work out how many pixels of space we need left, right, above, below common point in new image
pL = max(p1x, p2x)
pR = max(im1.width-p1x, im2.width-p2x)
pT = max(p1y, p2y)
pB = max(im1.height-p1y, im2.height-p2y)
# Create background in solid white
bg = Image.new('RGB', (pL+pR, pT+pB),'white')
bg.save('DEBUG-bg.png')
# Paste im1 onto background
bg.paste(im1, (pL-p1x, pT-p1y))
bg.save('DEBUG-bg+im1.png')
# Make 40% opacity mask for im2
alpha = Image.new('L', (im2.width,im2.height), int(40*255/100))
alpha.save('DEBUG-alpha.png')
# Paste im2 over background with alpha
bg.paste(im2, (pL-p2x, pT-p2y), alpha)
bg.save('result.png')
The result is this:
The lines that save images with names starting "DEBUG-xxx.png" are just for easy debugging and can be removed. They make it easy to view what is going on in the code, and they can all be deleted at once by removing "DEBUG*png".
Without any more details, I will try to answer the question as best as I can and will name all the extra assumptions that I made (and how to handle them if you can't make them).
Since there were no provided images, I created a blue and green image with a black dot as merging coordinate, using the following code:
import numpy as np
from PIL import Image, ImageDraw

def create_image_with_point(name, color, x, y, width=3):
    image = np.full((400, 400, 3), color, dtype=np.uint8)
    image[y - width:y + width, x - width:x + width] = (0, 0, 0)
    image = Image.fromarray(image, mode='RGB')
    ImageDraw.Draw(image).text((x - 15, y - 20), 'Point', (0, 0, 0))
    image.save(name)
    return image

blue = create_image_with_point('blue.png', color=(50, 50, 255), x=300, y=100)
green = create_image_with_point('green.png', color=(50, 255, 50), x=50, y=50)
This results in the following images:
Now I will make the assumption that the images do not contain an alpha layer yet (as I created them without one). Therefore I will load the images and add an alpha layer to them:
import numpy as np
from PIL import Image
blue = Image.open('blue.png')
blue.putalpha(255)
green = Image.open('green.png')
green.putalpha(255)
My following assumption is that you know the merge coordinates beforehand:
# Assuming x, y coordinates.
point_blue = (300, 100)
point_green = (50, 50)
Then you can create an empty image that can easily hold both of the images:
new_image = np.zeros((1000, 1000, 4), dtype=np.uint8)
This is a far-fetched assumption if you do not know the image sizes beforehand; in that case you will have to calculate the combined size of the two images.
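A hedged sketch of that size calculation, reusing the left/right/top/bottom padding idea from the PIL answer above (the function name is illustrative, not part of either answer):

def canvas_size(p1, size1, p2, size2):
    # p1, p2 are the (x, y) merge points; size1, size2 are (width, height) pairs
    left = max(p1[0], p2[0])
    right = max(size1[0] - p1[0], size2[0] - p2[0])
    top = max(p1[1], p2[1])
    bottom = max(size1[1] - p1[1], size2[1] - p2[1])
    return (left + right, top + bottom)

# e.g. canvas_size(point_blue, blue.size, point_green, green.size)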
Then you can place each image's dot in the center of the newly created image (in my case, (500, 500)). For this you use the merging points as offsets. Then you can perform alpha blending (in general: np.uint8(img_1*alpha + img_2*(1-alpha))) to merge the images with different opacities.
Which is in code:
def place_image(image: Image, point_xy: tuple[int, int], dest: np.ndarray, alpha: float = 1.) -> np.ndarray:
    # Place the merging dot on (500, 500).
    offset_x, offset_y = 500 - point_xy[0], 500 - point_xy[1]

    # Calculate the location of the image and perform alpha blending.
    destination = dest[offset_y:offset_y + image.height, offset_x:offset_x + image.width]
    destination = np.uint8(destination * (1 - alpha) + np.array(image) * alpha)

    # Copy the 'merged' image to the destination location.
    dest[offset_y:offset_y + image.height, offset_x:offset_x + image.width] = destination
    return dest
# Add the background image blue with alpha 1
new_image = place_image(blue, point_blue, dest=new_image, alpha=1)
# Add the second image with 40% opacity
new_image = place_image(green, point_green, dest=new_image, alpha=0.4)
# Store the resulting image.
image = Image.fromarray(new_image)
image.save('result.png')
The final result will be a bigger image containing the combined images; again, you can calculate the correct bounding box, so you don't have these huge areas of 'nothing' sticking out. The final result will look like this:
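One way to trim those empty margins, as a sketch (assuming the unused canvas area still has alpha 0, as created by np.zeros above):

# Crop the canvas to the bounding box of pixels that were actually written,
# using the alpha channel as the occupancy mask.
ys, xs = np.nonzero(new_image[..., 3])
trimmed = new_image[ys.min():ys.max() + 1, xs.min():xs.max() + 1]
Image.fromarray(trimmed).save('result_trimmed.png')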
I need to read the highest temperature on thermographic images, as shown below:
IR_1544_INFRA.jpg
IR_1546_INFRA.jpg
IR_1560_INFRA.jpg
IR_1564_INFRA.jpg
I used the following code; this was the best result. I also tried several other ways, such as blur, grayscale, binarization, and others, but they all failed.
import cv2
import pytesseract
pytesseract.pytesseract.tesseract_cmd = r"C:\Users\User\AppData\Local\Tesseract-OCR\tesseract.exe"
# Load image, grayscale, Otsu's threshold
entrada = cv2.imread('IR_1546_INFRA.jpg')
image = entrada[40:65, 277:319]
#image = cv2.imread('IR_1546_INFRA.jpg')
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
thresh = 255 - cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
# Blur and perform text extraction
thresh = cv2.GaussianBlur(thresh, (3,3), 0)
data = pytesseract.image_to_string(thresh, lang='eng', config='--psm 6')
print(data)
cv2.imshow('thresh', thresh)
cv2.waitKey()
In the first image, I found this. In the second image, I found this.

The image layout is always the same, i.e. the temperature is always in the same place, so I cropped the image to isolate only the number. I would like to read the values shown (97.7 here, and 85.2 here).
My code needs to process these images, always detect the temperature, and generate a list ordered from highest to lowest.

What do you suggest to improve the accuracy of pytesseract on these images?

Note 1: When I analyze the entire image (without cropping), it returns data that is not even present.

Note 2: In some images, even with the binarized number, pytesseract (image_to_string) does not return any data.

Thank you all, and sorry for the typos; writing in English is still a challenge for me.
Because your images all share the same layout, you can crop the area you want and then do the processing there. The processing is also simple: convert to gray, threshold, invert, resize, and then do the OCR. You can see it in my code below; it works on all your attached images.
import cv2
import pytesseract
import os

image_path = "temperature"
for nama_file in sorted(os.listdir(image_path)):
    print(nama_file)
    img = cv2.imread(os.path.join(image_path, nama_file))
    crop = img[43:62, 278:319]
    gray = cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)
    thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)[1]
    thresh = cv2.bitwise_not(thresh)
    double = cv2.resize(thresh, None, fx=2, fy=2)
    custom_config = r'-l eng --oem 3 --psm 7 -c tessedit_char_whitelist="1234567890." '
    text = pytesseract.image_to_string(double, config=custom_config)
    print("detected: " + text)
    cv2.imshow("img", img)
    cv2.imshow("double", double)
    cv2.waitKey(0)

cv2.destroyAllWindows()
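The ranking the question asks for (highest to lowest) is not covered above. A minimal sketch of that step, under the assumption that the OCR text for each file has been collected into a dict such as readings = {filename: text} (the dict and function name are illustrative):

def rank_temperatures(readings):
    parsed = []
    for name, text in readings.items():
        try:
            parsed.append((float(text.strip()), name))
        except ValueError:
            pass  # OCR sometimes returns empty/garbled text (the asker's Note 2)
    return sorted(parsed, reverse=True)  # highest temperature first

# e.g. rank_temperatures({'IR_1546_INFRA.jpg': '97.7', 'IR_1560_INFRA.jpg': '85.2'})
# -> [(97.7, 'IR_1546_INFRA.jpg'), (85.2, 'IR_1560_INFRA.jpg')]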
I get a segmented image as input to my program; the goal is to split the regions into two images, one containing the external contours (regions) and the other containing the internal contours (regions).

The program is in Python 3.7 with OpenCV.

I tried some morphological operations (close) and a smoothing filter (median), then applied binary and Otsu thresholds and Canny edge detection to get a better version of the contours with the findContours function.

First I extract the external contours with cv2.RETR_EXTERNAL, but this is what I get:
def function(image):
    # preprocessing
    im = cv2.imread(image, 0)
    _Kernel = 3
    iteration__ = 5
    im = Pretraitement.pretraitement.lissage_median(im, _Kernel, iteration__)
    kernel = (3, 3)
    im = cv2.morphologyEx(im, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_CROSS, kernel))

    high_thresh, im = cv2.threshold(im, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    lowThresh = 0.5 * high_thresh
    cv2.rectangle(im, (0, 0), (im.shape[1], im.shape[0]), 0, 3)

    contour, _ = cv2.findContours(
        cv2.Canny(im.copy(), lowThresh, high_thresh),
        cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    MaskExtern = np.zeros((im.shape[0], im.shape[1], 3), dtype=np.uint8)
    MaskRegion = np.zeros((im.shape[0], im.shape[1], 3), dtype=np.uint8)
    MaskContour = np.zeros(im.shape, dtype=np.uint8)

    for i in range(len(contour)):
        for j in range(len(contour)):
            # check if contour j is inside contour i
            if BoundaryBasedDescriptors.Contours.pointInContour3(contour[i], contour[j]):
                pass
            else:
                cv2.drawContours(MaskExtern, contour, j, (0, 255, 255), 1)
        cv2.drawContours(MaskContour, contour, i, 255, 1)
        cv2.drawContours(MaskRegion, contour, i, (255, i*10, 255-i*10), -1)

    cv2.imwrite('_external.jpg', MaskExtern)
    cv2.imwrite('_contour.bmp', MaskContour)
    cv2.imwrite('_colore.jpg', MaskRegion)
This is the segmented input image:

And this is what I get when I draw all the contours with thickness -1:

I expect to get the right external contours (regions), but I get some regions that are internal:
This is the error in your code:
cv2.rectangle(im, (0, 0), (im.shape[1], im.shape[0]), 0, 3)
The result is assigned to nothing, so it does nothing. If you add im = in front you'll get the behavior you're expecting.
If your purpose is to separate the internal and the external white areas, you could also try this approach. First invert the image. Then find the external outline of the black area (white in the inverted image), which you can then use as a mask to separate the areas. If necessary, you can use the masked interior image to find the smaller contours.
Result:
Code:
import cv2
import numpy as np

# load image as grayscale
img = cv2.imread('cSxN8.png', 0)

# threshold to create binary image
tr, img = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY)

# invert image
img_inv = cv2.bitwise_not(img)

# find external contours
contours, hier = cv2.findContours(img_inv, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

# draw contours in gray
for cnt in contours:
    cv2.drawContours(img, [cnt], 0, (127), 5)

# display image
cv2.imshow('Result', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
```python
import cv2
import numpy as np

# load image as grayscale
img = cv2.imread('cSxN8.png', 0)

# threshold to create binary image
tr, thresh = cv2.threshold(img, 50, 255, cv2.THRESH_BINARY)
img = thresh.copy()

# invert image
img_inv = cv2.bitwise_not(img)

# find external contours
cnt, _ = cv2.findContours(img_inv.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
for c in cnt:
    cv2.drawContours(img, [c], 0, (127), 1)

# extract the edges of the external contour
img = cv2.bitwise_xor(img, img_inv)

# binarise the external contour
_, img = cv2.threshold(img, 126, 255, cv2.THRESH_BINARY)

# now fill the external region
cnt, _ = cv2.findContours(img, cv2.RETR_CCOMP, cv2.CHAIN_APPROX_SIMPLE)
mask_externe = np.zeros(img.shape, dtype=np.uint8)
for c in cnt:
    cv2.drawContours(mask_externe, [c], -1, 255, -1)

# get the internal region
mask_interne = cv2.bitwise_xor(img_inv, mask_externe)
```
This is the complete solution (the approach was proposed by J.D., thanks to you).
I am developing an application to read the numbers from an image using OpenCV in Python 3. I first convert the image to grayscale, then apply dilation and erosion to remove some noise, then apply a threshold to get an image with only black and white, then write the image to local disk to do some ..., and then apply tesseract to recognize the number.

I need to extract the numbers from the image. I am new to OpenCV. Does anybody know any other method to get the result?

I have shared the image link below; I was trying to extract from that image. Thanks in advance.
https://drive.google.com/file/d/141y-3okLPGP_STje14ukSqSHcgtwMdRO/view?usp=sharing
import cv2
import numpy as np
import pytesseract
from PIL import Image
from pytesseract import image_to_string

# Path of working folder on disk
src_path = "/Users/sougata.a.roy/Desktop/Images/"

def get_string(img_path):
    # Read image with opencv
    img = cv2.imread(img_path)
    # Convert to gray
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Apply dilation and erosion to remove some noise
    kernel = np.ones((1, 1), np.uint8)
    img = cv2.dilate(img, kernel, iterations=1)
    img = cv2.erode(img, kernel, iterations=1)
    # Write image after removing noise
    cv2.imwrite(src_path + "removed_noise.jpg", img)
    # Apply threshold to get image with only black and white
    img = cv2.adaptiveThreshold(img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 31, 2)
    # Write the image after applying opencv to do some ...
    cv2.imwrite(src_path + 'thres.jpg', img)
    # Recognize text with tesseract for python
    result = pytesseract.image_to_string(Image.open(src_path + "thres.jpg"), lang='eng')
    return result

print('--- Start recognize text from image ---')
print(get_string(src_path + 'abcdefg195.jpg'))
print("------ Done -------")
365
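If only digits (and the decimal point) are expected, restricting tesseract's character set, as the thermographic-image answer elsewhere on this page does, may also help; an illustrative tweak (the config values here are assumptions, not the author's settings):

result = pytesseract.image_to_string(Image.open(src_path + "thres.jpg"), lang='eng', config='--psm 6 -c tessedit_char_whitelist=0123456789.')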
I am seriously struggling here. I'm using OpenCV (cv2) and Python 3. The question "tracking multiple objects of the same color" is the exact same one I'm asking, but the pages are out of date, the links don't work anymore, and I can't find anything else online about it. I can track multiple colors (a red object, a green object, a blue object, etc.); however, I cannot for the life of me figure out how to track two red objects.
# import the necessary packages
from collections import deque
import numpy as np
import argparse
import imutils
import cv2

# construct the argument parse and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-v", "--video",
                help="path to the (optional) video file")
ap.add_argument("-b", "--buffer", type=int, default=64,
                help="max buffer size")
args = vars(ap.parse_args())

# define the lower and upper boundaries of the "green"
# ball in the HSV color space, then initialize the
# list of tracked points
greenLower = (29, 86, 6)
greenUpper = (64, 255, 255)
pts = deque(maxlen=args["buffer"])

# if a video path was not supplied, grab the reference
# to the webcam
if not args.get("video", False):
    camera = cv2.VideoCapture(0)
# otherwise, grab a reference to the video file
else:
    camera = cv2.VideoCapture(args["video"])

# keep looping
while True:
    # grab the current frame
    (grabbed, frame) = camera.read()

    # if we are viewing a video and we did not grab a frame,
    # then we have reached the end of the video
    if args.get("video") and not grabbed:
        break

    # resize the frame, blur it, and convert it to the HSV
    # color space
    frame = imutils.resize(frame, width=600)
    # blurred = cv2.GaussianBlur(frame, (11, 11), 0)
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # construct a mask for the color "green", then perform
    # a series of dilations and erosions to remove any small
    # blobs left in the mask
    mask = cv2.inRange(hsv, greenLower, greenUpper)
    mask = cv2.erode(mask, None, iterations=2)
    mask = cv2.dilate(mask, None, iterations=2)

    # find contours in the mask and initialize the current
    # (x, y) center of the ball
    cnts = cv2.findContours(mask.copy(), cv2.RETR_EXTERNAL,
                            cv2.CHAIN_APPROX_SIMPLE)[-2]
    center = None

    # only proceed if at least one contour was found
    if len(cnts) > 0:
        # find the largest contour in the mask, then use
        # it to compute the minimum enclosing circle and
        # centroid
        c = max(cnts, key=cv2.contourArea)
I figured that in the line above this one, which reads c = max(cnts, key=cv2.contourArea), I could simply find the second-largest contour and use that one, but once again I couldn't find anything online about how to do this.
        ((x, y), radius) = cv2.minEnclosingCircle(c)
        M = cv2.moments(c)
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))

        # only proceed if the radius meets a minimum size
        if radius > 10:
            # draw the circle and centroid on the frame,
            # then update the list of tracked points
            cv2.circle(frame, (int(x), int(y)), int(radius),
                       (0, 255, 255), 2)
            cv2.circle(frame, center, 5, (0, 0, 255), -1)

    # update the points queue
    pts.appendleft(center)
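A minimal sketch of the idea described above, i.e. sorting the contours by area and processing the two largest instead of only the maximum (assuming cnts, frame, and pts exist exactly as in the loop; this replaces the c = max(...) block):

    # take the two largest same-color contours instead of only the largest
    for c in sorted(cnts, key=cv2.contourArea, reverse=True)[:2]:
        ((x, y), radius) = cv2.minEnclosingCircle(c)
        M = cv2.moments(c)
        if M["m00"] == 0:
            continue  # guard against degenerate contours
        center = (int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]))
        if radius > 10:
            cv2.circle(frame, (int(x), int(y)), int(radius), (0, 255, 255), 2)
            cv2.circle(frame, center, 5, (0, 0, 255), -1)
            pts.appendleft(center)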