I am working on an image processing project where I have to perform a center-surround difference calculation with Earth Mover's Distance (EMD) at multiple scales, but the problem is that I can't figure out how the center-surround difference works and how I could use EMD for it.
I found a Python function for EMD, but it compares the histograms of two source images, whereas in my problem I have only one source image.
I am generating multiple scales of the image with skimage's pyramid_gaussian function, following the solution provided at this
link: https://gist.github.com/duhaime/211365edaddf7ff89c0a36d9f3f7956c
I tried:
import cv2
import numpy as np
from skimage.io import imread
from skimage.transform import resize, pyramid_gaussian
from scipy.stats import wasserstein_distance

height, width = 256, 256  # target size used when norm_size is set

def get_img(path, norm_size=True, norm_exposure=False):
    # scipy's imread(flatten=True) was removed; skimage's as_gray is the
    # replacement (it returns floats in [0, 1] for colour inputs)
    img = imread(path, as_gray=True)
    if img.dtype != np.uint8:
        img = img * 255
    img = img.astype(int)
    if norm_size:
        img = resize(img, (height, width), anti_aliasing=True,
                     preserve_range=True).astype(int)
    if norm_exposure:
        img = normalize_exposure(img)
    return img

def get_histogram(img):
    # normalized 256-bin grayscale histogram
    h, w = img.shape
    hist = [0.0] * 256
    for i in range(h):
        for j in range(w):
            hist[img[i, j]] += 1
    return np.array(hist) / (h * w)

def normalize_exposure(img):
    # histogram equalization via the cumulative distribution function
    img = img.astype(int)
    hist = get_histogram(img)
    # get the sum of vals accumulated by each position in hist
    cdf = np.array([sum(hist[:i + 1]) for i in range(len(hist))])
    # determine the normalization values for each unit of the cdf
    sk = np.uint8(255 * cdf)
    # normalize each position in the output image
    h, w = img.shape
    normalized = np.zeros_like(img)
    for i in range(h):
        for j in range(w):
            normalized[i, j] = sk[img[i, j]]
    return normalized.astype(int)

def earth_movers_distance(path_a, path_b):
    img_a = get_img(path_a, norm_exposure=True)
    img_b = get_img(path_b, norm_exposure=True)
    hist_a = get_histogram(img_a)
    hist_b = get_histogram(img_b)
    # pass the bin indices as values and the histograms as weights so this
    # is the EMD between the two intensity distributions
    bins = np.arange(256)
    return wasserstein_distance(bins, bins, hist_a, hist_b)
if __name__ == '__main__':
    image = cv2.imread("images/test3.jpg")
    # grayscale keeps the pyramid levels 2-d, matching the histogram code above
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    pyramidlist = []
    for (i, resized) in enumerate(pyramid_gaussian(image, downscale=1.4)):
        if resized.shape[0] < 30 or resized.shape[1] < 30:
            break
        cv2.imshow(f"Layer {i+1}", resized)
        cv2.waitKey(0)
        # append the whole level; resized[i] would store only row i
        pyramidlist.append(resized)
    print(len(pyramidlist))
    cv2.destroyAllWindows()
but I don't know how to use EMD after generating the pyramid, or how to calculate the center-surround difference from it.
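For context: in Itti-Koch-style saliency models, "center-surround" means comparing a fine pyramid level (the center, scale c) against a coarser level of the same image (the surround, scale s = c + delta), so both histograms EMD needs can come from your single source. Below is a minimal sketch under that assumption, reusing pyramidlist from above; the delta offsets and the 256-bin quantization are illustrative choices, not a fixed prescription.

from scipy.stats import wasserstein_distance

def level_histogram(level):
    # pyramid_gaussian yields float images in [0, 1]; quantize to 256 bins
    vals = np.clip((level * 255).astype(int).ravel(), 0, 255)
    hist = np.bincount(vals, minlength=256).astype(float)
    return hist / hist.sum()

def center_surround_emd(pyramid, deltas=(2, 3)):
    # EMD between each "center" level c and a coarser "surround" level c + delta
    bins = np.arange(256)
    scores = {}
    for c in range(len(pyramid)):
        for d in deltas:
            s = c + d
            if s < len(pyramid):
                scores[(c, s)] = wasserstein_distance(
                    bins, bins,
                    level_histogram(pyramid[c]),
                    level_histogram(pyramid[s]))
    return scores

# usage: scores = center_surround_emd(pyramidlist)
# each value is one center-surround difference expressed as an EMD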
I have the following code, which runs well in Visual Studio Code with Python 3.9.10, OpenCV 4.5.5 and NumPy 1.22.1.
I would like to migrate this code to the Spyder IDE (version 5, another notebook) with Python 3.8, OpenCV 4.5.1 and NumPy 1.22.2.
In Spyder, I get the error TypeError: only integer scalar arrays can be converted to a scalar index in the line output_layers = [layer_names[i-1]...] (marked in the code section below).
I have already checked other answers on this site such as
TypeError when indexing a list with a NumPy array: only integer scalar arrays can be converted to a scalar index
which suggest list comprehension, but to my understanding I have already implemented this.
What is the reason for the code running correctly in one environment but not in the other?
import cv2
import numpy as np

def get_output_layers(net):
    layer_names = net.getLayerNames()
    # marked line: this is where Spyder raises the TypeError
    output_layers = [layer_names[i - 1] for i in net.getUnconnectedOutLayers()]
    return output_layers

def draw_prediction(img, class_id, confidence, x, y, x_plus_w, y_plus_h):
    label = str(classes[class_id])
    color = COLORS[class_id]
    cv2.rectangle(img, (x, y), (x_plus_w, y_plus_h), color, 2)
    cv2.putText(img, label, (x - 10, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

image = cv2.imread('horses.jpg')
Width = image.shape[1]
Height = image.shape[0]
scale = 0.00392

classes = None
with open(r'yolov3.txt', 'r') as f:
    classes = [line.strip() for line in f.readlines()]

COLORS = np.random.uniform(0, 255, size=(len(classes), 3))
net = cv2.dnn.readNet('yolov3.weights', 'yolov3.cfg')
blob = cv2.dnn.blobFromImage(image, scale, (416, 416), (0, 0, 0), True, crop=False)
net.setInput(blob)
outs = net.forward(get_output_layers(net))

class_ids = []
confidences = []
boxes = []
conf_threshold = 0.5
nms_threshold = 0.4

for out in outs:
    for detection in out:
        scores = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            center_x = int(detection[0] * Width)
            center_y = int(detection[1] * Height)
            w = int(detection[2] * Width)
            h = int(detection[3] * Height)
            x = center_x - w / 2
            y = center_y - h / 2
            class_ids.append(class_id)
            confidences.append(float(confidence))
            boxes.append([x, y, w, h])

indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)

for i in indices:
    box = boxes[i]
    x = box[0]
    y = box[1]
    w = box[2]
    h = box[3]
    draw_prediction(image, class_ids[i], confidences[i], round(x), round(y),
                    round(x + w), round(y + h))

cv2.imshow("object detection", image)
cv2.waitKey()
cv2.imwrite("object-detection.jpg", image)
cv2.destroyAllWindows()
There were subtle, recent API changes with respect to handling std::vector in Python
(4.5.1 still returns a 2-d array, but it's 1-d in 4.5.5).
To avoid the whole trouble, please simply use:
output_layers = net.getUnconnectedOutLayersNames()
(like it is done in the sample)
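If you do want to keep indexing manually, a version-tolerant sketch is to flatten whatever getUnconnectedOutLayers() returns before indexing:

import numpy as np

def get_output_layers(net):
    layer_names = net.getLayerNames()
    # np.asarray(...).flatten() yields 1-d indices whether the binding
    # returns a 1-d array (OpenCV 4.5.5) or an Nx1 array (OpenCV 4.5.1)
    ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
    return [layer_names[i - 1] for i in ids]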
I am trying to align measurement images according to the contour of the parts. Unfortunately, the surrounding particles are often taken into account by the alignment as well, and I get wrong results.
Here is the basic OpenCV code I am using. Maybe I have to filter out the particles somehow and apply the warp matrix to the original image afterwards.
im1 = cv2.imread(im1Conv)
im2 = cv2.imread(im2Conv)

# Convert images to grayscale
im1 = cv2.cvtColor(im1, cv2.COLOR_BGR2GRAY)
im2 = cv2.cvtColor(im2, cv2.COLOR_BGR2GRAY)

# Percent of original size
width = int(im1.shape[1] * scale_percent / 100)
height = int(im1.shape[0] * scale_percent / 100)
dim1 = (width, height)

# Percent of original size
width = int(im2.shape[1] * scale_percent / 100)
height = int(im2.shape[0] * scale_percent / 100)
dim2 = (width, height)

# Resize images
im1 = cv2.resize(im1, dim1, interpolation=cv2.INTER_AREA)
im2 = cv2.resize(im2, dim2, interpolation=cv2.INTER_AREA)

# Find size of image1
sz = im1.shape

# Define the motion model
if convMode != "down":
    warp_mode = cv2.MOTION_EUCLIDEAN
else:
    warp_mode = cv2.MOTION_HOMOGRAPHY

# Define 2x3 or 3x3 matrices and initialize the matrix to identity
if warp_mode == cv2.MOTION_HOMOGRAPHY:
    warp_matrix = np.eye(3, 3, dtype=np.float32)
else:
    warp_matrix = np.eye(2, 3, dtype=np.float32)

# Specify the number of iterations
number_of_iterations = int(iteFromUi)

# Specify the threshold of the increment
# in the correlation coefficient between two iterations
termination_eps = float(koreFromUi)

# Define termination criteria
criteria = (cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, number_of_iterations, termination_eps)

# Run the ECC algorithm. The results are stored in warp_matrix.
(cc, warp_matrix) = cv2.findTransformECC(im1, im2, warp_matrix, warp_mode, criteria)

if warp_mode == cv2.MOTION_HOMOGRAPHY:
    # Use warpPerspective for Homography
    im2_aligned = cv2.warpPerspective(im2, warp_matrix, (sz[1], sz[0]), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
else:
    # Use warpAffine for Translation, Euclidean and Affine
    im2_aligned = cv2.warpAffine(im2, warp_matrix, (sz[1], sz[0]), flags=cv2.INTER_LINEAR + cv2.WARP_INVERSE_MAP)
Does anyone have an idea how I can solve this problem?
Unfortunately I cannot provide the images to be analysed. They look something like this:
I get the same result with feature matching (SIFT):
Wrong alignment:
Correct alignment:
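One thing worth trying: findTransformECC accepts an inputMask argument, so the correlation can be restricted to the part itself and the particles ignored. A minimal sketch, assuming the part is the largest bright contour in im1 (Otsu thresholding is a guess for your images) and a recent OpenCV build, which also expects a gaussFiltSize argument when the extra parameters are passed (5 mirrors the old default); this would replace the findTransformECC call above:

# Build a mask that keeps only the largest contour (assumed to be the part)
_, thresh = cv2.threshold(im1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
largest = max(contours, key=cv2.contourArea)
mask = np.zeros_like(im1)
cv2.drawContours(mask, [largest], -1, 255, -1)

# ECC then only evaluates pixels inside the mask, so stray particles
# no longer pull the alignment off
(cc, warp_matrix) = cv2.findTransformECC(im1, im2, warp_matrix, warp_mode,
                                         criteria, mask, 5)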
I have a thermal image (with a color bar) from an IR camera. My goal is to get the temperature of any point by clicking on it.
I have already written a script that retrieves the RGB values of any pixel by right-clicking on it.
I figure that, using the max and min temperatures of the color bar, I can map pixel values to temperature values.
Is this possible, or is there a better way to approach this?
Thank you very much.
from PIL import Image
import cv2
from win32api import GetSystemMetrics

counter = 0
max_value = input('Max Temp Value: ')
min_value = input('Min Temp Value: ')

def mouse_callback(event, x, y, flags, params):
    # Tracks the pixel the mouse is hovering over. On right click (event == 2,
    # i.e. cv2.EVENT_RBUTTONDOWN) it prints the pixel location and its RGB values.
    global counter
    if event == 2:
        counter += 1
        r, g, b = rgb_img.getpixel((x, y))
        print(f'{counter}: {[x, y]} value {r} {g} {b}')
    else:
        print([x, y], end='\t\r', flush=True)

path_image = 'colors.jpg'
img = cv2.imread(path_image)
im = Image.open(path_image)
rgb_img = im.convert('RGB')

width = GetSystemMetrics(0)
height = GetSystemMetrics(1)
scale_width = width / im.size[0]
scale_height = height / im.size[1]
scale = min(scale_width, scale_height)
window_width = int((im.size[0] * scale) * 0.5)
window_height = int((im.size[1] * scale) * 0.5)

cv2.namedWindow('image', cv2.WINDOW_NORMAL)
cv2.resizeWindow('image', window_width, window_height)
cv2.setMouseCallback('image', mouse_callback)
cv2.imshow('image', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
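The approach can work. One hedged sketch: sample the color bar itself to build a color-to-temperature lookup, then map a clicked pixel to the temperature of the nearest color-bar color. The bar_x0, bar_y0, bar_y1 coordinates are placeholders you would read off your own image, and a linear temperature scale along the bar (top = max) is assumed:

import numpy as np

# Placeholder coordinates of the color bar in the image (top = max temp)
bar_x0, bar_y0, bar_y1 = 620, 40, 440

def build_colorbar_lut(rgb_img, t_min, t_max):
    # Sample one pixel per row of the bar and pair it with a linearly
    # interpolated temperature (assumes a linear scale on the bar)
    colors, temps = [], []
    for y in range(bar_y0, bar_y1):
        colors.append(rgb_img.getpixel((bar_x0, y)))
        frac = (y - bar_y0) / (bar_y1 - 1 - bar_y0)
        temps.append(t_max + frac * (t_min - t_max))
    return np.array(colors, dtype=float), np.array(temps)

def pixel_to_temp(rgb, colors, temps):
    # Nearest color on the bar (Euclidean distance in RGB space)
    idx = np.argmin(np.sum((colors - np.array(rgb, dtype=float)) ** 2, axis=1))
    return temps[idx]

# usage inside mouse_callback, after r, g, b are read:
# colors, temps = build_colorbar_lut(rgb_img, float(min_value), float(max_value))
# print('temperature:', pixel_to_temp((r, g, b), colors, temps))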
I'm currently trying to create a detector for aphids (green and rose) on plants, but using only "classic" image processing techniques (no neural network).
Here is an image I'm working on:
'aphids.jpg'
I'm working on the code below. If you apply it to the image, you should get the plants alone. My problem is that I want to isolate the aphids that can be seen on the plants. There are a lot of them, but I just want to detect the biggest or the most obvious ones.
In the code there is an "edge_detec" function I'm currently working on. One of the problems I have is that I can detect some of the aphids as contours, but it also picks up plain lines...
I tried to drop those lines using the contour hierarchy, but it seems those lines have inner contours, so I can't easily delete them.
I also tried adjust_gamma and contrast, but it doesn't improve the results much.
I'm looking for more ideas. What would you try? (One option is sketched after the code below.)
Thank you in advance!
Here is the code:
import cv2
import numpy as np
import matplotlib.pyplot as plt

def adjust_gamma(image, gamma=1.0):
    # build a lookup table mapping the pixel values [0, 255] to
    # their adjusted gamma values
    invGamma = 1.0 / gamma
    table = np.array([((i / 255.0) ** invGamma) * 255
                      for i in np.arange(0, 256)]).astype("uint8")
    # apply gamma correction using the lookup table
    return cv2.LUT(image, table)

def adjust_contrast(image, alpha=1.0, beta=0):
    new = np.zeros(image.shape, image.dtype)
    for y in range(image.shape[0]):
        for x in range(image.shape[1]):
            for c in range(image.shape[2]):
                new[y, x, c] = np.clip(alpha * image[y, x, c] + beta, 0, 255)
    return new

def img_process(img):
    (h1, w1) = img.shape[:2]
    center = (w1 / 2, h1 / 2)
    blur = cv2.GaussianBlur(img.copy(), (5, 5), 0)
    hsv = cv2.cvtColor(blur, cv2.COLOR_BGR2HSV)
    # Boundaries to separate plants from the image
    l_bound = np.array([20, 0, 0])
    h_bound = np.array([90, 250, 170])  # green
    mask = cv2.inRange(hsv, l_bound, h_bound)
    res = cv2.bitwise_and(img, img, mask=mask)
    # Find plant contours
    cnt, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    sort_cnt = sorted(cnt, key=cv2.contourArea, reverse=True)
    cnt = [c for c in sort_cnt if cv2.contourArea(c) > 300]
    cv2.drawContours(res, cnt, -1, (0, 255, 0), -1)
    # Invert the mask to keep only the plant in the image
    mask2 = cv2.inRange(res, np.array([0, 0, 0]), np.array([250, 250, 250]))
    mask2 = cv2.bitwise_not(mask2)
    res2 = cv2.bitwise_and(img, img, mask=mask2)
    # Increase brightness/contrast
    res2 = res2 * 1.45
    res2 = res2.astype('uint8')
    # Crop
    res2 = res2[:-50, int(center[0] - 300):int(center[0] + 550)]
    return res2

def edge_detec(img):
    (h1, w1) = img.shape[:2]
    center = (w1 / 2, h1 / 2)
    blur = cv2.GaussianBlur(img.copy(), (5, 5), 0)
    gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 30, 70, apertureSize=3)
    edges = edges[:-50, int(center[0] - 300):int(center[0] + 550)]
    #kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    #edges = cv2.morphologyEx(edges, cv2.MORPH_GRADIENT, kernel)
    cnt, hierarchy = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnt = sorted(cnt, key=cv2.contourArea, reverse=True)
    listArea = list(map(cv2.contourArea, cnt))
    sort_cnt = [x for x in cnt if cv2.contourArea(x) > 10]
    # edges is single-channel, so draw with 255 (a BGR tuple would draw 0 here)
    cv2.drawContours(edges, sort_cnt, -1, 255, -1)
    return edges, center, img

### Main program
img = cv2.imread('051.jpg')
while True:
    ## Put processing function here
    img_mod = img_process(img)
    cv2.imshow('img', img_mod)
    if cv2.waitKey(1) & 0xFF == 27:
        break
cv2.destroyAllWindows()
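Since line-like contours are the main nuisance, one idea worth trying is to score each contour by circularity, 4*pi*area / perimeter^2, which is close to 1 for blob-shaped aphids and close to 0 for thin lines, and keep only round-enough blobs. A minimal sketch (the 0.5 threshold and minimum area are guesses to tune on your images):

import math

def keep_blob_like(contours, min_area=10, min_circularity=0.5):
    # circularity = 4*pi*area / perimeter**2: ~1 for circles, ~0 for lines
    blobs = []
    for c in contours:
        area = cv2.contourArea(c)
        perimeter = cv2.arcLength(c, True)
        if area < min_area or perimeter == 0:
            continue
        circularity = 4 * math.pi * area / (perimeter ** 2)
        if circularity >= min_circularity:
            blobs.append(c)
    return blobs

# usage in edge_detec, replacing the plain area filter:
# sort_cnt = keep_blob_like(cnt)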
Hello, I'm trying to recognize text from an image using Tesseract, but I'm unable to get a result.
I'm using the EAST technique to detect the text. I have one more question: how can I extend the padding of the box? Also, cv2.putText does not work in this case.
Original code for text detection: https://github.com/opencv/opencv/blob/master/samples/dnn/text_detection.cpp
import cv2
import numpy as np
import argparse
import time
import math
import matplotlib.pyplot as plt
import skimage.io as io
import os
from imutils.object_detection import non_max_suppression
import pytesseract
print(np.__version__)
def decode_predictions(scores, geometry):
    # grab the number of rows and columns from the scores volume, then
    # initialize our set of bounding box rectangles and corresponding
    # confidence scores
    (numRows, numCols) = scores.shape[2:4]
    boxes = []
    confidences = []
    # loop over the number of rows
    for y in range(0, numRows):
        # extract the scores (probabilities), followed by the geometrical
        # data used to derive potential bounding box coordinates that
        # surround text
        scoresData = scores[0, 0, y]
        xData0 = geometry[0, 0, y]
        xData1 = geometry[0, 1, y]
        xData2 = geometry[0, 2, y]
        xData3 = geometry[0, 3, y]
        anglesData = geometry[0, 4, y]
        # loop over the number of columns
        for x in range(0, numCols):
            # if our score does not have sufficient probability, ignore it
            if scoresData[x] < args["min_confidence"]:
                continue
            # compute the offset factor as our resulting feature maps will
            # be 4x smaller than the input image
            (offsetX, offsetY) = (x * 4.0, y * 4.0)
            # extract the rotation angle for the prediction and then
            # compute the sine and cosine
            angle = anglesData[x]
            cos = np.cos(angle)
            sin = np.sin(angle)
            # use the geometry volume to derive the width and height of
            # the bounding box
            h = xData0[x] + xData2[x]
            w = xData1[x] + xData3[x]
            # compute the rotated rect for the text prediction bounding box
            offset = (offsetX + (cos * xData1[x]) + (sin * xData2[x]),
                      offsetY - (sin * xData1[x]) + (cos * xData2[x]))
            p1 = (-sin * h + offset[0], -cos * h + offset[1])
            p3 = (-cos * w + offset[0], sin * w + offset[1])
            center = (0.5 * (p1[0] + p3[0]), 0.5 * (p1[1] + p3[1]))
            # add the bounding box coordinates and probability score to
            # our respective lists
            boxes.append((center, (w, h), -angle * 180.0 / math.pi))
            confidences.append(float(scoresData[x]))
    return (boxes, confidences)
args = {
    "image": "C:\\Users\\ckunwar\\Test_Images\\licence_plate1\\52.jpg",
    "east": "frozen_east_text_detection.pb",
    "min_confidence": 0.25,
    "nms_thresh": 0.24,
    "width": 480,
    "height": 320,
    "padding": 0.0
}
# load the input image and grab the image dimensions
image = cv2.imread(args["image"])
orig = image.copy()
(H, W) = image.shape[:2]
#print(H, W)

# set the new width and height and then determine the ratio in change
# for both the width and height
(newW, newH) = (args["width"], args["height"])
rW = W / float(newW)
rH = H / float(newH)

# resize the image and grab the new image dimensions
image = cv2.resize(image, (newW, newH))
(H, W) = image.shape[:2]

# define the two output layer names for the EAST detector model that
# we are interested in -- the first is the output probabilities and the
# second can be used to derive the bounding box coordinates of text
layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

# load the pre-trained EAST text detector
print("[INFO] loading EAST text detector...")
net = cv2.dnn.readNet(args["east"])

# construct a blob from the image and then perform a forward pass of
# the model to obtain the two output layer sets
blob = cv2.dnn.blobFromImage(image, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False)
start = time.time()
net.setInput(blob)
(scores, geometry) = net.forward(layerNames)
end = time.time()

# show timing information on text prediction
print("[INFO] text detection took {:.6f} seconds".format(end - start))
(boxes, confidences) = decode_predictions(scores, geometry)
# apply non-maxima suppression to suppress weak, overlapping bounding boxes
indices = cv2.dnn.NMSBoxesRotated(boxes, confidences, args["min_confidence"], args["nms_thresh"])
results = []

# loop over the bounding boxes (flatten handles both 1-d and Nx1 index shapes)
for i in np.array(indices).flatten():
    # get 4 corners of the rotated rect
    vertices = cv2.boxPoints(boxes[i])
    # scale the bounding box coordinates based on the respective ratios
    for j in [0, 1, 2, 3]:
        vertices[j][0] *= rW
        vertices[j][1] *= rH
    # run the OCR on the box region of the original image, not the whole
    # image; here we crop the axis-aligned bounding rectangle of the box
    xs, ys = vertices[:, 0], vertices[:, 1]
    x0, x1 = max(int(xs.min()), 0), min(int(xs.max()), orig.shape[1])
    y0, y1 = max(int(ys.min()), 0), min(int(ys.max()), orig.shape[0])
    roi = orig[y0:y1, x0:x1]
    config = ("-l eng --oem 3 --psm 11")
    text = pytesseract.image_to_string(roi, config=config)
    results.append((vertices, text))

results = sorted(results, key=lambda r: r[0][0][1])
output = orig.copy()
for (vertices, text) in results:
    print("OCR TEXT")
    print("========")
    print("{}\n".format(text))
    # strip non-ASCII characters so the text can be drawn on the image
    text = "".join([c if ord(c) < 128 else "" for c in text]).strip()
    # draw the rotated box edge by edge (cv2.line needs integer coordinates)
    for j in [0, 1, 2, 3]:
        p1 = (int(vertices[j][0]), int(vertices[j][1]))
        p2 = (int(vertices[(j + 1) % 4][0]), int(vertices[(j + 1) % 4][1]))
        cv2.line(output, p1, p2, (0, 255, 0), 2)
    # cv2.putText requires an origin point before the font argument; the
    # original call omitted it, which is why putText did not work
    org = (int(vertices[1][0]), int(vertices[1][1]) - 5)
    cv2.putText(output, text, org, cv2.FONT_HERSHEY_TRIPLEX, 0.8, (0, 0, 255), 2)

# show the output image
#orig = cv2.cvtColor(orig, cv2.COLOR_BGR2RGB)
cv2.imshow("Text Detection", output)
cv2.waitKey(0)
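On extending the padding: the args["padding"] value is defined but unused above. One hedged way to apply it, following the padding idea in common EAST + Tesseract walkthroughs, is to grow the crop by a fraction of the box size before OCR; EAST boxes are tight, and a little margin often helps Tesseract. The padded_roi helper below is illustrative, meant to wrap the x0, y0, x1, y1 crop computed in the loop above:

def padded_roi(img, x0, y0, x1, y1, pad=0.05):
    # grow the box by pad * width/height on each side, clamped to the image
    dX = int((x1 - x0) * pad)
    dY = int((y1 - y0) * pad)
    x0 = max(0, x0 - dX)
    y0 = max(0, y0 - dY)
    x1 = min(img.shape[1], x1 + dX)
    y1 = min(img.shape[0], y1 + dY)
    return img[y0:y1, x0:x1]

# usage before pytesseract.image_to_string:
# roi = padded_roi(orig, x0, y0, x1, y1, args["padding"])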