I'm playing around with Vision Programming Interface (VPI) and trying to bend images. I came across this Lens Distortion Correction example (https://docs.nvidia.com/vpi/algo_ldc.html) and added some code so it takes an input image and shows the undistorted output image. The following code runs fine and I'm able to view the output image.
I'd like to run it in a loop for a video input. As soon as I uncomment the cv2.VideoCapture line, I get the following error:
"Segmentation fault (core dumped)"
Anyone able to help me use this code for video input?
import vpi
import numpy as np
import cv2
import PIL
from PIL import Image

img = cv2.imread('input.jpeg')
#cap = cv2.VideoCapture(0)

vpi_image = vpi.asimage(np.asarray(img))

grid = vpi.WarpGrid((2064, 1544))

sensorWidth = 7.12
focallength = 3.5
f = focallength * (2064 / sensorWidth)
K = [[f, 0, 2064 / 2],
     [0, f, 1544 / 2]]
X = np.eye(3, 4)

warp = vpi.WarpMap.fisheye_correction(grid, K=K, X=X,
                                      mapping=vpi.FisheyeMapping.EQUIDISTANT,
                                      coeffs=[-0.01, 0.22])

with vpi.Backend.CUDA:
    output = vpi_image.remap(warp, interp=vpi.Interp.CATMULL_ROM, border=vpi.Border.ZERO)

with output.rlock():
    Image.fromarray(output.cpu()).save('output.jpeg')

pil_image = PIL.Image.open('output.jpeg').convert('RGB')
cv2_image = np.array(pil_image)
cv2_image = cv2_image[:, :, ::-1].copy()

cv2_image = cv2.resize(cv2_image, (920, 590))
img = cv2.resize(img, (920, 590))
sbs = cv2.hconcat([img, cv2_image])
cv2.imshow("sbs", sbs)
cv2.waitKey(0)
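For reference, here is a minimal, untested sketch of how the same pipeline could run in a loop over camera frames, using the same VPI calls as above. It assumes a webcam at index 0 and rebuilds the warp map for the camera's actual frame size, since a grid sized for 2064x1544 will generally not match the frames a webcam delivers (a plausible source of the crash):

import cv2
import numpy as np
import vpi

cap = cv2.VideoCapture(0)

# Read one frame first so the warp map can be built for the real frame size.
ok, frame = cap.read()
if not ok:
    raise RuntimeError('could not read a frame from the camera')
h, w = frame.shape[:2]

sensorWidth = 7.12
focallength = 3.5
f = focallength * (w / sensorWidth)
K = [[f, 0, w / 2],
     [0, f, h / 2]]
X = np.eye(3, 4)

grid = vpi.WarpGrid((w, h))
warp = vpi.WarpMap.fisheye_correction(grid, K=K, X=X,
                                      mapping=vpi.FisheyeMapping.EQUIDISTANT,
                                      coeffs=[-0.01, 0.22])

while True:
    ok, frame = cap.read()
    if not ok:
        break
    with vpi.Backend.CUDA:
        corrected = vpi.asimage(frame).remap(warp, interp=vpi.Interp.CATMULL_ROM,
                                             border=vpi.Border.ZERO)
    with corrected.rlock():
        out = corrected.cpu().copy()   # copy so the buffer outlives the lock
    cv2.imshow('sbs', cv2.hconcat([frame, out]))
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()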
In this Python script, I first increase the image resolution for better OCR accuracy.
Second, I apply several filters to the image to increase contrast and text clarity.
Third, I extract text from the filtered image using Tesseract, but Tesseract doesn't extract the data cleanly.
I also tried different page segmentation modes and OCR engine modes in pytesseract, but did not get the expected output.
This is my code:
import os, argparse
import csv
import cv2
import numpy as np
import pytesseract
from pytesseract import Output
from PIL import Image

image = cv2.imread('dota.jpg')

# Upscale for better OCR accuracy
height = 9000
width = 16000
dimensions = (width, height)
image = cv2.resize(image, dimensions, interpolation=cv2.INTER_LINEAR)

def get_grayscale(image):
    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

def adaptiveThreshold(image):
    return cv2.adaptiveThreshold(image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                 cv2.THRESH_BINARY, 11, 2)

gray = get_grayscale(image)

# Sharpening kernel
sharpenKernel = np.array([[2, -1, 2], [-1, 9, -1], [2, -1, 2]], np.float32) / 9
sharpen = cv2.filter2D(src=gray, kernel=sharpenKernel, ddepth=-1)

adthresh = adaptiveThreshold(sharpen)

cong = '--psm 6'
final = pytesseract.image_to_data(adthresh, output_type=Output.DICT,
                                  config=cong, lang='eng')
print(final['text'])
Also, this code is available on GitHub: https://github.com/Bhavin-Prydan/Dota-Esportz/blob/main/dota.py
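Since the question mentions trying page segmentation and OCR engine modes, here is a minimal, untested sketch of sweeping a few --psm/--oem combinations on the adthresh image from the script above; the mode values listed are arbitrary examples, not recommendations:

import pytesseract
from pytesseract import Output

# Compare a few --psm / --oem combinations on the preprocessed image.
for psm in (3, 4, 6, 11):
    for oem in (1, 3):
        cfg = f'--psm {psm} --oem {oem}'
        data = pytesseract.image_to_data(adthresh, output_type=Output.DICT,
                                         config=cfg, lang='eng')
        words = [w for w in data['text'] if w.strip()]
        print(f'{cfg}: {len(words)} words')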
I'm trying to learn OpenCV. Online I found that, with OpenCV, I can obtain the contours of an image. So I tried that. Here is the script:
import cv2
import numpy as np

def getC(imagine):
    global imgContour
    c, h = cv2.findContours(imagine, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in c:
        a = cv2.contourArea(cnt)
        print(area)
        if area > 500:
            cv2.drawContour(imgContour, cnt, -1, (255, 0, 0), 3)

img = cv2.imread("a3.jpg")
imgContour = img.copy()
imgG = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
imgB = cv2.GaussianBlur(imgG, (7, 7), 1)
imgC = cv2.Canny(imgB, 50, 50)

getC(imgContour)

cv2.imshow("", img)
cv2.imshow("g", imgG)
cv2.imshow("b", imgB)
cv2.imshow("l", imgContour)
cv2.waitKey(0)
I think there is a problem with global variables, but also with the format. a3.jpg is that image.
I don't know what to do now or how to resolve the issue.
Thanks for the help.
You saved the area in the variable a but then used it under the name area. You can fix this by renaming a to area:
area = cv2.contourArea(cnt)
There is a typo: cv2.drawContour should be cv2.drawContours.
The cv2.drawContours method expects the contours argument to be a list of contours, so you need to wrap cnt in a list and call it like this:
cv2.drawContours(imgContour,[cnt],-1,(255,0,0),3)
When you call getC, you pass the image without preprocessing it into an edge map with Canny; you need to call it with the preprocessed image instead:
getC(imgC)
The Final Script
import cv2
import numpy as np

def getC(imagine):
    global imgContour
    print(imgContour.shape)
    c, h = cv2.findContours(imagine, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    for cnt in c:
        area = cv2.contourArea(cnt)
        print(area)
        if area > 500:
            cv2.drawContours(imgContour, [cnt], -1, (255, 0, 0), 3)

img = cv2.imread("./a3.jpg")
imgContour = img.copy()
imgG = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
imgB = cv2.GaussianBlur(imgG, (7, 7), 1)
imgC = cv2.Canny(imgB, 50, 50)

getC(imgC)

cv2.imshow("", img)
cv2.imshow("g", imgG)
cv2.imshow("b", imgB)
cv2.imshow("l", imgContour)
cv2.waitKey(0)
I am using the code snippet given here to compute the HOG feature of a small image patch. However, for the code attached herewith, the variable h, which is supposed to hold the HOG feature values, returns an empty tuple instead. Can anyone please point me to where I am going wrong in the code?
import numpy as np
import cv2
img = cv2.imread('newimg.jpg')
img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
img_gray = cv2.resize(img_gray,(50,50))
hog = cv2.HOGDescriptor()
h = hog.compute(img_gray)
print(h)
The test image is attached
I think the problem is the image size: your 50x50 image is smaller than the default window size of the HOG descriptor, which is 64x128, so compute() returns nothing. I recommend constructing the descriptor with a window size that fits your image and then using the code snippet attached below.
winSize = (32, 32)        # window must fit inside the 50x50 image
blockSize = (16, 16)
blockStride = (8, 8)
cellSize = (8, 8)
nbins = 9
hog = cv2.HOGDescriptor(winSize, blockSize, blockStride, cellSize, nbins)
h = hog.compute(img_gray)
print(h)
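For these parameters the descriptor length can be worked out directly: a 32x32 window with 16x16 blocks and an 8x8 stride holds 3x3 = 9 block positions, each contributing 4 cells x 9 bins = 36 values, for 9 x 36 = 324 values in total. Alternatively (a sketch, not part of the original answer), you can keep the default descriptor and resize the patch to its 64x128 window instead:

# Alternative: keep the default 64x128 window and resize the patch to fit it.
hog_default = cv2.HOGDescriptor()                    # default winSize is (64, 128)
h_default = hog_default.compute(cv2.resize(img_gray, (64, 128)))
print(len(h_default))                                # 3780 = (7*15 blocks) * 4 cells * 9 bins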
I'm using OpenCV to do some morphological operations on an image:
but it joins some of the letters together, creating problems when I detect its contours. For example:
Is there some tweaking I can do with my code to fix this, or will I have to do it a different way? (It has to be a closing algorithm or function, because it is pretty helpful in preprocessing.)
The code I am using is below:
kernel = np.ones((5,5),np.uint8)
opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
Here is a workable solution:
import numpy as np
import cv2
from matplotlib import pyplot as plt

I = cv2.imread('/home/smile/Downloads/words.jpg', cv2.IMREAD_GRAYSCALE)

# Binarize and invert so the letters become white on black.
_, It = cv2.threshold(I, 0., 255, cv2.THRESH_OTSU)
It = cv2.bitwise_not(It)

# Label each letter as a connected component of the binarized image.
_, labels = cv2.connectedComponents(It)

result = np.zeros((I.shape[0], I.shape[1], 3), np.uint8)

for i in range(labels.min(), labels.max() + 1):
    mask = cv2.compare(labels, i, cv2.CMP_EQ)
    # OpenCV 3.x returns (image, contours, hierarchy);
    # in OpenCV 4.x, findContours returns only (contours, hierarchy).
    _, ctrs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    result = cv2.drawContours(result, ctrs, -1, (0xFF, 0, 0))

plt.figure()
plt.imshow(result)
plt.axis('off')
plt.show()
During the first two steps the image is binarized and inverted so that the letters appear as white over black.
_, It = cv2.threshold(I, 0., 255, cv2.THRESH_OTSU)
It = cv2.bitwise_not(It)
Then, in the next step, each letter becomes a labeled region.
_, labels = cv2.connectedComponents(It)
The final step consists, for each label value, of finding the area of the image that corresponds to it, computing the external contour of that area, and drawing it into the output image.
result = np.zeros((I.shape[0], I.shape[1], 3), np.uint8)
for i in range(labels.min(), labels.max() + 1):
    mask = cv2.compare(labels, i, cv2.CMP_EQ)
    _, ctrs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    result = cv2.drawContours(result, ctrs, -1, (0xFF, 0, 0))
Hope it helps.
TensorFlow's tf.image.decode_jpeg() function gives a different numerical result than scipy.misc.imread() for JPEG images. While the images look similar, the pixel values are different.
import numpy as np
import scipy
import tensorflow as tf
import matplotlib.pyplot as plt

def minimal_example():
    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    tffilename = 'astronaut.tfrecords'
    writer = tf.python_io.TFRecordWriter(tffilename)
    #image_source = 'https://upload.wikimedia.org/wikipedia/commons/8/88/Astronaut-EVA.jpg'
    image_path = 'astronaut.jpg'
    image_file = open(image_path, 'rb')
    image = image_file.read()
    image_scipy = scipy.misc.imread(image_path)

    example = tf.train.Example(features=tf.train.Features(feature={'image': _bytes_feature(image)}))
    writer.write(example.SerializeToString())
    writer.close()

    record_iterator = tf.python_io.tf_record_iterator(path=tffilename)
    example = tf.train.Example()
    example.ParseFromString(next(record_iterator))
    image = example.features.feature['image'].bytes_list.value[0]
    image_tf = tf.image.decode_jpeg(image).eval(session=tf.Session())

    fig = plt.figure()
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    ax1.imshow(image_scipy)
    ax2.imshow(image_tf)
    print('Reconstruction Error', np.sum(np.abs(image_tf - image_scipy)))
    plt.show()
result:
Reconstruction Error 3420883624
Is this a bug or am I doing something wrong?
The discrepancy arises because of the inaccurate, but fast, default Discrete Cosine Transform used by TensorFlow.
According to the source code:
// The TensorFlow-chosen default for jpeg decoding is IFAST, sacrificing
// image quality for speed.
flags_.dct_method = JDCT_IFAST;
In order to get accurate decoding, one can set the attribute dct_method = 'INTEGER_ACCURATE', as shown in the example below:
def minimal_example():
    #image_source = 'https://upload.wikimedia.org/wikipedia/commons/8/88/Astronaut-EVA.jpg'
    image_path = 'astronaut.jpg'
    image_file = open(image_path, 'rb')
    image_raw = image_file.read()
    image_scipy = scipy.misc.imread(image_path)

    image_tf = tf.image.decode_jpeg(image_raw).eval(session=tf.Session())
    image_tf_accurate = tf.image.decode_jpeg(image_raw, dct_method="INTEGER_ACCURATE").eval(session=tf.Session())

    print('Error For Default: ', np.sum(np.abs(image_tf - image_scipy)))
    print('Error For Accurate: ', np.sum(np.abs(image_tf_accurate - image_scipy)))
    #Error For Default:  3420883624
    #Error For Accurate: 0
The JPEG standard does not require bit-to-bit identical decoding. So, some variations are expected between different implementations.
However, it still requires
a maximum 1 bit of difference for each pixel component.
So the two outputs should not differ by more than one per pixel component, right?
print('max diff: ', np.max(np.abs(image_tf.astype(float) - image_scipy.astype(float))))
# max diff: 17.0
Ouch, at least one implementation does not follow the standard...