The tf.image.decode_jpeg() function in TensorFlow gives numerically different results than scipy.misc.imread() for JPEG images. While the images look similar, the pixel values differ.
import numpy as np
import scipy
import tensorflow as tf
import matplotlib.pyplot as plt
def minimal_example():
    def _bytes_feature(value):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    tffilename = 'astronaut.tfrecords'
    writer = tf.python_io.TFRecordWriter(tffilename)
    #image_source = 'https://upload.wikimedia.org/wikipedia/commons/8/88/Astronaut-EVA.jpg'
    image_path = 'astronaut.jpg'
    image_file = open(image_path, 'rb')
    image = image_file.read()
    image_scipy = scipy.misc.imread(image_path)

    example = tf.train.Example(features=tf.train.Features(feature={'image': _bytes_feature(image)}))
    writer.write(example.SerializeToString())
    writer.close()

    record_iterator = tf.python_io.tf_record_iterator(path=tffilename)
    example = tf.train.Example()
    example.ParseFromString(next(record_iterator))
    image = example.features.feature['image'].bytes_list.value[0]
    image_tf = tf.image.decode_jpeg(image).eval(session=tf.Session())

    fig = plt.figure()
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)
    ax1.imshow(image_scipy)
    ax2.imshow(image_tf)
    print('Reconstruction Error', np.sum(np.abs(image_tf - image_scipy)))
    plt.show()
result:
Reconstruction Error 3420883624
Is this a bug or am I doing something wrong?
The discrepancy arises from the inaccurate, but fast, default Discrete Cosine Transform (DCT) method that TensorFlow uses for JPEG decoding.
According to the source code:
// The TensorFlow-chosen default for jpeg decoding is IFAST, sacrificing
// image quality for speed.
flags_.dct_method = JDCT_IFAST;
To get accurate decoding, set the attribute dct_method = 'INTEGER_ACCURATE' as shown in the example below:
def minimal_example():
    #image_source = 'https://upload.wikimedia.org/wikipedia/commons/8/88/Astronaut-EVA.jpg'
    image_path = 'astronaut.jpg'
    image_file = open(image_path, 'rb')
    image_raw = image_file.read()
    image_scipy = scipy.misc.imread(image_path)

    image_tf = tf.image.decode_jpeg(image_raw).eval(session=tf.Session())
    image_tf_accurate = tf.image.decode_jpeg(image_raw, dct_method="INTEGER_ACCURATE").eval(session=tf.Session())

    print('Error For Default: ', np.sum(np.abs(image_tf - image_scipy)))
    print('Error For Accurate: ', np.sum(np.abs(image_tf_accurate - image_scipy)))

#Error For Default:  3420883624
#Error For Accurate:  0
The JPEG standard does not require bit-to-bit identical decoding, so some variation between different implementations is expected. However, it still requires a maximum of one bit of difference for each pixel component. So the two outputs should not be more than one apart, right?
print('max diff: ', np.max(np.abs(image_tf.astype(float) - image_scipy.astype(float))))
# max diff: 17.0
Ouch, at least one implementation does not follow the standard...
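As a quick sanity check, here is a minimal sketch (reusing image_tf and image_tf_accurate from the snippet above) that compares TensorFlow's fast and accurate decodes per pixel component:

# Hedged sketch: check whether the fast and the accurate TF decodes stay within
# one level per pixel component, as the tolerance discussed above would suggest.
diff = np.abs(image_tf.astype(float) - image_tf_accurate.astype(float))
print('max diff fast vs accurate:', diff.max())
print('components off by more than 1:', int((diff > 1).sum()))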
I'm playing around with the Vision Programming Interface (VPI) and trying to warp images. I came across this Lens Distortion Correction example (https://docs.nvidia.com/vpi/algo_ldc.html) and added some code so that it takes an input image and shows the undistorted output image. The following code runs fine and I'm able to view the output image.
I'd like to run it in a loop for a video input. As soon as I uncomment the "videoCapture" line, I get the following error: "Segmentation fault (core dumped)".
Can anyone help me use this code for video input?
import vpi
import numpy as np
import cv2
import PIL
from PIL import Image
img = cv2.imread('input.jpeg')
#cap = cv2.VideoCapture(0)
vpi_image = vpi.asimage(np.asarray(img))
grid = vpi.WarpGrid((2064,1544))
sensorWidth = 7.12
focallength = 3.5
f = focallength * (2064 / sensorWidth)
K = [[f, 0, 2064/2],
     [0, f, 1544/2]]
X = np.eye(3, 4)
warp = vpi.WarpMap.fisheye_correction(grid, K=K, X=X,
                                      mapping=vpi.FisheyeMapping.EQUIDISTANT,
                                      coeffs=[-0.01, 0.22])
with vpi.Backend.CUDA:
    output = vpi_image.remap(warp, interp=vpi.Interp.CATMULL_ROM, border=vpi.Border.ZERO)
with output.rlock():
    output = Image.fromarray(output.cpu()).save('output.jpeg')
pil_image = PIL.Image.open('output.jpeg').convert('RGB')
cv2_image = np.array(pil_image)
cv2_image = cv2_image[:, :, ::-1].copy()
cv2_image = cv2.resize(cv2_image, (920,590))
img = cv2.resize(img, (920, 590))
sbs = cv2.hconcat([img, cv2_image])
cv2.imshow("sbs", sbs)
cv2.waitKey(0)
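A hedged, structural sketch of how the same VPI calls might run per frame, assuming cv2.VideoCapture(0) opens successfully and delivers frames of the 2064x1544 size the WarpGrid and K matrix above were built for; it is not a tested fix for the segmentation fault:

cap = cv2.VideoCapture(0)
while True:
    ret, frame = cap.read()
    if not ret:  # guard against empty frames before handing them to VPI
        break
    vpi_image = vpi.asimage(np.asarray(frame))
    with vpi.Backend.CUDA:
        output = vpi_image.remap(warp, interp=vpi.Interp.CATMULL_ROM, border=vpi.Border.ZERO)
    with output.rlock():
        undistorted = output.cpu().copy()  # copy out while the lock is held
    sbs = cv2.hconcat([cv2.resize(frame, (920, 590)),
                       cv2.resize(undistorted, (920, 590))])
    cv2.imshow("sbs", sbs)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break
cap.release()
cv2.destroyAllWindows()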
Can someone help me with how to increase the size of the images from the extracted feature maps? I recently ran a CNN on a set of images and would like to see the extracted features. I managed to extract them, but I can't actually see them because they are too small.
My code:
from matplotlib import pyplot

# summarize feature map shapes
for i in range(len(cnn.layers)):
    layer = cnn.layers[i]
    # check for conv layer
    if 'conv' not in layer.name:
        continue
    print(i, layer.name, layer.output.shape)

from keras import models
from keras.preprocessing import image

model_new = models.Model(inputs=cnn.inputs, outputs=cnn.layers[1].output)
img_path = 'train/1/2NbeGPsQf2Q - 4 0.jpg'
img = image.load_img(img_path, target_size=(img_rows, img_cols))

import numpy as np
from keras.applications.imagenet_utils import decode_predictions, preprocess_input

img = image.img_to_array(img)
img = np.expand_dims(img, axis=0)
img = preprocess_input(img)
features = model_new.predict(img)

square = 10
ix = 1
for _ in range(square):
    for _ in range(square):
        # specify subplot and turn off axis
        ax = pyplot.subplot(square, square, ix)
        ax.set_xticks([])
        ax.set_yticks([])
        # plot filter channel in colour
        pyplot.imshow(features[0, :, :, ix - 1], cmap='viridis')
        ix += 1

# show the figure
pyplot.show()
The result is attached (output of the feature maps from layer 1).
It's too small. How can I make it bigger so I can see what is actually there?
I appreciate any input. Thanks!
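A minimal sketch of one way to get a larger rendering, assuming the goal is simply a bigger canvas for the same 10x10 grid (the figure size and interpolation settings are assumptions, reusing features and square from the code above):

# Hedged sketch: give matplotlib a larger figure and crisp interpolation.
fig = pyplot.figure(figsize=(16, 16))  # size in inches; increase for a bigger image
ix = 1
for _ in range(square):
    for _ in range(square):
        ax = fig.add_subplot(square, square, ix)
        ax.set_xticks([])
        ax.set_yticks([])
        # 'nearest' keeps tiny feature maps crisp instead of blurring them
        ax.imshow(features[0, :, :, ix - 1], cmap='viridis', interpolation='nearest')
        ix += 1
pyplot.show()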
I use Python in a Miniconda Jupyter notebook and I'm trying to implement audio filtering. I got this error and I really don't know how to fix it.
Here I imported the libraries that I need, along with the path of the file:
import wave as we
import numpy as np
import matplotlib.pyplot as plt
dir = r'/home/pc/Downloads/Bubble audios'
Here is the function that should plot the graph:
def read_wav(wavfile, plots=True, normal=False):
    f = wavfile
    params = f.getparams()
    # print(params)
    nchannels, sampwidth, framerate, nframes = params[:4]
    strData = f.readframes(nframes)  # read the frames, string (bytes) format
    waveData = np.frombuffer(strData, dtype=np.int16)  # convert the bytes to int16 samples
    # wave amplitude normalization
    if normal == True:
        waveData = waveData * 1.0 / (max(abs(waveData)))
    if plots == True:
        time = np.arange(0, nframes, dtype=np.int16) * (1.0 / framerate)
        plt.figure(dpi=100)
        plt.plot(time, waveData)
        plt.xlabel("Time")
        plt.ylabel("Amplitude")
        plt.title("Single channel wavedata")
        plt.show()
    return (Wave, time)
def fft_wav(waveData, plots=True):
    f_array = np.fft.fft(waveData)  # Fourier transform, the result is a complex array
    f_abs = f_array
    axis_f = np.linspace(0, 250, np.int(len(f_array)/2))  # map to 250
    # axis_f = np.linspace(0, 250, np.int(len(f_array)))  # map to 250
    if plots == True:
        plt.figure(dpi=100)
        plt.plot(axis_f, np.abs(f_abs[0:len(axis_f)]))
        # plt.plot(axis_f, np.abs(f_abs))
        plt.xlabel("Frequency")
        plt.ylabel("Amplitude spectrum")
        plt.title("Tile map")
        plt.show()
    return f_abs
And here I call the function with the file that I want to be read and plotted.
f = we.open(dir+r'/Ars1_Aufnahme.wav', 'rb')
Wave, time = read_wav(f)
The error that I got:
ValueError: x and y must have same first dimension, but have shapes (2140699,) and (4281398,)
I tried to use np.reshape but it didn't work or I might have used it wrong. So, any advice?
It seems that your time array is half the size of your wave data. Maybe your nframes is too short. If you do nframes = 2*nframes, what is the error?
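A hedged sketch of that idea, assuming the factor of two comes from the file having two interleaved channels (nchannels, framerate, and waveData are the variables inside read_wav; the two-channel explanation is an assumption, not something confirmed in the question):

# Hedged sketch: build the time axis from the number of samples actually read,
# so it always matches waveData regardless of the channel count.
n_samples = len(waveData)  # 4281398 in the reported error
time = np.arange(n_samples) * (1.0 / (framerate * nchannels))
plt.plot(time, waveData)  # the shapes now agree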
I have two images (channel 1 and channel 2) and I'm trying to compute the polynomial transform that warps one image into the other image. First, I created an ORB object and computed the affine transform between the two images (post-affine). Then I decided to try to use skimage.transform.PolynomialTransform. However, when I try to compute the transform, the returned NumPy array has either NaN values or 0 values, even though the original image had a non-zero float value at that location (post-polynomial). What am I doing wrong? Code included below, images in following link. https://drive.google.com/drive/folders/1mWxUvLFLK5-rYCrxs3-uGKFxKq2wXDjS?usp=sharing Thanks in advance!
Note: I know that the question Image warping with scikit-image and transform.PolynomialTransform is similar, but in my opinion the two aren't duplicates. Although that user's problem is with the same function, the pixels in their transformed images have values, whereas by and large mine don't.
import cv2
import numpy as np
from ImageConversion import ImageConversion # self-written, irrelevant
import matplotlib
matplotlib.use('macosX')
import matplotlib.pyplot as plt
from scipy.ndimage import uniform_filter
from skimage.draw import circle_perimeter
from skimage.transform import PolynomialTransform, warp
def affine_transform(self):
    channel1_u8 = self.channel1.astype('uint8')  # necessary for detectAndCompute
    channel2_u8 = self.channel2.astype('uint8')
    orb = cv2.ORB_create(100)
    #kp1, des1 = orb.detectAndCompute(channel1_32, None)
    #kp2, des2 = orb.detectAndCompute(channel2_32, None)
    kp1, des1 = orb.detectAndCompute(channel1_u8, None)
    kp2, des2 = orb.detectAndCompute(channel2_u8, None)
    matcher = cv2.DescriptorMatcher_create(cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    matches = matcher.match(des1, des2, None)
    matches = sorted(matches, key=lambda x: x.distance)
    points1 = np.zeros((len(matches), 2), dtype=np.float32)
    points2 = np.zeros((len(matches), 2), dtype=np.float32)
    for i, match in enumerate(matches):
        points1[i, :] = kp1[match.queryIdx].pt  # index of descriptor in query descriptors, ie index of descriptor in channel 1 which is the image we wish to map to channel 2
        points2[i, :] = kp2[match.trainIdx].pt
    mat_coeff, inliers = cv2.estimateAffine2D(points1, points2)  # inliers only here because estimateAffine2D returns both matrix coefficients and inliers
    print(mat_coeff)
    rows, cols = channel1_u8.shape
    #dst = cv2.warpAffine(channel1_u8, mat_coeff, (cols, rows))
    dst = cv2.warpAffine(self.channel1, mat_coeff, (cols, rows))
    return mat_coeff, dst

tform = PolynomialTransform()
tform.estimate(self.channel2, dst, order = 3)
warped_1 = warp(dst, tform, mode = 'constant')
I found the error. I was trying to feed PolynomialTransform.estimate the entire image, rather than identified key points in the image.
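For illustration, a hedged sketch of what the corrected call might look like, reusing the points1/points2 match coordinates from affine_transform above (the exact variables are assumptions, not the poster's actual fix):

# Hedged sketch: PolynomialTransform.estimate expects (N, 2) arrays of matched
# key point coordinates, not whole images.
tform = PolynomialTransform()
tform.estimate(points1, points2, order=3)  # src and dst coordinates from the ORB matches
# warp() applies the transform as an inverse map, so the estimation direction may
# need to be swapped depending on which image is being warped.
warped_1 = warp(dst, tform, mode='constant')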
I have to find an image within a desktop stream. My code works, but if the image is resized during the stream, the program does not work. How can I solve this problem?
from PIL import ImageGrab
import numpy as np
import cv2
template = cv2.imread('piccola.png') #image to find
w, h = template.shape[:-1]
while 1:
    img = ImageGrab.grab(bbox=(0, 0, 800, 600))  # bbox specifies a specific region (bbox = x, y, width, height; starts top-left)
    img_np = np.array(img)  # this is the array obtained from conversion
    #frame = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
    res = cv2.matchTemplate(img_np, template, cv2.TM_CCOEFF_NORMED)
    threshold = .85
    loc = np.where(res >= threshold)
    for pt in zip(*loc[::-1]):  # switch columns and rows
        cv2.rectangle(img_np, pt, (pt[0] + h, pt[1] + w), (0, 0, 255), 2)
    cv2.imshow("output", img_np)
    if cv2.waitKey(25) & 0xFF == ord('q'):
        cv2.destroyAllWindows()
        break
Instead of using cv2.matchTemplate, you can extract features from your template image, such as SIFT/ORB/KAZE/BRISK, and match them against the same features extracted from the grabbed image. You can set a threshold for the matching criteria.
You can read more about feature description and matching here: https://docs.opencv.org/3.4/d5/dde/tutorial_feature_description.html
Sample code for your understanding.
import cv2
import numpy as np
img1 = cv2.imread("template.jpg", cv2.IMREAD_GRAYSCALE)
img2 = cv2.imread("image.jpg", cv2.IMREAD_GRAYSCALE)
# ORB Detector
orb = cv2.ORB_create()
kp1, des1 = orb.detectAndCompute(img1, None)
kp2, des2 = orb.detectAndCompute(img2, None)
# Brute Force Matching
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(des1, des2)
matches = sorted(matches, key = lambda x:x.distance)
#drawing the matches
matching_result = cv2.drawMatches(img1, kp1, img2, kp2, matches[:50], None, flags=2)
You can filter out the matches whose distance exceeds the usual 0.7 threshold (e.g. with Lowe's ratio test, as in the sketch below) and check the percentage of matches that remain. Based on that, you can decide how well it is finding similar images.
SIFT is patented but performs well.
ORB is the fastest, but it is not invariant to scale.
You can also try methods like KAZE and AKAZE.
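A minimal sketch of that filtering idea, assuming the same img1/img2 pair from the sample above (the 0.7 ratio test and the AKAZE detector are interpretations of the advice, not values tuned for this particular template):

# Hedged sketch: ratio-test filtering plus a simple match-percentage score.
akaze = cv2.AKAZE_create()  # scale-invariant alternative to ORB
kp1, des1 = akaze.detectAndCompute(img1, None)
kp2, des2 = akaze.detectAndCompute(img2, None)

bf = cv2.BFMatcher(cv2.NORM_HAMMING)  # crossCheck must stay off for knnMatch
raw_matches = bf.knnMatch(des1, des2, k=2)

# Keep a match only if its best distance is clearly smaller than the second best.
good = [m for m, n in raw_matches if m.distance < 0.7 * n.distance]

match_percentage = 100.0 * len(good) / max(len(kp1), 1)
print('good matches: %d (%.1f%% of template keypoints)' % (len(good), match_percentage))

# Decide "found" vs "not found" with a threshold on the count or the percentage.
if len(good) >= 10:
    print('template likely present, even if resized')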