CNN forward and backward with numpy einsum give different results to for loop implementation - conv-neural-network

I am trying to implement Convolutional Neural Network from scratch with Python numpy. I implemented forward and backward phases with numpy einsum (functions conv_forward and conv_backward).
When I compared the results of the einsum conv_forward and conv_backward with the reference implementations taken from Coursera's Convolutional Neural Networks course (conv_forward_ref, conv_backward_ref), the einsum versions gave slightly different results from the reference implementation.
The difference is negligible for a small number of loops, but it becomes significant with a larger number of loops.
I checked my implementation carefully and found no errors. I am not sure why this happens, or which implementation gives the correct results.
Are there any other ways to implement these functions more efficiently (without using numpy einsum)?
Here is the code:
import numpy as np
# pad data
def pad_data(img_array, pad_size, pad_val=0):
    """Pad axes 1 and 2 of a 4-D array with a constant value.

    Arguments:
    img_array -- 4-D numpy array; axes 0 and 3 are left untouched
    pad_size -- number of entries added before and after axes 1 and 2
    pad_val -- value written into the added border (default 0)

    Returns:
    A new array whose axes 1 and 2 are each 2 * pad_size longer.
    """
    untouched = (0, 0)
    border = (pad_size, pad_size)
    return np.pad(
        img_array,
        (untouched, border, border, untouched),
        'constant',
        constant_values=(pad_val, pad_val),
    )
"""
Reference implementation: Coursera's Convolution Neural Network course
"""
# Implement a single matrix multiplication of a slice of input and weights, bias
def conv_single_step(a_slice_prev, W, b):
    """Apply one filter to one input window.

    Arguments:
    a_slice_prev -- window of the input, same shape as W
    W -- filter weights
    b -- bias, added to the summed product (broadcast by numpy)

    Returns:
    Sum of the element-wise product of the window and the filter, plus b.
    The result has whatever shape adding b to a scalar produces.
    """
    weighted_sum = np.sum(a_slice_prev * W)
    return weighted_sum + b
# conv forward: source code from Coursera's Convolution Neural Network course
def conv_forward_ref(A_prev, W, b, hparameters):
    """Loop-based reference forward pass of a convolution layer.

    Arguments:
    A_prev -- previous layer output, shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- filters, shape (f, f, n_C_prev, n_C)
    b -- biases, shape (1, 1, 1, n_C)
    hparameters -- dict with integer entries "stride" and "pad"

    Returns:
    Z -- convolution output, shape (m, n_H, n_W, n_C)
    """
    # get dimension of output of previous layer
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # get dimension of this layer's filter
    (f, f, n_C_prev, n_C) = W.shape
    # get values of this layer's hyperparameters
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    # output size; floor division keeps the arithmetic in integers
    n_H = (n_H_prev - f + 2 * pad) // stride + 1
    n_W = (n_W_prev - f + 2 * pad) // stride + 1
    Z = np.zeros((m, n_H, n_W, n_C))
    # zero-pad the two spatial axes of the input
    A_prev_pad = np.pad(
        A_prev, ((0, 0), (pad, pad), (pad, pad), (0, 0)),
        'constant', constant_values=0)
    for i in range(m):
        a_prev_pad = A_prev_pad[i]
        for h in range(n_H):
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):
                horiz_start = w * stride
                horiz_end = horiz_start + f
                # the window is the same for every output channel
                a_slice_prev = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                for c in range(n_C):
                    # Index the bias down to a scalar: storing a size-1
                    # array into a single element is deprecated in NumPy.
                    Z[i, h, w, c] = np.sum(a_slice_prev * W[:, :, :, c]) + b[0, 0, 0, c]
    # make sure the output shape is correct
    assert(Z.shape == (m, n_H, n_W, n_C))
    return Z
# conv backward: source code from Coursera's Convolution Neural Network course
def conv_backward_ref(dZ, A_prev, W, b, hparameters):
    """Loop-based reference backward pass of a convolution layer.

    Arguments:
    dZ -- gradient of the cost w.r.t. the layer output, shape (m, n_H, n_W, n_C)
    A_prev -- input of the forward pass, shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- filters, shape (f, f, n_C_prev, n_C)
    b -- biases (not used by the computation)
    hparameters -- dict with integer entries "stride" and "pad"

    Returns:
    dA_prev -- gradient w.r.t. A_prev, same shape as A_prev
    dW -- gradient w.r.t. W, same shape as W
    db -- gradient w.r.t. the biases, shape (1, 1, 1, n_C)
    """
    # Retrieve dimensions from A_prev's shape
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # Retrieve dimensions from W's shape
    (f, f, n_C_prev, n_C) = W.shape
    # Retrieve information from "hparameters"
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    # Retrieve dimensions from dZ's shape
    (m, n_H, n_W, n_C) = dZ.shape
    # Initialize dA_prev, dW, db with the correct shapes
    dA_prev = np.zeros((m, n_H_prev, n_W_prev, n_C_prev))
    dW = np.zeros((f, f, n_C_prev, n_C))
    db = np.zeros((1, 1, 1, n_C))
    # Padded input, and a zero accumulator of the same padded shape
    A_prev_pad = np.pad(
        A_prev, ((0, 0), (pad, pad), (pad, pad), (0, 0)),
        'constant', constant_values=0)
    dA_prev_pad = np.zeros((m, n_H_prev + 2 * pad, n_W_prev + 2 * pad, n_C_prev))
    for i in range(m):  # loop over the training examples
        a_prev_pad = A_prev_pad[i]
        da_prev_pad = dA_prev_pad[i]
        for h in range(n_H):  # vertical axis of the output volume
            vert_start = h * stride
            vert_end = vert_start + f
            for w in range(n_W):  # horizontal axis of the output volume
                horiz_start = w * stride
                horiz_end = horiz_start + f
                a_slice = a_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :]
                for c in range(n_C):  # channels of the output volume
                    da_prev_pad[vert_start:vert_end, horiz_start:horiz_end, :] += W[:, :, :, c] * dZ[i, h, w, c]
                    dW[:, :, :, c] += a_slice * dZ[i, h, w, c]
                    db[:, :, :, c] += dZ[i, h, w, c]
        # Strip the padding with explicit end indices: the usual
        # [pad:-pad] slice is empty when pad == 0.
        dA_prev[i, :, :, :] = da_prev_pad[pad:pad + n_H_prev, pad:pad + n_W_prev, :]
    # Making sure the output shape is correct
    assert(dA_prev.shape == (m, n_H_prev, n_W_prev, n_C_prev))
    return dA_prev, dW, db
"""
Numpy einsum implementation
"""
# conv forward: implemented with numpy einsum
def conv_forward(A_prev, W, b, hparameters):
    """Forward pass of a convolution layer using a strided view and einsum.

    Arguments:
    A_prev -- previous layer output, shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- filters, shape (f, f, n_C_prev, n_C)
    b -- biases, broadcast against the (m, n_H, n_W, n_C) output
    hparameters -- dict with integer entries "stride" and "pad"

    Returns:
    Z -- convolution output, shape (m, n_H, n_W, n_C)
    """
    # get dimension of output of previous layer
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    # get dimension of this layer's filter
    (f, f, n_C_prev_W, n_C) = W.shape
    # make sure number of channels of A_prev equal to number of channels of W
    assert(n_C_prev == n_C_prev_W)
    # get values of this layer's hyperparameters and determine shape of output
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    n_H = (n_H_prev - f + 2 * pad) // stride + 1
    n_W = (n_W_prev - f + 2 * pad) // stride + 1
    # zero-pad the two spatial axes of the input
    A_prev_pad = np.pad(
        A_prev, ((0, 0), (pad, pad), (pad, pad), (0, 0)),
        'constant', constant_values=0)
    # windows[p, q, r, i, h, w] == A_prev_pad[i, h*stride + p, w*stride + q, r].
    # The output-position axes must step by `stride` elements; using the
    # plain strides there is only correct for stride == 1.
    s_m, s_h, s_w, s_c = A_prev_pad.strides
    windows = np.lib.stride_tricks.as_strided(
        A_prev_pad,
        shape=(f, f, n_C_prev, m, n_H, n_W),
        strides=(s_h, s_w, s_c, s_m, s_h * stride, s_w * stride),
        writeable=False)
    # contract over the filter window (p, q) and input channels (r)
    Z = np.einsum('pqrs,pqrihw->ihws', W, windows)
    Z = Z + b
    assert(Z.shape == (m, n_H, n_W, n_C))
    return Z
# # conv backward: implemented with numpy einsum
def conv_backward(dZ, A_prev, W, b, hparameters):
    """Backward pass of a convolution layer using strided views and einsum.

    Works for any positive stride and for non-square inputs.

    Arguments:
    dZ -- gradient of the cost w.r.t. the layer output, shape (m, n_H, n_W, n_C)
    A_prev -- input of the forward pass, shape (m, n_H_prev, n_W_prev, n_C_prev)
    W -- filters, shape (f, f, n_C_prev, n_C)
    b -- biases (not used by the computation)
    hparameters -- dict with integer entries "stride" and "pad"

    Returns:
    dA_prev -- gradient w.r.t. A_prev, same shape as A_prev
    dW -- gradient w.r.t. W, same shape as W
    db -- gradient w.r.t. the biases, shape (1, 1, 1, n_C)

    Raises:
    ValueError -- if dZ's spatial size does not match the forward output size
    """
    # retrieve dimensions and hyperparameters
    (m, n_H_prev, n_W_prev, n_C_prev) = A_prev.shape
    (f, f, n_C_prev, n_C) = W.shape
    stride = hparameters["stride"]
    pad = hparameters["pad"]
    (m, n_H, n_W, n_C) = dZ.shape

    expected = ((n_H_prev - f + 2 * pad) // stride + 1,
                (n_W_prev - f + 2 * pad) // stride + 1)
    if (n_H, n_W) != expected:
        raise ValueError(
            "dZ spatial shape %r does not match the forward output shape %r"
            % ((n_H, n_W), expected))

    as_strided = np.lib.stride_tricks.as_strided

    ## dA_prev: full correlation of the dilated dZ with the 180-degree
    ## rotated filters.
    # extent of the padded input actually touched by the forward windows
    span_H = (n_H - 1) * stride + f
    span_W = (n_W - 1) * stride + f
    # place dZ on a grid with `stride` spacing, with an f-1 zero border
    dZ_full = np.zeros((m, span_H + f - 1, span_W + f - 1, n_C), dtype=dZ.dtype)
    dZ_full[:, f - 1:span_H:stride, f - 1:span_W:stride, :] = dZ
    # dZ_windows[p, q, s, i, y, x] == dZ_full[i, y + p, x + q, s]
    z_m, z_h, z_w, z_c = dZ_full.strides
    dZ_windows = as_strided(
        dZ_full,
        shape=(f, f, n_C, m, span_H, span_W),
        strides=(z_h, z_w, z_c, z_m, z_h, z_w),
        writeable=False)
    # rotate W by 180 degrees around its two spatial axes
    W_rot = W[::-1, ::-1, :, :]
    dA_span = np.einsum('pqrs,pqsbmn->bmnr', W_rot, dZ_windows)
    # Padded-input positions beyond the span were never read by the forward
    # pass (stride did not divide evenly), so their gradient stays zero.
    dA_prev_pad = np.zeros(
        (m, n_H_prev + 2 * pad, n_W_prev + 2 * pad, n_C_prev),
        dtype=dA_span.dtype)
    dA_prev_pad[:, :span_H, :span_W, :] = dA_span
    # strip the padding (explicit end indices also work for pad == 0)
    dA_prev = np.ascontiguousarray(
        dA_prev_pad[:, pad:pad + n_H_prev, pad:pad + n_W_prev, :])
    assert(dA_prev.shape == A_prev.shape)

    ## dW: correlate each input window with the matching dZ entry
    A_prev_pad = np.pad(
        A_prev, ((0, 0), (pad, pad), (pad, pad), (0, 0)),
        'constant', constant_values=0)
    # A_windows[p, q, s, i, h, w] == A_prev_pad[i, h*stride + p, w*stride + q, s]
    a_m, a_h, a_w, a_c = A_prev_pad.strides
    A_windows = as_strided(
        A_prev_pad,
        shape=(f, f, n_C_prev, m, n_H, n_W),
        strides=(a_h, a_w, a_c, a_m, a_h * stride, a_w * stride),
        writeable=False)
    dW = np.einsum('abcd,pqsabc->pqsd', dZ, A_windows)
    assert(dW.shape == W.shape)

    ## db: sum dZ over everything except the output-channel axis
    db = np.einsum('abcd->d', dZ).reshape(1, 1, 1, n_C)
    return dA_prev, dW, db
"""
Test
"""
# Compare the einsum implementation against the loop-based reference.
# Both accumulate the same products, but in a different order, so small
# floating-point round-off differences are expected; np.allclose is the
# meaningful check, not an exactly-zero difference.
A_prev = np.random.rand(10, 100, 100, 3) * 1000
W = np.random.rand(5, 5, 3, 10)
b = np.zeros((1, 1, 1, 10))
hparameters = {"stride": 1, "pad": 2}

# forward pass
Z_ref = conv_forward_ref(A_prev, W, b, hparameters)
Z = conv_forward(A_prev, W, b, hparameters)
print("sum of difference for Z: ", np.sum(Z_ref - Z))
print("is Z matched with Z_ref: ", np.allclose(Z_ref, Z))

# backward pass
dZ = np.random.rand(10, 100, 100, 10) * 1000
dA_prev_ref, dW_ref, db_ref = conv_backward_ref(dZ, A_prev, W, b, hparameters)
dA_prev, dW, db = conv_backward(dZ, A_prev, W, b, hparameters)
print("sum of difference for dA: ", np.sum(dA_prev_ref - dA_prev))
print("sum of difference for dW: ", np.sum(dW_ref - dW))
print("sum of difference for db: ", np.sum(db_ref - db))
print("is dA_prev matched with dA_prev_ref: ", np.allclose(dA_prev_ref, dA_prev))
print("is dW matched with dW_ref: ", np.allclose(dW_ref, dW))
print("is db matched with db_ref: ", np.allclose(db_ref, db))
Results:
sum of difference for Z: -4.743924364447594e-08
is Z matched with Z_ref: True
sum of difference for dA: 3.2011885195970535e-06
sum of difference for dW: 0.0
sum of difference for db: 0.0
is dA_prev matched with dA_prev_ref: True
is dW matched with dW_ref: True
is db matched with db_ref: True

Related

Speed Up a for Loop - Python

I have a code that works perfectly well but I wish to speed up the time it takes to converge. A snippet of the code is shown below:
def myfunction(x, i):
    """Return x corrected along row i of the module-level `data`.

    The step is zero when target[i] - data[i] @ x is positive; otherwise x
    is moved along data[i], scaled by that (non-positive) difference and
    divided by the squared norm of the row.
    """
    # min(0, .) keeps only non-positive differences
    shortfall = min(0, target[i] - data[i, :] @ x)
    y = x + shortfall * data[i] / (norm(data[i]) ** 2)
    return y
# Sweep over every row again and again until the norm of the iterate
# changes by no more than 1e-8 between two consecutive sweeps.
rows, columns = data.shape
start = time.time()
x_not = np.ones(columns)
iterate = 0
iterate_count = [0]
norm_count = [norm(x_not)]
res = 5
while res > 1e-8:
    # one sweep: each row update starts from the result of the previous one
    for row in range(rows):
        y = myfunction(x_not, row)
        x_not = y
    iterate += 1
    iterate_count.append(iterate)
    norm_count.append(norm(x_not))
    previous_norm, current_norm = norm_count[-2:]
    res = abs(current_norm - previous_norm)
print('Converge at {} iterations'.format(iterate))
print('Duration: {:.4f} seconds'.format(time.time() - start))
I am relatively new in Python. I will appreciate any hint/assistance.
Ax=b is the problem we wish to solve. Here, 'A' is the 'data' and 'b' is the 'target'
Ugh! After spending a while on this I don't think it can be done the way you've set up your problem. In each iteration over the row, you modify x_not and then pass the updated result to get the solution for the next row. This kind of setup can't be vectorized easily. You can learn the thought process of vectorization from the failed attempt, so I'm including it in the answer. I'm also including a different iterative method to solve linear systems of equations. I've included a vectorized version -- where the solution is updated using matrix multiplication and vector addition, and a loopy version -- where the solution is updated using a for loop to demonstrate what you can expect to gain.
1. The failed attempt
Let's take a look at what you're doing here.
def myfunction(x, i):
    """Return x corrected along row i of `data` (the asker's update step)."""
    # `@` is the matrix-multiplication operator: data[i, :] @ x is a dot product
    y = x + (min(0, target[i] - data[i, :] @ x)) * (data[i] / (norm(data[i])**2))
    return y
You subtract
the dot product of (the ith row of data and x_not)
from the ith row of target,
limited at zero.
You multiply this result by the ith row of data divided by the squared norm of that row. Let's call this part2.
Then you add this vector to x_not.
Now let's look at the shapes of the matrices.
data is (M, N).
target is (M, ).
x_not is (N, )
Instead of doing these operations rowwise, you can operate on the entire matrix!
1.1. Simplifying the dot product.
Instead of doing data[i, :] @ x, you can do data @ x_not and this gives an array with the ith element giving the dot product of the ith row with x_not. So now we have data @ x_not with shape (M, )
Then, you can subtract this from the entire target array, so target - (data @ x_not) has shape (M, ).
So far, we have
part1 = target - (data @ x_not)
Next, if anything is greater than zero, set it to zero.
part1[part1 > 0] = 0
1.2. Finding rowwise norms.
Finally, you want to multiply this by the row of data, and divide by the square of the L2-norm of that row. To get the norm of each row of a matrix, you do
rownorms = np.linalg.norm(data, axis=1)
This is a (M, ) array, so we need to convert it to a (M, 1) array so we can divide each row. rownorms[:, None] does this. Then divide data by this.
part2 = data / (rownorms[:, None]**2)
1.3. Add to x_not
Finally, we're adding each row of part1 * part2 to the original x_not and returning the result
result = x_not + (part1 * part2).sum(axis=0)
Here's where we get stuck. In your approach, each call to myfunction() gives a value of part1 that depends on x_not, which was changed by the previous call to myfunction().
2. Why vectorize?
Using numpy's inbuilt methods instead of looping allows it to offload the calculation to its C backend, so it runs faster. If your numpy is linked to a BLAS backend, you can extract even more speed by using your processor's SIMD registers
The conjugate gradient method is a simple iterative method to solve certain systems of equations. There are other more complex algorithms that can solve general systems well, but this should do for the purposes of our demo. Again, the purpose is not to have an iterative algorithm that will perfectly solve any linear system of equations, but to show what kind of speedup you can expect if you vectorize your code.
Given your system
data @ x_not = target
Let's define some variables:
# Normal equations: A is square and symmetric, as conjugate gradient requires.
A = data.T @ data
b = data.T @ target
And we'll solve the system A @ x = b
# Conjugate-gradient iteration on A @ x = b.
x = np.zeros((columns,))  # Initial guess. Can be anything
resid = b - A @ x
p = resid
while (np.abs(resid) > tolerance).any():
    Ap = A @ p
    alpha = (resid.T @ resid) / (p.T @ Ap)
    x = x + alpha * p
    resid_new = resid - alpha * Ap
    beta = (resid_new.T @ resid_new) / (resid.T @ resid)
    p = resid_new + beta * p
    resid = resid_new + 0  # "+ 0" makes a copy instead of an alias
To contrast the fully vectorized approach with one that uses iterations to update the rows of x and resid_new, let's define another implementation of the CG solver that does this.
def solve_loopy(data, target, itermax = 100, tolerance = 1e-8):
    """Solve data.T @ data @ x = data.T @ target by conjugate gradient.

    Same algorithm as the vectorized solver, except that x and the new
    residual are updated one element at a time in a Python loop, to show the
    cost of not vectorizing.

    Arguments:
    data -- 2-D array
    target -- 1-D array with one entry per row of data
    itermax -- maximum number of iterations
    tolerance -- stop once every residual entry is at most this in magnitude

    Returns:
    x -- 1-D array with one entry per column of data
    """
    A = data.T @ data
    b = data.T @ target
    columns = data.shape[1]
    x = np.zeros((columns,))  # Initial guess. Can be anything
    resid = b - A @ x
    resid_new = b - A @ x  # separate buffer, overwritten element by element
    p = resid
    niter = 0
    while (np.abs(resid) > tolerance).any() and niter < itermax:
        Ap = A @ p
        alpha = (resid.T @ resid) / (p.T @ Ap)
        for i in range(len(x)):
            x[i] = x[i] + alpha * p[i]
            resid_new[i] = resid[i] - alpha * Ap[i]
        beta = (resid_new.T @ resid_new) / (resid.T @ resid)
        p = resid_new + beta * p
        # copy, so the next in-place writes to resid_new leave resid intact
        resid = resid_new + 0
        niter += 1
    return x
And our original vector method:
def solve_vect(data, target, itermax = 100, tolerance = 1e-8):
    """Solve data.T @ data @ x = data.T @ target by conjugate gradient.

    Every update is a whole-array numpy expression.

    Arguments:
    data -- 2-D array
    target -- 1-D array with one entry per row of data
    itermax -- maximum number of iterations
    tolerance -- stop once every residual entry is at most this in magnitude

    Returns:
    x -- 1-D array with one entry per column of data
    """
    A = data.T @ data
    b = data.T @ target
    columns = data.shape[1]
    x = np.zeros((columns,))  # Initial guess. Can be anything
    resid = b - A @ x
    p = resid
    niter = 0
    while (np.abs(resid) > tolerance).any() and niter < itermax:
        Ap = A @ p
        alpha = (resid.T @ resid) / (p.T @ Ap)
        x = x + alpha * p
        resid_new = resid - alpha * Ap
        beta = (resid_new.T @ resid_new) / (resid.T @ resid)
        p = resid_new + beta * p
        resid = resid_new + 0
        niter += 1
    return x
Let's solve a simple system to see if this works first:
2x1 + x2 = -5
−x1 + x2 = -2
should give a solution of [-1, -3]
data = np.array([[ 2, 1],
[-1, 1]])
target = np.array([-5, -2])
print(solve_loopy(data, target))
print(solve_vect(data, target))
Both give the correct solution [-1, -3], yay! Now on to bigger things:
data = np.random.random((100, 100))
target = np.random.random((100, ))
Let's ensure the solution is still correct:
# Does each solver's answer actually satisfy data @ x = target?
sol1 = solve_loopy(data, target)
np.allclose(data @ sol1, target)
# Output: False
sol2 = solve_vect(data, target)
np.allclose(data @ sol2, target)
# Output: False
Hmm, looks like the CG method doesn't work for badly conditioned random matrices we created. Well, at least both give the same result.
np.allclose(sol1, sol2)
# Output: True
But let's not get discouraged! We don't really care if it works perfectly, the point of this is to demonstrate how amazing vectorization is. So let's time this:
import timeit
timeit.timeit('solve_loopy(data, target)', number=10, setup='from __main__ import solve_loopy, data, target')
# Output: 0.25586539999994784
timeit.timeit('solve_vect(data, target)', number=10, setup='from __main__ import solve_vect, data, target')
# Output: 0.12008900000000722
Nice! A ~2x speedup simply by avoiding a loop while updating our solution!
For larger systems, this will be even better.
for N in [10, 50, 100, 500, 1000]:
data = np.random.random((N, N))
target = np.random.random((N, ))
t_loopy = timeit.timeit('solve_loopy(data, target)', number=10, setup='from __main__ import solve_loopy, data, target')
t_vect = timeit.timeit('solve_vect(data, target)', number=10, setup='from __main__ import solve_vect, data, target')
print(N, t_loopy, t_vect, t_loopy/t_vect)
This gives us:
N t_loopy t_vect speedup
00010 0.002823 0.002099 1.345390
00050 0.051209 0.014486 3.535048
00100 0.260348 0.114601 2.271773
00500 0.980453 0.240151 4.082644
01000 1.769959 0.508197 3.482822

How to create my own loss function in Pytorch?

I'd like to create a model that predicts parameters of a circle (coordinates of center, radius).
Input is an array of points (of arc with noise):
def generate_circle(x0, y0, r, start_angle, phi, N, sigma):
    """Sample N noisy points along a circular arc.

    Arguments:
    x0, y0 -- centre of the circle
    r -- radius
    start_angle -- angle of the first point, in degrees
    phi -- angular extent of the arc, in degrees
    N -- number of points
    sigma -- standard deviation of the Gaussian noise on each coordinate

    Returns:
    x, y -- two 1-D arrays of length N
    """
    theta = np.linspace(start_angle*np.pi/180, (start_angle + phi)*np.pi/180, num=N)

    def jitter(centres):
        # one independent draw per coordinate, in order
        return np.array([np.random.normal(c, sigma, 1)[0] for c in centres])

    # all x noise is drawn before any y noise
    x = jitter(r*np.cos(t) + x0 for t in theta)
    y = jitter(r*np.sin(t) + y0 for t in theta)
    return x, y
# Synthetic dataset: n_x noisy arcs, each sampled at N points.
n_x = 1000
start_angle = 0
phi = 90
N = 100
sigma = 0.005

x_full = []
for i in range(n_x):
    # drawn in the order x0, y0, r
    x0, y0, r = (np.random.normal(0, 10, 1)[0] for _ in range(3))
    x, y = generate_circle(x0, y0, r, start_angle, phi, N, sigma)
    # one (N, 2) array of [x, y] rows per arc
    x_full.append(np.column_stack((x, y)))
X = torch.from_numpy(np.array(x_full))
print(X.size())  # torch.Size([1000, 100, 2])
Output: [x_c, y_c, r]
As a loss function I need to use this one:
I tried to implement something like the following:
class Net(torch.nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear."""

    def __init__(self, n_feature, n_hidden, n_output):
        super().__init__()
        # n_feature -> n_hidden
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        # n_hidden -> n_output
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Return the n_output predictions for each row of x."""
        activated = torch.relu(self.hidden(x))
        return self.predict(activated)
# Algebraic circle-fit loss between points and predicted circle parameters.
def my_loss(point, params):
    """Sum of squared algebraic circle residuals.

    For a point (x, y) and a circle (x_c, y_c, r) the residual is
    (x - x_c)**2 + (y - y_c)**2 - r**2, which is zero on the circle.

    Arguments:
    point -- tensor whose last axis holds (x, y); either one point per circle,
             shape (batch, 2), or several points per circle, shape (batch, N, 2)
    params -- tensor of shape (batch, 3) holding (x_c, y_c, r) per circle

    Returns:
    Scalar tensor: the sum of the squared residuals over all points.
    """
    x, y = point[..., 0], point[..., 1]
    x_c, y_c, r = params[..., 0], params[..., 1], params[..., 2]
    if point.dim() == params.dim() + 1:
        # several points per circle: add a trailing axis so each circle's
        # parameters broadcast over that circle's points
        x_c, y_c, r = x_c.unsqueeze(-1), y_c.unsqueeze(-1), r.unsqueeze(-1)
    arr = ((x - x_c)**2 + (y - y_c)**2 - r**2)**2
    loss = torch.sum(arr)
    return loss
# The network sees the N (x, y) pairs of an arc flattened into one row and
# predicts the three circle parameters.
net = Net(n_feature=N*2, n_hidden=10, n_output=3)
optimizer = torch.optim.SGD(net.parameters(), lr=1e-4)
# X never changes during training, so flatten and convert it once
inputs = X.view(n_x, N*2).float()
for t in range(1000):
    prediction = net(inputs)
    loss = my_loss(X, prediction)
    print(f"loss: {loss}")
    # standard step: clear old gradients, backpropagate, update weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
So, the question is how to correctly implement my own loss function in terms of Pytorch in this case?
Or how to change the model's structure to get expected results?
You're trying to create a loss between the predicted outputs and the inputs instead of between the predicted outputs and the true outputs. To do this you need to save the true values of x0, y0, and r when you generate them.
# Synthetic dataset that also keeps the true circle parameters of each arc.
n_x = 1000
start_angle = 0
phi = 90
N = 100
sigma = 0.005

x_full = []
targets = []  # <-- true (x0, y0, r) of every arc
for i in range(n_x):
    # drawn in the order x0, y0, r
    x0, y0, r = (np.random.normal(0, 10, 1)[0] for _ in range(3))
    targets.append(np.array([x0, y0, r]))  # <-- remember the ground truth
    x, y = generate_circle(x0, y0, r, start_angle, phi, N, sigma)
    x_full.append(np.column_stack((x, y)))
X = torch.from_numpy(np.array(x_full))
Y = torch.from_numpy(np.array(targets))  # <-- ground-truth tensor
print(X.size())  # torch.Size([1000, 100, 2])
print(Y.size())  # torch.Size([1000, 3])
Now, when you call my_loss you should use:
loss = my_loss(Y, prediction)
You are passing in all your data points every iteration of your for loop, I would split your data into smaller sections so that your model doesn't just learn to output the same values every time. e.g. you have generated 1000 points so pass in a random selection of 100 in each iteration using something like random.sample(...)
Your input numbers are pretty large which means your loss will be huge, so generate inputs between 0 and 1 and then if you need the value to be between 0 and 10 you can just multiply by 10.

Solving coordinate state estimation using particle filter in python

I have a pickle file which contains 300 coordinates of my subject's location in time. There are some missing values in the middle of it for which I am using a particle filter to estimate those missing values. At the end, I am getting some predictions (not completely accurate) but in a bit drifted form.
The position of my subject is, in fact, the position of my subject's nose. I take a total of 300 frames, and each frame contains one coordinate for the nose. Some frames have the value (0,0), meaning the value is missing. To fill them in, I am implementing a particle filter. I am new to particle filters, so I may well have made mistakes in the code. The result is a prediction for all 300 frames, but with drifted values. You can get a clear idea from the image.
My measurement value is distance from four landmarks and I provide orientation angle to next point and distance to next point as additional measurements.
from filterpy.monte_carlo import systematic_resample
import numpy as np
import matplotlib.pyplot as plt
from numpy.linalg import norm
from numpy.random import randn
import scipy.stats
from numpy.random import uniform
import pickle
from math import *
#####################################################
def create_uniform_particles(x_range, y_range, hdg_range, N):
    """Create N particles (x, y, heading) drawn uniformly from the given ranges.

    Arguments:
    x_range, y_range, hdg_range -- (low, high) pairs for each column
    N -- number of particles

    Returns:
    Array of shape (N, 3); the heading column is wrapped into [0, 2*pi).
    """
    particles = np.empty((N, 3))
    # columns are drawn in the order x, y, heading
    for column, bounds in enumerate((x_range, y_range, hdg_range)):
        particles[:, column] = uniform(bounds[0], bounds[1], size=N)
    particles[:, 2] %= 2 * np.pi
    return particles
def create_gaussian_particles(mean, std, N):
    """Create N particles (x, y, heading) drawn from independent Gaussians.

    Arguments:
    mean -- three means, one per column
    std -- three standard deviations, one per column
    N -- number of particles

    Returns:
    Array of shape (N, 3); the heading column is wrapped into [0, 2*pi).
    """
    particles = np.empty((N, 3))
    # columns are drawn in the order x, y, heading
    for column in range(3):
        particles[:, column] = mean[column] + (randn(N) * std[column])
    particles[:, 2] %= 2 * np.pi
    return particles
#####################################################
def predict(particles, u, std):
    """Move every particle by the control input u, in place.

    Arguments:
    particles -- array with x in column 0 and y in column 1; modified in place
    u -- (heading, distance); read only, the caller's object is not modified
    std -- (std heading, std distance); currently unused because no noise is
           added to the motion

    NOTE(review): with no process noise every particle receives exactly the
    same displacement, so the particle cloud can never spread out again.
    """
    # wrap the heading into [0, 2*pi) without touching the caller's u
    heading = u[0] % (2 * np.pi)
    dist = u[1]
    # move in the commanded direction
    particles[:, 0] += np.cos(heading) * dist
    particles[:, 1] += np.sin(heading) * dist
#####################################################
def update(particles, weights, z, R, landmarks):
    """Reweight the particles by the likelihood of the measurements, in place.

    Arguments:
    particles -- array with x in column 0 and y in column 1
    weights -- 1-D array of particle weights; modified in place
    z -- measured distance to each landmark, indexed like landmarks
    R -- standard deviation of the distance measurement
    landmarks -- sequence of (x, y) landmark positions
    """
    for i, landmark in enumerate(landmarks):
        # distance from every particle to this landmark
        distance = np.linalg.norm(particles[:, 0:2] - landmark, axis=1)
        weights *= scipy.stats.norm(distance, R).pdf(z[i])
    weights += 1.e-300  # avoid round-off to zero
    weights /= weights.sum()  # normalize
#####################################################
def estimate(particles, weights):
    """Return the weighted mean and variance of the particle positions.

    Only columns 0 and 1 (x, y) of particles are used.
    """
    xy = particles[:, 0:2]
    centroid = np.average(xy, weights=weights, axis=0)
    squared_dev = (xy - centroid)**2
    spread = np.average(squared_dev, weights=weights, axis=0)
    return centroid, spread
#####################################################
def simple_resample(particles, weights):
    """Resample the particles in proportion to their weights, in place.

    Each new particle is picked by inverting the cumulative weight
    distribution at a uniform random number. Afterwards all weights are
    reset to 1/N.

    Arguments:
    particles -- array of particles; overwritten with the resampled set
    weights -- 1-D array of normalized weights; overwritten with 1/N
    """
    N = len(particles)
    cumulative_sum = np.cumsum(weights)
    cumulative_sum[-1] = 1.  # avoid round-off error
    # np.random.random is used explicitly: no bare `random` name is imported
    indexes = np.searchsorted(cumulative_sum, np.random.random(N))
    # resample according to indexes
    particles[:] = particles[indexes]
    weights.fill(1.0 / N)
#####################################################
def neff(weights):
    """Return the effective number of particles, 1 / sum(weights**2)."""
    sum_of_squares = np.square(weights).sum()
    return 1. / sum_of_squares
#####################################################
def resample_from_index(particles, weights, indexes):
    """Replace the particles by those selected in indexes, in place.

    Arguments:
    particles -- array of particles; overwritten with particles[indexes]
    weights -- 1-D array of weights; every entry is reset to 1/len(weights)
    indexes -- integer indexes of the particles to keep, one per particle
    """
    particles[:] = particles[indexes]
    # the old weights need not be gathered first: all are reset to uniform
    weights.fill(1.0 / len(weights))
#####################################################
def read_pickle(pkl_file, f, j):
    """Read keypoint j at frames f and f+1 from a pickled detection file.

    Arguments:
    pkl_file -- path of a pickle holding a dict with key 'all_keyps'; entry 1
                of that value is a per-frame list of detections, and the first
                detection of a frame is a (4, 17) array
    f -- frame index; frame f + 1 must also exist
    j -- keypoint index (0..16)

    Returns:
    cord -- array [x0, y0], the rescaled keypoint position at frame f
    u -- array [orientation, dist]: direction (radians) and distance from the
         position at frame f to the position at frame f + 1

    Frames without any detection are left at (0, 0).

    NOTE(review): pickle.load can execute arbitrary code; only open files
    from a trusted source.
    """
    with open(pkl_file, 'rb') as res:
        dets = pickle.load(res, encoding = 'latin1')
    frames = dets['all_keyps'][1]
    # Fill frame by frame: the per-frame lists are ragged (empty when nothing
    # was detected), so they cannot be turned into one regular array.
    keyps = np.zeros((len(frames), 4, 17))
    for k, detections in enumerate(frames):
        if len(detections) > 0:
            keyps[k] = detections[0]
    # keep only the first two rows (x and y)
    keyps = keyps[:, :2, :]
    # rescale the coordinates: x by 256/480, y by 256/640
    keyps[:, 0] = keyps[:, 0] / 480 * 256
    keyps[:, 1] = keyps[:, 1] / 640 * 256
    x0 = keyps[f][0][j]
    y0 = keyps[f][1][j]
    x1 = keyps[f + 1][0][j]
    y1 = keyps[f + 1][1][j]
    cord = np.array([x0, y0])
    orientation = atan2((y1 - y0), (x1 - x0))
    dist = sqrt((x1 - x0) ** 2 + (y1 - y0) ** 2)
    u = np.array([orientation, dist])
    return (cord, u)
#####################################################
def run_pf1(N, iters=298, sensor_std_err=.1,
            do_plot=True, plot_particles=False,
            xlim=(-256, 256), ylim=(-256, 256),
            initial_x=None):
    """Run the particle filter over `iters` frames and plot the result.

    Arguments:
    N -- number of particles
    iters -- number of frames to process
    sensor_std_err -- std of the noise added to the landmark distances, also
                      passed to update() as the measurement std
    do_plot -- not read anywhere in this function
    plot_particles -- if True, also scatter-plot the particle cloud
    xlim, ylim -- axis limits of the plot
    initial_x -- optional (x, y, heading) mean for a Gaussian initial cloud;
                 when None the cloud is uniform over x, y in (0, 20)

    Returns:
    p2 -- the handle of the last estimate scatter plot

    NOTE(review): the per-frame estimates are collected in `xs` but never
    returned; only the plot handle is.
    """
    # four reference points at the corners of a 256 x 256 square
    landmarks = np.array([[0, 0], [0, 256], [256,0], [256,256]])
    NL = len(landmarks)
    plt.figure()
    # create particles and weights
    if initial_x is not None:
        particles = create_gaussian_particles(
            mean=initial_x, std=(5, 5, np.pi/4), N=N)
    else:
        # NOTE(review): the initial cloud only covers (0, 20) x (0, 20) while
        # the landmarks span 0..256 -- confirm this matches the first frame.
        particles = create_uniform_particles((0,20), (0,20), (0, 6.28), N)
    weights = np.ones(N) / N
    if plot_particles:
        alpha = .20
        if N > 5000:
            alpha *= np.sqrt(5000)/np.sqrt(N)
        plt.scatter(particles[:, 0], particles[:, 1],
                    alpha=alpha, color='g')
    xs = []
    #robot_pos, u = read_pickle('.pkl',1,0)
    for x in range(iters):
        # NOTE(review): the pickle file is reopened and reparsed on every
        # iteration.
        robot_pos, uv = read_pickle('.pkl',x,0)
        print("orignal: ", robot_pos,)
        # distance from robot to each landmark
        # NOTE(review): the measurement is built from robot_pos as read from
        # the file, whatever value that frame holds.
        zs = (norm(landmarks - robot_pos, axis=1) +
              (randn(NL) * sensor_std_err))
        # move every particle by this frame's (orientation, distance);
        # the noise stds passed here are both zero
        predict(particles, u=uv, std=(0, .0))
        # incorporate measurements
        update(particles, weights, z=zs, R=sensor_std_err,
               landmarks=landmarks)
        # resample if too few effective particles
        if neff(weights) < N/2:
            indexes = systematic_resample(weights)
            resample_from_index(particles, weights, indexes)
            assert np.allclose(weights, 1/N)
        mu, var = estimate(particles, weights)
        #mu +=(120,10)
        xs.append(mu)
        print("expected: ",mu)
        if plot_particles:
            plt.scatter(particles[:, 0], particles[:, 1],
                        color='k', marker=',', s=1)
        p1 = plt.scatter(robot_pos[0], robot_pos[1], marker='+',
                         color='k', s=180, lw=3)
        p2 = plt.scatter(mu[0], mu[1], marker='s', color='r')
        print(p2)
    xs = np.array(xs)
    #plt.plot(xs[:, 0], xs[:, 1])
    plt.legend([p1, p2], ['Actual', 'PF'], loc=4, numpoints=1)
    plt.xlim(*xlim)
    plt.ylim(*ylim)
    # NOTE(review): this compares the last estimate with (iters, iters), not
    # with the last position read from the file -- confirm this is intended.
    print('final position error, variance:\n\t', mu - np.array([iters, iters]), var)
    plt.show()
    return(p2)
###############################
run_pf1(N=5000)
I expect a set of 300 coordinate values (estimated) as a result of the particle filter so I can replace my missing values in original files with this predicted ones.

why the points I reconstructed are not in a same coordinate?

I use a binocular camera to reconstruct 3D points from 2D pictures. I took many pictures with the binocular camera and reconstructed the points (the feature points had already been found), but I found that the 3D models I reconstructed are not in the same coordinate system.
I don't know the extrinsic parameters (by the way, I wonder how to obtain them; I already have the intrinsic matrix from calibration).
So I compute the essential matrix E (8-point algorithm), assume the projection matrix of camera 1 is P1 = [I|0], and calculate P2 from P1 and E.
The last step is to calculate the 3D points by triangulation.
Code:
def compute_normalized_image_to_image_matrix(p1, p2, compute_essential=False):
    """ Estimate the fundamental or essential matrix from point matches
        with the normalized 8 point algorithm.
    :input p1, p2: corresponding points with shape 3 x n
    :returns: fundamental or essential matrix with shape 3 x 3,
        scaled so that its [2, 2] entry is 1
    :raises ValueError: if p1 and p2 hold a different number of points
    """
    if p1.shape[1] != p2.shape[1]:
        raise ValueError('Number of points do not match.')

    # condition both point sets before estimating
    normalized_1, T1 = scale_and_translate_points(p1)
    normalized_2, T2 = scale_and_translate_points(p2)

    estimate = compute_image_to_image_matrix(
        normalized_1, normalized_2, compute_essential)

    # undo the conditioning: T1' * estimate * T2
    denormalized = T1.T.dot(estimate.dot(T2))
    return denormalized / denormalized[2, 2]
def compute_fundamental_normalized(p1, p2):
    """ Fundamental matrix from corresponding points p1, p2 (3 x n each). """
    return compute_normalized_image_to_image_matrix(
        p1, p2, compute_essential=False)
def compute_essential_normalized(p1, p2):
    """ Essential matrix from corresponding points p1, p2 (3 x n each). """
    want_essential = True
    return compute_normalized_image_to_image_matrix(
        p1, p2, compute_essential=want_essential)
def scale_and_translate_points(points):
    """ Condition image points: move their centroid to the origin and scale
        them so the average distance to the origin is sqrt(2).
    :param points: array of homogenous point (3 x n)
    :returns: the transformed points (same shape as the input) and the
        3 x 3 matrix that performs the transformation
    """
    centroid = points.mean(axis=1)  # mean of each row
    offset_x = points[0] - centroid[0]
    offset_y = points[1] - centroid[1]
    radius = np.sqrt(np.power(offset_x, 2) + np.power(offset_y, 2))
    scale = np.sqrt(2) / radius.mean()
    # similarity transform: translate by -centroid, then scale
    transform = np.array([
        [scale, 0, -scale * centroid[0]],
        [0, scale, -scale * centroid[1]],
        [0, 0, 1]
    ])
    return np.dot(transform, points), transform
def compute_P_from_fundamental(F):
    """ Build the second camera matrix from a fundamental matrix,
        assuming the first camera is P1 = [I 0].
    """
    left_epipole = compute_epipole(F.T)
    cross_matrix = skew(left_epipole)
    # left 3 x 3 block is F * skew(e)'; the epipole is the last column
    left_block = np.dot(cross_matrix, F.T).T
    stacked = np.vstack((left_block, left_epipole))
    return stacked.T
def compute_P_from_essential(E):
    """ Build the candidate second camera matrices from an essential
        matrix E = [t]R, assuming the first camera is P1 = [I 0].
    :returns: list of 4 possible camera matrices, each 3 x 4.
    """
    U, S, V = np.linalg.svd(E)

    # flip V when the product of the two factors has negative determinant
    if np.linalg.det(np.dot(U, V)) < 0:
        V = -V

    # the four candidates (Hartley p 258): two rotations x two translations
    W = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
    rotations = (np.dot(U, np.dot(W, V)), np.dot(U, np.dot(W.T, V)))
    translation = U[:, 2]
    return [np.column_stack((rotation, sign * translation))
            for rotation in rotations
            for sign in (1, -1)]
def linear_triangulation(p1, p2, m1, m2):
    """
    Linear triangulation (Hartley ch 12.2 pg 312): find the 3D point X
    with p1 = m1 * X and p2 = m2 * X by solving A X = 0.
    :param p1, p2: 2D points; rows 0 and 1 hold x and y, one column per point
    :param m1, m2: Camera matrices associated with p1 and p2. Shape (3 x 4)
    :returns: 4 x n homogenous 3d triangulated points, last row equal to 1
    """
    num_points = p1.shape[1]
    res = np.ones((4, num_points))

    for i in range(num_points):
        # two linear constraints per view
        constraints = [
            p1[0, i] * m1[2, :] - m1[0, :],
            p1[1, i] * m1[2, :] - m1[1, :],
            p2[0, i] * m2[2, :] - m2[0, :],
            p2[1, i] * m2[2, :] - m2[1, :],
        ]
        A = np.asarray(constraints)

        # the solution is the right singular vector of the smallest singular value
        _, _, V = np.linalg.svd(A)
        solution = V[-1, :]
        res[:, i] = solution / solution[3]

    return res
So how can I solve this? I want all my reconstructed points to be in the same coordinate system. Could you please tell me how? Thank you very much!

finding optimum lambda and features for polynomial regression

I am new to Data Mining/ML. I've been trying to solve a polynomial regression problem of predicting the price from given input parameters (already normalized within range[0, 1])
I'm quite close, as my output is in proportion to the correct one, but it seems a bit suppressed. I believe my algorithm is correct; I just don't know how to arrive at an appropriate lambda (the regularization parameter), or how to decide how many polynomial features I should generate, given that the problem says: "The prices per square foot, are (approximately) a polynomial function of the features. This polynomial always has an order less than 4".
Is there a way we could visualize data to find optimum value for these parameters, like we find optimal alpha (step size) and number of iterations by visualizing cost function in linear regression using gradient descent.
Here is my code : http://ideone.com/6ctDFh
from numpy import *
def mapFeature(X1, X2, degree=2):
    """Expand two features into all polynomial terms up to `degree`.

    Arguments:
    X1, X2 -- arrays with one value per sample
    degree -- highest total degree of the generated terms (default 2)

    Returns:
    Array with one row per sample. Column 0 is all ones; the remaining
    columns are X1**(i-j) * X2**j for i = 1..degree and j = 0..i, in that
    order. Row k holds the mapped features of (X1[k], X2[k]).
    """
    columns = [ones((shape(X1)[0], 1))]
    for i in range(1, degree + 1):
        for j in range(0, i + 1):
            term = (X1 ** (i - j)) * (X2 ** j)
            columns.append(term.reshape(-1, 1))
    # stack once at the end instead of copying the whole matrix per term
    return hstack(columns)
def solve():
    """Read the training set from standard input, fit theta, print predictions.

    Input format, as read by this function: a first line with two integers
    "n m", then m lines that each hold n + 1 numbers (n feature values
    followed by the target). Only the first two feature columns are used.
    """
    n, m = (int(token) for token in input().split())

    data = zeros((m, n+1))
    for i in range(0, m):
        fields = input().split()
        data[i] = [float(fields[k]) for k in range(0, n+1)]

    # features in the first n columns, target in the last
    y = data[:, n]
    X = data[:, 0 : n]

    theta = zeros((6, 1))
    X = mapFeature(X[:, 0], X[:, 1])
    ausi = computeCostVect(X, y, theta)

    print("Results usning BFGS : ")
    lamda = 2
    theta, cost = findMinTheta(theta, X, y, lamda)

    # four hard-coded query points, stored as consecutive (x1, x2) pairs
    test = [0.05, 0.54, 0.91, 0.91, 0.31, 0.76, 0.51, 0.31]
    print("prediction for 0.31 , 0.76 (using BFGS) : ")
    for first, second in zip(test[0::2], test[1::2]):
        print(mapFeature(array([first]), array([second])).dot( theta ))
The current output is:
183.43478288
349.10716957
236.94627602
208.61071682
The correct output should be:
180.38
1312.07
440.13
343.72

Resources