Generate 2D distribution avoiding loops - python-3.x

I want to construct a 2D Gaussian-like distribution on an (Nx, Ny) array of the form:
return np.exp(-0.5*((x-xp)**2 + (y-yp)**2)/SG**2)
where (x,y), in this case, would correspond to [i, j] matrix indices.
I am doing this by looping through a np.zeros((Nx,Ny)) matrix and updating its values with the defined function.
Basically, I would like to know if there is a way to generate a similar result while avoiding the for loops that I am using here. My intuition tells me that np.meshgrid or zip(x, y) should do it, but I have been unable to replicate it.
(I would like to avoid the auxiliary distribution_Gp function and be able to use the normaldist function directly.)
Here is my sample code of how I am using it all together:
import numpy as np

def normaldist(x, y, Nx, Ny, xp, yp, SG=1):
    """2D mesh (Nx, Ny) with Gaussian distribution values."""
    z = np.exp(-0.5*((x - xp)**2 + (y - yp)**2)/SG**2)
    # /(SG*np.sqrt(np.pi*2.))  # non-normalized
    return z

def distribution_Gp(Nx, Ny, xp, yp, SG=1):
    """Fill up the C0(Nx, Ny) array for the specified values and conditions."""
    mask = np.zeros((Nx, Ny))
    for j in range(0, Ny):
        for i in range(0, Nx):
            if i <= Nx*Ny*normaldist(i, j, Nx, Ny, xp, yp, SG):
                mask[i, j] = normaldist(i, j, Nx, Ny, xp, yp, SG)
    return mask
Nx = 11
Ny = Nx
arr_img = distribution_Gp(Nx, Ny, Nx//2, Ny//3, SG=2)

A matrix with values sampled from a normal distribution can be generated with:
np.random.normal(mean, std, (Nx, Ny))
where Nx and Ny give the shape of the output, as in your code.
If you want to apply any custom function to a matrix, this can be accomplished by:
arr = np.zeros((Nx, Ny))
f = lambda x: x + 3
result = f(arr)

By using a lambda with two arguments together with meshgrid, it is possible to replicate distribution_Gp while avoiding the intermediate function:
x = np.linspace(0, 10, Nx)
y = np.linspace(0, 10, Ny)
# note: normaldist needs all of its positional arguments (Nx, Ny, xp, yp)
f = lambda x, y: normaldist(x, y, Nx, Ny, Nx//2, Ny//3, SG=2).T
X, Y = np.meshgrid(x, y)
result = f(X, Y)
which produces the same result as:
result = distribution_Gp(Nx, Ny, Nx//2, Ny//3, SG=2)
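Alternatively, np.indices builds the integer index grids directly, which matches the [i, j] index interpretation in the question without constructing linspace axes. A minimal sketch that, like the answer above, skips the if condition inside distribution_Gp:
import numpy as np

def normaldist(x, y, xp, yp, SG=1):
    # unnormalized 2D Gaussian centered at (xp, yp)
    return np.exp(-0.5*((x - xp)**2 + (y - yp)**2)/SG**2)

Nx = Ny = 11
I, J = np.indices((Nx, Ny))   # I[i, j] == i and J[i, j] == j
result = normaldist(I, J, Nx//2, Ny//3, SG=2)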

Related

Numpy Vectorization for Nested 'for' loop

I was trying to write a program which plots the level set for any given function.
rmin = -5.0
rmax = 5.0
c = 4.0
x = np.arange(rmin, rmax, 0.1)
y = np.arange(rmin, rmax, 0.1)
x, y = np.meshgrid(x, y)
f = lambda x, y: y**2.0 - 4*x
realplots = []
for i in range(x.shape[0]):
    for j in range(x.shape[1]):
        if abs(f(x[i,j], y[i,j]) - c) < 1e-4:
            realplots.append([x[i,j], y[i,j]])
But being a nested for loop, it takes a lot of time. Any help in vectorizing the above code, or a new method of plotting the level set, is highly appreciated. (Note: the function f will be changed at run time, so the vectorization must be done without relying on the function's properties.)
I tried vectorizing through
ans = np.where(abs(f(x,y)-c)<1e-4,np.array([x,y]),[0,0])
but it was giving me "operands could not be broadcast together with shapes (100,100) (2,100,100) (2,)". I was adding [0, 0] as an escape for the else condition in np.where, which is indeed wrong.
Since you get the values rather than the indexes, you don't really need np.where.
You can directly use the mask to index x and y, look at the "Boolean array indexing" section of the documentation.
It is straightforward:
def vectorized(x, y, c, f, threshold):
    mask = np.abs(f(x, y) - c) < threshold
    x, y = x[mask], y[mask]
    return np.stack([x, y], axis=-1)
Your function for reference:
def op(x, y, c, f, threshold):
    res = []
    for i in range(x.shape[0]):
        for j in range(x.shape[1]):
            if abs(f(x[i, j], y[i, j]) - c) < threshold:
                res.append([x[i, j], y[i, j]])
    return res
Tests:
rmin, rmax = -5.0, +5.0
c = 4.0
threshold = 1e-4
x = np.arange(rmin, rmax, 0.1)
y = np.arange(rmin, rmax, 0.1)
x, y = np.meshgrid(x, y)
f = lambda x, y: y**2 - 4 * x
res_op = op(x, y, c, f, threshold)
res_vec = vectorized(x, y, c, f, threshold)
assert np.allclose(res_op, res_vec)
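If the goal is simply to plot the level set, matplotlib can also draw the curve f(x, y) = c directly from the grid, with no thresholding at all. A short sketch reusing x, y, f, and c from the tests above:
import matplotlib.pyplot as plt

# contour traces the level curve f(x, y) = c by interpolating the grid values
plt.contour(x, y, f(x, y), levels=[c])
plt.gca().set_aspect('equal')
plt.show()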

Numpy.linalg.eig is giving different results than numpy.linalg.eigh for Hermitian matrices

I have a Hermitian matrix (specifically, a Hamiltonian). Though the phase of a single eigenvector can be arbitrary, the quantities I am calculating are physical (I reduced the code a bit, keeping just the reproducible part). eig and eigh are giving very different results.
import numpy as np
import numpy.linalg as nlg
import matplotlib.pyplot as plt

def Ham(Ny, Nx, t, phi):
    h = np.zeros((Ny, Ny), dtype=complex)
    for ii in range(Ny-1):
        h[ii+1, ii] = t
    h[Ny-1, 0] = t
    h = h + np.transpose(np.conj(h))
    u = np.zeros((Ny, Ny), dtype=complex)
    for ii in range(Ny):
        u[ii, ii] = -t*np.exp(-2*np.pi*1j*phi*ii)
    u = u + 1e-10*np.eye(Ny)
    H = np.kron(np.eye(Nx, dtype=int), h) + np.kron(np.diag(np.ones(Nx-1), 1), u) + np.kron(np.diag(np.ones(Nx-1), -1), np.transpose(np.conj(u)))
    H[0:Ny, Ny*(Nx-1):Ny*Nx] = np.transpose(np.conj(u))
    H[Ny*(Nx-1):Ny*Nx, 0:Ny] = u
    x = []; y = []
    for jj in range(1, Nx+1):
        for ii in range(1, Ny+1):
            x.append(jj); y.append(ii)
    x = np.asarray(x)
    y = np.asarray(y)
    return H, x, y

def C_num(Nx, Ny, E, t, phi):
    H, x, y = Ham(Ny, Nx, t, phi)
    ifhermitian = np.allclose(H, np.transpose(np.conj(H)), rtol=1e-5, atol=1e-8)
    assert ifhermitian == True
    Hp = H
    V, wf = nlg.eigh(Hp)  ## Check: eig gives a different result
    idx = np.argsort(np.real(V))
    wf = wf[:, idx]
    normmat = wf*np.conj(wf)
    norm = np.sqrt(np.sum(normmat, axis=0))
    wf = wf/(norm*np.sqrt(len(H)))
    wf = wf[:, V <= E]  ## Choose a subset of eigenvectors
    V01 = wf*np.exp(1j*x)[:, None]; V12 = wf*np.exp(1j*y)[:, None]
    V23 = wf*np.exp(1j*x)[:, None]; V30 = wf*np.exp(1j*y)[:, None]
    wff = np.transpose(np.conj(wf))
    C01 = np.dot(wff, V01); C12 = np.dot(wff, V12); C23 = np.dot(wff, V23); C30 = np.dot(wff, V30)
    F = nlg.multi_dot([C01, C12, C23, C30])
    ifhermitian = np.allclose(F, np.transpose(np.conj(F)), rtol=1e-5, atol=1e-8)
    assert ifhermitian == True
    evals, efuns = nlg.eig(F)  ## Check: eig gives a different result
    C = (1/(2*np.pi))*np.sum(np.angle(evals))
    return C

C = C_num(16, 16, 0, 1, 1/8)
print(C)
Changing both nlg.eigh calls to nlg.eig, or even changing only the last one, gives very different results.
As I mentioned elsewhere, eigenvalues and eigenvectors are not unique.
The only guarantee is that each pair satisfies $A v = \lambda v$; the matrices returned by eig and eigh both describe valid solutions, and it is natural that eig gives inexact but approximately correct results.
You can see that both solutions diagonalize your matrix, just in different ways:
H, x, y = Ham(16, 16, 1, 1./8)
D, V = nlg.eig(H)
Dh, Vh = nlg.eigh(H)
Then
import matplotlib.pyplot as plt
plt.figure(figsize=(14, 7))
plt.subplot(121)
plt.imshow(abs(np.conj(Vh.T) @ H @ Vh))
plt.title('diagonalized with eigh')
plt.subplot(122)
plt.imshow(abs(np.conj(V.T) @ H @ V))
plt.title('diagonalized with eig')
Both resulting matrices come out numerically diagonal: both diagonalizations were successful, but the eigenvalues are in a different order.
If you sort the eigenvalues, you see that they match:
plt.plot(np.diag(np.real(np.conj(Vh.T) @ H @ Vh)))
plt.plot(np.diag(np.imag(np.conj(Vh.T) @ H @ Vh)))
plt.plot(np.sort(np.diag(np.real(np.conj(V.T) @ H @ V))))
plt.title('eigenvalues')
plt.legend(['real eigh', 'imag eigh', 'sorted real eig'], loc='upper left')
Since many eigenvalues are repeated, the eigenvector associated with a given eigenvalue is not unique either; the only thing we can guarantee is that the eigenvectors for a given eigenvalue span the same subspace.
The diagonalization test is the best in my opinion.
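To make the subspace point concrete: two different orthonormal bases of the same degenerate eigenspace give the same orthogonal projector $V V^\dagger$, even though the individual vectors differ. A toy sketch (illustrative, not from the original answer):
import numpy as np

# two orthonormal bases of the same 2D subspace of R^3
V = np.array([[1.0, 0.0],
              [0.0, 1.0],
              [0.0, 0.0]])                 # columns span {e1, e2}
R = np.array([[np.cos(0.7), -np.sin(0.7)],
              [np.sin(0.7),  np.cos(0.7)]])
W = V @ R                                  # rotated basis of the same subspace
assert not np.allclose(V, W)               # the basis vectors differ...
assert np.allclose(V @ V.T, W @ W.T)       # ...but the projectors are identical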
Is eigh always better than eig?
If you search for eigenvalue routines in LAPACK you will find many options, so I cannot discuss every possible implementation here. Common sense says we can expect the symmetric/Hermitian routines to perform better; otherwise there would be no reason to add one more, more limited, routine. But I have never carefully tested the behavior of eig vs eigh.
To build intuition, compare the equation for the tridiagonalization of symmetric matrices with the equation for the reduction of a general matrix to its Hessenberg form, found here.
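In practice, for a Hermitian input the two routines agree once the eigenvalues are sorted. A quick self-contained check (an illustrative sketch, not from the original answer):
import numpy as np
import numpy.linalg as nlg

rng = np.random.default_rng(0)
A = rng.normal(size=(6, 6)) + 1j*rng.normal(size=(6, 6))
H = A + A.conj().T                          # random Hermitian matrix

D, _ = nlg.eig(H)                           # unordered, complex dtype
Dh, _ = nlg.eigh(H)                         # real, ascending order
assert np.allclose(D.imag, 0, atol=1e-8)    # Hermitian => real spectrum
assert np.allclose(np.sort(D.real), Dh)     # same values once sorted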

Create Image using Matplotlib imshow meshgrid and custom colors

I am trying to create an image where the x axis is the width and the y axis is the height of the image, and where each point can be given a color based on an RGB mapping. From looking at imshow() in Matplotlib, I guess I need to create a meshgrid of the form (N x M x 3), where the 3 holds the RGB values.
But so far I have not managed to understand how to do that. Let's say I have this example:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
x_min = 1
x_max = 5
y_min = 1
y_max = 5
Nx = 5 #number of steps for x axis
Ny = 5 #number of steps for y axis
x = np.linspace(x_min, x_max, Nx)
y = np.linspace(y_min, y_max, Ny)
#Can then create a meshgrid using this to get the x and y axis system
xx, yy = np.meshgrid(x, y)
#imagine I have some function that does something based on the x and y values
def somefunc(x_value, y_value):
    #do something and return rgb based on that
    return x_value + y_value
res = somefunc(xx, yy)
cmap = LinearSegmentedColormap.from_list('mycmap', ['white', 'blue', 'black'])
plt.figure(dpi=100)
plt.imshow(res, cmap=cmap, interpolation='bilinear')
plt.show()
And this creates a plot, but what would I have to do if my goal was to give specific RGB values based on the x and y values inside somefunc, and to make the resulting numpy array an N x M x 3 array?
I tried to make the somefunc function return a tuple of RGB values (r, g, b), but that does not seem to work.
It will of course completely depend on what you want to do with the values you supply to the function. So let's assume you just want to put the x values into the red channel and the y values into the blue channel; this could look like
def somefunc(x_value, y_value):
    return np.dstack((x_value/5., np.zeros_like(x_value), y_value/5.))
Complete example:
import numpy as np
import matplotlib.pyplot as plt
x_min = 1
x_max = 5
y_min = 1
y_max = 5
Nx = 5 #number of steps for x axis
Ny = 5 #number of steps for y axis
x = np.linspace(x_min, x_max, Nx)
y = np.linspace(y_min, y_max, Ny)
#Can then create a meshgrid using this to get the x and y axis system
xx, yy = np.meshgrid(x, y)
#imagine I have some function that does something based on the x and y values
def somefunc(x_value, y_value):
    return np.dstack((x_value/5., np.zeros_like(x_value), y_value/5.))
res = somefunc(xx, yy)
plt.figure(dpi=100)
plt.imshow(res)
plt.show()
If you already have a (more complicated) function that returns an RGB tuple you may loop over the grid to fill an empty array with the values of the function.
#If you already have some function that returns an RGB tuple
def somefunc(x_value, y_value):
    if x_value > 2 and y_value < 3:
        return np.array(((y_value+1)/4., (y_value+2)/5., 0.43))
    elif x_value <= 2:
        return np.array((y_value/5., (x_value+3)/5., 0.0))
    else:
        return np.array((x_value/5., (y_value+5)/10., 0.89))

# you may loop over the grid to fill a new array with those values
res = np.zeros((xx.shape[0], xx.shape[1], 3))
for i in range(xx.shape[0]):
    for j in range(xx.shape[1]):
        res[i, j, :] = somefunc(xx[i, j], yy[i, j])
plt.figure(dpi=100)
plt.imshow(res)
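The loop is fine for small grids, but the same three-branch rule can also be evaluated without a per-pixel Python loop by using boolean masks on the grids. A sketch reproducing the somefunc above (assumes the same 5x5 grid as in the example):
import numpy as np

x = np.linspace(1, 5, 5)
y = np.linspace(1, 5, 5)
xx, yy = np.meshgrid(x, y)

res = np.empty(xx.shape + (3,))
m1 = (xx > 2) & (yy < 3)          # first branch of somefunc
m2 = (xx <= 2) & ~m1              # elif branch
m3 = ~(m1 | m2)                   # else branch
res[m1] = np.stack([(yy[m1] + 1)/4., (yy[m1] + 2)/5., np.full(m1.sum(), 0.43)], axis=-1)
res[m2] = np.stack([yy[m2]/5., (xx[m2] + 3)/5., np.zeros(m2.sum())], axis=-1)
res[m3] = np.stack([xx[m3]/5., (yy[m3] + 5)/10., np.full(m3.sum(), 0.89)], axis=-1)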

Neural network numerical gradient check not working with matrices using Python-numpy

I'm trying to implement a simple numerical gradient check using Python 3 and numpy, to be used for a neural network.
It works well for simple 1D functions but fails when applied to matrices of parameters.
My guess is that either my cost function is not calculated well for a matrix or that the way I do the numerical gradient check is wrong somehow.
See code below and thanks for your help!
import numpy as np
import random
import copy

def gradcheck_naive(f, x):
    """ Gradient check for a function f.
    Arguments:
    f -- a function that takes a single argument (x) and outputs the
         cost (fx) and its gradient grad
    x -- the point (numpy array) to check the gradient at
    """
    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    #fx=cost
    #grad=gradient
    h = 1e-4
    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index  #multi-index number
        random.setstate(rndstate)
        xp = copy.deepcopy(x)
        xp[ix] += h
        fxp, gradp = f(xp)
        random.setstate(rndstate)
        xn = copy.deepcopy(x)
        xn[ix] -= h
        fxn, gradn = f(xn)
        numgrad = (fxp - fxn) / (2*h)
        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print("Gradient check failed.")
            print("First gradient error found at index %s" % str(ix))
            print("Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad))
            return
        it.iternext()  # Step to next dimension
    print("Gradient check passed!")

#sanity check with 1D function
exp_f = lambda x: (np.sum(np.exp(x)), np.exp(x))
gradcheck_naive(exp_f, np.random.randn(4,5))  #this works fine

#sanity check with matrices
#forward pass
W = np.random.randn(5,10)
x = np.random.randn(10,3)
D = W.dot(x)
#backpropagation pass
gradx = W
func_f = lambda x: (np.sum(W.dot(x)), gradx)
gradcheck_naive(func_f, np.random.randn(10,3))  #this does not work (grad check fails)
I figured it out! (My math teacher would be so proud...)
The short answer is that I was mixing up the matrix dot product and the element-wise product.
When using an element-wise product, the gradient is:
W = np.array([[2,4],[3,5],[3,1]])
x = np.array([[1,7],[5,-1],[4,7]])
D = W*x #element-wise multiplication
gradx = W
func_f = lambda x: (np.sum(W*x), gradx)
gradcheck_naive(func_f, np.random.randn(3,2))
When using the dot product, the gradient becomes:
W = np.array([[2,4],[3,5]])
x = np.array([[1,7],[5,-1],[5,1]])
D = x.dot(W)
unitary = np.array([[1,1],[1,1],[1,1]])
gradx = unitary.dot(np.transpose(W))
func_f = lambda x: (np.sum(x.dot(W)), gradx)
gradcheck_naive(func_f, np.random.randn(3,2))
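The same rule also repairs the failing W.dot(x) case from the original question. A sketch (derived with the same reasoning; not part of the original post): for f(x) = np.sum(W.dot(x)), the gradient with respect to x[j, k] is the j-th column sum of W, so the analytic gradient is W.T times a matrix of ones, not W itself.
W = np.random.randn(5, 10)
# d/dx[j,k] of sum(W.dot(x)) is sum_i W[i,j], replicated across the columns of x
gradx = W.T.dot(np.ones((5, 3)))   # shape (10, 3), matches x
func_f = lambda x: (np.sum(W.dot(x)), gradx)
gradcheck_naive(func_f, np.random.randn(10, 3))  # now passes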
I was also wondering how the element-wise product behaves with matrices of unequal dimensions, like below:
x = np.random.randn(10)
W = np.random.randn(3,10)
D1 = x*W
D2 = W*x
It turns out that D1 = D2 (with the same dimensions as W, i.e. 3x10), and my understanding is that x is broadcast by numpy to a 3x10 matrix to allow the element-wise multiplication.
Conclusion: when in doubt, write it out with small matrices to figure out where the error is.

finding optimum lambda and features for polynomial regression

I am new to Data Mining/ML. I've been trying to solve a polynomial regression problem of predicting the price from given input parameters (already normalized within the range [0, 1]).
I'm quite close, as my output is in proportion to the correct one, but it seems a bit suppressed. My algorithm is correct; I just don't know how to reach an appropriate lambda (the regularization parameter), or how to decide to what extent I should populate the features, as the problem says: "The prices per square foot are (approximately) a polynomial function of the features. This polynomial always has an order less than 4."
Is there a way we could visualize the data to find optimum values for these parameters, like we find the optimal alpha (step size) and number of iterations by visualizing the cost function in linear regression using gradient descent?
Here is my code: http://ideone.com/6ctDFh
from numpy import *

def mapFeature(X1, X2):
    degree = 2
    out = ones((shape(X1)[0], 1))
    for i in range(1, degree+1):
        for j in range(0, i+1):
            term1 = X1 ** (i-j)
            term2 = X2 ** (j)
            term = (term1 * term2).reshape(shape(term1)[0], 1)
            # note that out[i] holds the mapped features of X1[i], X2[i];
            # out stores the feature set of one sample horizontally in out[i]
            out = hstack((out, term))
    return out

def solve():
    n, m = input().split()
    m = int(m)
    n = int(n)
    data = zeros((m, n+1))
    for i in range(0, m):
        ausi = input().split()
        for k in range(0, n+1):
            data[i, k] = float(ausi[k])
    X = data[:, 0:n]
    y = data[:, n]
    theta = zeros((6, 1))
    X = mapFeature(X[:, 0], X[:, 1])
    ausi = computeCostVect(X, y, theta)  # defined in the full code (see link)
    # print(X)
    print("Results using BFGS : ")
    lamda = 2
    theta, cost = findMinTheta(theta, X, y, lamda)  # defined in the full code (see link)
    test = [0.05, 0.54, 0.91, 0.91, 0.31, 0.76, 0.51, 0.31]
    print("prediction for 0.31 , 0.76 (using BFGS) : ")
    for i in range(0, 7, 2):
        print(mapFeature(array([test[i]]), array([test[i+1]])).dot(theta))
    # pyplot.plot(X[:, 1], y, 'rx', markersize = 5)
    # fig = pyplot.figure()
    # ax = fig.add_subplot(1,1,1)
    # ax.scatter(X[:, 1], X[:, 2], s=y)  # Added third variable income as size of the bubble
    # pyplot.show()
The current output is:
183.43478288
349.10716957
236.94627602
208.61071682
The correct output should be:
180.38
1312.07
440.13
343.72
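One standard way to choose lambda visually, analogous to plotting the cost against alpha in gradient descent, is a validation curve: hold out part of the data, sweep lambda over a range, and plot the held-out error. A minimal self-contained sketch on synthetic data (using the ridge closed form rather than the BFGS pipeline above, so no external helper functions are needed):
import numpy as np
import matplotlib.pyplot as plt

rng = np.random.default_rng(0)
X = rng.uniform(0, 1, size=(80, 6))
X[:, 0] = 1.0                                    # bias column
y = X @ rng.normal(size=6) + 0.1*rng.normal(size=80)
X_tr, y_tr, X_val, y_val = X[:60], y[:60], X[60:], y[60:]

lambdas = np.logspace(-4, 2, 25)
val_err = []
for lam in lambdas:
    # ridge closed form: theta = (X'X + lam*I)^(-1) X'y
    theta = np.linalg.solve(X_tr.T @ X_tr + lam*np.eye(6), X_tr.T @ y_tr)
    val_err.append(np.mean((X_val @ theta - y_val)**2))

plt.semilogx(lambdas, val_err)
plt.xlabel('lambda'); plt.ylabel('validation MSE')
plt.show()
print('best lambda:', lambdas[int(np.argmin(val_err))])
The same sweep applies to the polynomial degree: since the problem bounds the order below 4, one can try degree = 1, 2, 3 in mapFeature and keep the value with the lowest validation error.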
