I have coded the Laplacian operator for an irregular mesh (created with the scipy.spatial.Delaunay function).
I get no errors, but the results are not correct: the eigenvectors are correct but the eigenvalues are too large (in absolute value).
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial
def rect_drum(L,H,U):
    """Return the exact eigenvalues of an L x H rectangular drum that are >= -U.

    The eigenvalues are -pi**2 * ((k/L)**2 + (l/H)**2) for integers k, l >= 1.
    They are returned as a 1-D array, ordered with k as the outer index and
    l as the inner index. The array is empty when no eigenvalue qualifies.
    """
    def eigenvalue(k, l):
        return -np.pi**2*((k/L)**2+(l/H)**2)

    vals = []
    k = 1
    # The eigenvalue decreases when k or l grows, so each scan stops at the
    # first value that falls below -U.
    while eigenvalue(k, 1) >= -U:
        l = 1
        while eigenvalue(k, l) >= -U:
            vals.append(eigenvalue(k, l))
            l += 1
        k += 1
    return np.array(vals)
def count_vp(tab,U):
    """Return the entries of `tab` that are greater than or equal to -U.

    The function returns the selected values, not their number; callers
    obtain the count with ``len()`` of the result.
    """
    tab = np.asarray(tab)
    return tab[tab>=-U]
def in_curve(f,fargs,shape,a):
    """Return the grid points (i*a, j*a) lying inside the curve f(x, y, *fargs) < 0.

    f     -- callable f(x, y, *fargs); a negative value means "inside".
    fargs -- extra positional arguments forwarded to f.
    shape -- (number of rows j, number of columns i) of the scanned grid.
    a     -- grid spacing.

    The result is an (n, 2) array of [x, y] rows, scanned row by row.
    It has shape (0, 2) when no point is inside, so column indexing stays valid.
    """
    points = [
        [i*a, j*a]
        for j in range(shape[0])
        for i in range(shape[1])
        if f(i*a, j*a, *fargs) < 0
    ]
    return np.array(points).reshape(-1, 2)
def triang(points,a,f,fargs,bord):
    """Build the staggered point set used for the triangular mesh.

    The y coordinates of `points` are stretched by sqrt(3). A second copy of
    `points` plus `bord` is stretched the same way and shifted by
    (a/2, sqrt(3)/2*a). Points for which f(x+eps, y+eps, *fargs) > 0 are
    dropped, then the rim points `bord` are appended unchanged.

    Returns (all_points, n_interior, n_rim): the first n_interior rows of
    all_points are the kept points, the last n_rim rows are `bord`.
    `points` is not modified.
    """
    sqrt3 = np.sqrt(3)
    tri_points = points.copy()
    tri_points[:,1] *= sqrt3
    tri_points2 = np.vstack((points,bord))
    tri_points2[:,1] *= sqrt3
    tri_points2[:,0] += a/2
    tri_points2[:,1] += sqrt3/2*a
    fin = np.vstack((tri_points,tri_points2))
    eps = 0.01
    # One pass with a boolean mask instead of repeated np.delete calls,
    # each of which copied the whole array.
    keep = np.array(
        [not (f(x+eps, y+eps, *fargs) > 0) for x, y in fin],
        dtype=bool,
    )
    fin = fin[keep]
    return np.vstack((fin,bord)),len(fin),len(bord)
def tri_ang(points,ind,p0):
    """Sort neighbour points in trigonometric (counter-clockwise) order around p0.

    points -- (n, 2) array of neighbour coordinates.
    ind    -- the n indices that go with `points`.
    p0     -- the centre point.

    Returns (sorted_points, sorted_ind), ordered by increasing
    arctan2(dy, dx) in (-pi, pi]. Ties keep their input order.
    """
    points = np.asarray(points)
    ind = np.asarray(ind)
    rel = points - p0
    angles = np.arctan2(rel[:,1], rel[:,0])
    order = np.argsort(angles, kind='stable')
    return points[order], ind[order]
def M(points,tri,Nint):
    """Assemble the cotangent-weight matrix on the first Nint vertices, scaled by 1/A.

    points -- (n, 2) array of vertex coordinates; rows 0..Nint-1 are the
              vertices for which a row is built.
    tri    -- triangulation object exposing `vertex_neighbor_vertices`
              (scipy.spatial.Delaunay in the calling script).
    Nint   -- number of rows/columns of the result.

    Returns the (Nint, Nint) array (1/A) * W, where A[i] is one third of the
    summed areas of the triangles around vertex i.

    NOTE(review): this is the version shown in the question and it still has
    the behaviour the question reports (eigenvalues too large). Off-diagonal
    entries get the same negative sign as the diagonal, and neighbours with
    index >= Nint are left out of the diagonal sum `tot`.
    """
    def cross2(u, v):
        # z component of the cross product of two in-plane vectors
        return u[0]*v[1] - u[1]*v[0]

    indptr,ind = tri.vertex_neighbor_vertices
    W = np.zeros((Nint,Nint)) # cotangents matrix
    A = np.zeros((Nint,1)) # vertex area for each point i (A[i])
    for i in range(Nint):
        tot = 0
        nhb_ind = ind[indptr[i]:indptr[i+1]] # indices of the neighbours of point i
        nhb = points[nhb_ind] # their coordinates
        # neighbours sorted in trigonometric order around point i
        nhb,nhb_ind = tri_ang(nhb,nhb_ind,points[i])
        n_nhb = len(nhb_ind)
        for j in range(n_nhb):
            vec = nhb[j]-points[i] # vector from point i to neighbour j
            vec_av = nhb[j-1]-points[i] # same, for the previous neighbour (wraps around)
            vec_ap = nhb[(j+1) % n_nhb]-points[i] # same, for the next neighbour (wraps around)
            # one third of the area of the triangle (i, j-1, j): ||AxB||/2/3
            A[i] += 0.5/3*abs(cross2(vec,vec_av))
            if nhb_ind[j] < Nint:
                # cotangents from dot and cross products: A.B/||AxB||
                cotan_alpha = np.dot(vec_av,vec_av-vec)/abs(cross2(vec_av,vec_av-vec))
                cotan_beta = np.dot(vec_ap,vec_ap-vec)/abs(cross2(vec_ap,vec_ap-vec))
                # Wij value :
                W[i,nhb_ind[j]] = -0.5*(cotan_alpha+cotan_beta)
                tot += cotan_alpha+cotan_beta
        W[i,i] = -0.5*tot # diagonal values
    return (1/A)*W
def rect(x,y,L,H,x0=0,y0=0):
    """Return -1 if (x, y) is strictly inside the L x H rectangle whose lower-left corner is (x0, y0), else 1."""
    return -1 if 0<x-x0<L and 0<y-y0<H else 1
def rect_rim(L,H,a,x0=0,y0=0):
    """Return the rim points of the L x H rectangle with lower-left corner (x0, y0).

    Points are spaced by `a` and stacked in this order: top edge, bottom edge,
    left edge, right edge, top-right corner, bottom-right corner.
    The result is an (n, 2) array of [x, y] rows.
    """
    tab1 = np.arange(x0,L+x0,a)[:,np.newaxis]
    top = np.hstack((tab1,H*np.ones((len(tab1),1))+y0))
    bottom = np.hstack((tab1,np.zeros((len(tab1),1))+y0))
    tab2 = np.arange(y0+a,H+y0,a)[:,np.newaxis]
    left = np.hstack((np.zeros((len(tab2),1))+x0,tab2))
    right = np.hstack((L*np.ones((len(tab2),1))+x0,tab2))
    top_right = np.array([[L+x0,H+y0]])
    # The bottom-right corner must follow the y0 offset like every other point.
    bottom_right = np.array([[L+x0,y0]])
    return np.vstack((top,bottom,left,right,top_right,bottom_right))
# sample with a square 1*1
L = 1
H = 1
dl = 0.05
sol = in_curve(rect,[L,H],(100,100),dl)
sol_tri,Nint,Nbord = triang(sol,dl,rect,[L,H],rect_rim(L,H,dl))
# optional check of the point set:
# plt.plot(sol_tri[:,0],sol_tri[:,1],linestyle="",marker="+",label="tri")
# plt.plot(sol[:,0],sol[:,1],linestyle="",marker="x")
# plt.legend()
# plt.show()
# triangulation
tri = scipy.spatial.Delaunay(sol_tri)
# optional check of the mesh:
# plt.triplot(sol_tri[:,0],sol_tri[:,1],tri.simplices)
# plt.show()
# NOTE: this rebinds the name M from the function to the matrix it returns,
# so the function cannot be called again afterwards.
M = M(sol_tri,tri,Nint)
valp,vecp = np.linalg.eig(M) # eigenvalues and eigenvectors
vecp = np.real(vecp)
# comparison with the exact solution:
T = 1000
U = np.arange(0,T,1)
NUsim = np.array([len(count_vp(valp,u)) for u in U])
NU = np.array([len(rect_drum(L,H,u)) for u in U])
plt.plot(U,NUsim,label='simulation')
plt.plot(U,NU,label='exacts')
plt.legend()
plt.show()
# 3D plot of an eigenvector
# the rim values are zero, so pad the eigenvectors with Nbord rows of zeros
vecp_tot = np.vstack((vecp,np.zeros((Nbord,Nint))))
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in recent Matplotlib
ax = fig.add_subplot(projection='3d')
ax.plot_trisurf(sol_tri[:,0],sol_tri[:,1],vecp_tot[:,0],triangles=tri.simplices)
plt.show()
The Laplacian is the function named "M".
The "in_curve" function returns the points inside a curve defined by f(x,y,*fargs) < 0 (a square in the sample).
The "triang" function returns these points together with added points (to form a triangular mesh). It also takes the points on the rim of the curve, produced by another function (for better precision); in the sample this is the "rect_rim" function.
I used the formula given at https://en.wikipedia.org/wiki/Discrete_Laplace_operator ("mesh laplacians").
I have solved my problem: it was a sign problem and a rim (boundary) problem.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial
def rect_drum(L,H,U):
    """Return the exact eigenvalues of an L x H rectangular drum that are >= -U.

    The eigenvalues are -pi**2 * ((k/L)**2 + (l/H)**2) for integers k, l >= 1.
    They are returned as a 1-D array, ordered with k as the outer index and
    l as the inner index. The array is empty when no eigenvalue qualifies.
    """
    def eigenvalue(k, l):
        return -np.pi**2*((k/L)**2+(l/H)**2)

    vals = []
    k = 1
    # The eigenvalue decreases when k or l grows, so each scan stops at the
    # first value that falls below -U.
    while eigenvalue(k, 1) >= -U:
        l = 1
        while eigenvalue(k, l) >= -U:
            vals.append(eigenvalue(k, l))
            l += 1
        k += 1
    return np.array(vals)
def count_vp(tab,U):
    """Return the entries of `tab` that are greater than or equal to -U.

    The function returns the selected values, not their number; callers
    obtain the count with ``len()`` of the result.
    """
    tab = np.asarray(tab)
    return tab[tab>=-U]
def in_curve(f,fargs,shape,a):
    """Return the grid points (i*a, j*a) lying inside the curve f(x, y, *fargs) < 0.

    f     -- callable f(x, y, *fargs); a negative value means "inside".
    fargs -- extra positional arguments forwarded to f.
    shape -- (number of rows j, number of columns i) of the scanned grid.
    a     -- grid spacing.

    The result is an (n, 2) array of [x, y] rows, scanned row by row.
    It has shape (0, 2) when no point is inside, so column indexing stays valid.
    """
    points = [
        [i*a, j*a]
        for j in range(shape[0])
        for i in range(shape[1])
        if f(i*a, j*a, *fargs) < 0
    ]
    return np.array(points).reshape(-1, 2)
def triang(points,a,f,fargs,bord):
    """Build the staggered point set used for the triangular mesh.

    The y coordinates of `points` are stretched by sqrt(3). A second copy of
    `points` plus `bord` is stretched the same way and shifted by
    (a/2, sqrt(3)/2*a). Points for which f(x+eps, y+eps, *fargs) > 0 are
    dropped, then the rim points `bord` are appended unchanged.

    Returns (all_points, n_interior, n_rim): the first n_interior rows of
    all_points are the kept points, the last n_rim rows are `bord`.
    `points` is not modified.
    """
    sqrt3 = np.sqrt(3)
    tri_points = points.copy()
    tri_points[:,1] *= sqrt3
    tri_points2 = np.vstack((points,bord))
    tri_points2[:,1] *= sqrt3
    tri_points2[:,0] += a/2
    tri_points2[:,1] += sqrt3/2*a
    fin = np.vstack((tri_points,tri_points2))
    eps = 0.01
    # One pass with a boolean mask instead of repeated np.delete calls,
    # each of which copied the whole array.
    keep = np.array(
        [not (f(x+eps, y+eps, *fargs) > 0) for x, y in fin],
        dtype=bool,
    )
    fin = fin[keep]
    return np.vstack((fin,bord)),len(fin),len(bord)
def tri_ang(points,ind,p0):
    """Sort neighbour points in trigonometric (counter-clockwise) order around p0.

    points -- (n, 2) array of neighbour coordinates.
    ind    -- the n indices that go with `points`.
    p0     -- the centre point.

    Returns (sorted_points, sorted_ind), ordered by increasing
    arctan2(dy, dx) in (-pi, pi]. Ties keep their input order.
    """
    points = np.asarray(points)
    ind = np.asarray(ind)
    rel = points - p0
    angles = np.arctan2(rel[:,1], rel[:,0])
    order = np.argsort(angles, kind='stable')
    return points[order], ind[order]
def Laplacian(points,tri,Nint):
    """Assemble the cotangent mesh Laplacian on the first Nint vertices.

    points -- (n, 2) array of vertex coordinates; rows 0..Nint-1 are the
              vertices for which a row is built, the remaining rows are the
              rim vertices (as returned by `triang`).
    tri    -- triangulation object exposing `vertex_neighbor_vertices`
              (scipy.spatial.Delaunay in the calling script).
    Nint   -- number of rows/columns of the result.

    Returns the (Nint, Nint) array (1/A) * W. W[i, j] is half the sum of the
    two cotangents opposite edge (i, j), W[i, i] is minus the sum of the
    weights over all neighbours of i (rim neighbours included), and A[i] is
    one third of the summed areas of the triangles around vertex i.
    """
    def cross2(u, v):
        # z component of the cross product of two in-plane vectors
        return u[0]*v[1] - u[1]*v[0]

    indptr,ind = tri.vertex_neighbor_vertices
    W = np.zeros((Nint,Nint)) # cotangents matrix
    A = np.zeros((Nint,1)) # vertex area of point i (A[i])
    for i in range(Nint):
        tot = 0
        nhb_ind = ind[indptr[i]:indptr[i+1]] # indices of the neighbours of point i
        nhb = points[nhb_ind] # their coordinates
        # neighbours sorted in trigonometric order around point i
        nhb,nhb_ind = tri_ang(nhb,nhb_ind,points[i])
        n_nhb = len(nhb_ind)
        for j in range(n_nhb):
            vec = nhb[j]-points[i] # vector from point i to neighbour j
            vec_av = nhb[j-1]-points[i] # same, for the previous neighbour (wraps around)
            vec_ap = nhb[(j+1) % n_nhb]-points[i] # same, for the next neighbour (wraps around)
            # one third of the area of the triangle (i, j-1, j): ||AxB||/2/3
            A[i] += 0.5/3*abs(cross2(vec,vec_av))
            # cotangents from dot and cross products: A.B/||AxB||
            cotan_alpha = np.dot(vec_av,vec_av-vec)/abs(cross2(vec_av,vec_av-vec))
            cotan_beta = np.dot(vec_ap,vec_ap-vec)/abs(cross2(vec_ap,vec_ap-vec))
            # every neighbour, rim included, contributes to the diagonal
            tot += cotan_alpha+cotan_beta
            # rim neighbours have no column of their own
            if nhb_ind[j] < Nint:
                W[i,nhb_ind[j]] = 0.5*(cotan_alpha+cotan_beta)
        W[i,i] = -0.5*tot # diagonal values
    return (1/A)*W
def rect(x,y,L,H,x0=0,y0=0):
    """Return -1 if (x, y) is strictly inside the L x H rectangle whose lower-left corner is (x0, y0), else 1."""
    return -1 if 0<x-x0<L and 0<y-y0<H else 1
def rect_rim(L,H,a,x0=0,y0=0):
    """Return the rim points of the L x H rectangle with lower-left corner (x0, y0).

    Points are spaced by `a` and stacked in this order: top edge, bottom edge,
    left edge, right edge, top-right corner, bottom-right corner.
    The result is an (n, 2) array of [x, y] rows.
    """
    tab1 = np.arange(x0,L+x0,a)[:,np.newaxis]
    top = np.hstack((tab1,H*np.ones((len(tab1),1))+y0))
    bottom = np.hstack((tab1,np.zeros((len(tab1),1))+y0))
    tab2 = np.arange(y0+a,H+y0,a)[:,np.newaxis]
    left = np.hstack((np.zeros((len(tab2),1))+x0,tab2))
    right = np.hstack((L*np.ones((len(tab2),1))+x0,tab2))
    top_right = np.array([[L+x0,H+y0]])
    # The bottom-right corner must follow the y0 offset like every other point.
    bottom_right = np.array([[L+x0,y0]])
    return np.vstack((top,bottom,left,right,top_right,bottom_right))
# sample with a square 1*1
L = 1
H = 1
dl = 0.04
sol = in_curve(rect,[L,H],(100,100),dl)
sol_tri,Nint,Nbord = triang(sol,dl,rect,[L,H],rect_rim(L,H,dl))
# triangulation
tri = scipy.spatial.Delaunay(sol_tri)
M = Laplacian(sol_tri,tri,Nint)
valp,vecp = np.linalg.eig(M) # eigenvalues and eigenvectors
vecp = np.real(vecp)
# comparison with the exact solution:
T = 1000
U = np.arange(0,T,1)
NUsim = np.array([len(count_vp(valp,u)) for u in U])
NU = np.array([len(rect_drum(L,H,u)) for u in U])
plt.plot(U,NUsim,label='simulation')
plt.plot(U,NU,label='exacts')
plt.legend()
plt.show()
# 3D plot of an eigenvector
mode = 0 # change this for another mode
# the rim values are zero, so pad the eigenvectors with Nbord rows of zeros
vecp_tot = np.vstack((vecp,np.zeros((Nbord,Nint))))
fig = plt.figure()
# Figure.gca() no longer accepts keyword arguments in recent Matplotlib
ax = fig.add_subplot(projection='3d')
ax.plot_trisurf(sol_tri[:,0],sol_tri[:,1],vecp_tot[:,mode],triangles=tri.simplices)
plt.show()
Notes:
1- The high eigenvalues (large in absolute value) are wrong: this is an effect of the discretisation.
2- If dl is too small, some eigenvectors and eigenvalues are wrong (the largest entries of valp and the first vectors of vecp); this is probably due to the quality of the mesh.
Related
I'm writing a script that tracks the shifts of a sample by estimating the displacement of an ensemble of particles. The first implementation, in Python, works all right, but it takes too long for a large number of samples. To combat this, I tried rewriting the method in Cython, but as this was my first time ever using it, I can't seem to get any performance increase. I know 3D FFTs exist and are often faster than looped 2D FFTs, but in this instance they take too much memory and/or are slower than for-loops.
Python function:
import numpy as np
from scipy.fft import fftshift
import pyfftw
def python_corr(frame_a, frame_b):
    """Cross-correlate matching 2-D samples of two stacks with FFTs.

    frame_a, frame_b -- arrays of shape (k, m, n): k samples of m x n pixels.

    Returns a float32 array of shape (k, m, n). Slice `ind` is the
    fftshift-ed inverse real FFT of conj(rfft2(frame_a[ind])) * rfft2(frame_b[ind]).
    """
    DTYPEf = 'float32'
    DTYPEc = 'complex64'
    k, m, n = frame_a.shape # number of samples, size y, size x of a 2d sample
    fs = [m,n] # sample shape
    bs = [m,n//2+1] # rfft sample shape
    corr = np.zeros([k,m,n], DTYPEf) # out
    # The two plans are built once and reused for every sample.
    fft_forward = pyfftw.builders.rfft2(
        pyfftw.empty_aligned(fs, dtype = DTYPEf),
        axes = [-2,-1],
    )
    fft_backward = pyfftw.builders.irfft2(
        pyfftw.empty_aligned(bs, dtype = DTYPEc),
        s = fs, # explicit output shape, otherwise an odd n comes back as n-1
        axes = [-2,-1],
    )
    for ind in range(k): # looping over 2D samples
        # np.conj copies the first transform before the plan's output buffer
        # is overwritten by the second call.
        spectrum = np.conj(fft_forward(frame_a[ind,:,:]))*fft_forward(frame_b[ind,:,:])
        # cross correlation via FFT algorithm
        corr[ind,:,:] = fftshift(np.real(fft_backward(spectrum)), axes = [-2,-1])
    return corr
Cython function:
import numpy as np
from scipy.fft import fftshift
import pyfftw
cimport numpy as np
np.import_array()
cimport cython
DTYPEf = np.float32
ctypedef np.float32_t DTYPEf_t
DTYPEc = np.complex64
ctypedef np.complex64_t DTYPEc_t
#cython.boundscheck(False)
#cython.nonecheck(False)
def cython_corr(
    np.ndarray[DTYPEf_t, ndim = 3] frame_a,
    np.ndarray[DTYPEf_t, ndim = 3] frame_b,
    ):
    """Cross-correlate matching 2-D samples of two float32 stacks with FFTs.

    frame_a, frame_b -- float32 arrays of shape (k, m, n).
    Returns a float32 memoryview of shape (k, m, n).

    NOTE(review): every statement in the loop calls a Python object (the
    pyfftw plans, np.conj, fftshift), so the typed declarations presumably
    cannot speed the loop up -- confirm with `cython -a`.
    """
    cdef int ind, k, m, n
    k = frame_a.shape[0]
    m = frame_a.shape[1] # size y of sample
    n = frame_a.shape[2] # size x of sample
    cdef DTYPEf_t[:,:] window_a = pyfftw.empty_aligned([m,n], dtype = DTYPEf) # sample a
    window_a[:,:] = 0.
    cdef DTYPEf_t[:,:] window_b = pyfftw.empty_aligned([m,n], dtype = DTYPEf) # sample b
    window_b[:,:] = 0.
    cdef DTYPEf_t[:,:] corr = pyfftw.empty_aligned([m,n], dtype = DTYPEf) # cross-corr matrix
    corr[:,:] = 0.
    cdef DTYPEf_t[:,:,:] out = pyfftw.empty_aligned([k,m,n], dtype = DTYPEf) # out
    out[:,:,:] = 0. # one slice per dimension of the 3-D view
    cdef object fft_forward
    cdef object fft_backward
    cdef DTYPEc_t[:,:] f2a = pyfftw.empty_aligned([m, n//2+1], dtype = DTYPEc) # rfft out of sample a
    f2a[:,:] = 0. + 0.j
    cdef DTYPEc_t[:,:] f2b = pyfftw.empty_aligned([m, n//2+1], dtype = DTYPEc) # rfft out of sample b
    f2b[:,:] = 0. + 0.j
    cdef DTYPEc_t[:,:] r = pyfftw.empty_aligned([m, n//2+1], dtype = DTYPEc) # power spectrum of sample a and b
    r[:,:] = 0. + 0.j
    fft_forward = pyfftw.builders.rfft2(
        pyfftw.empty_aligned([m,n], dtype = DTYPEf),
        axes = [0,1],
    )
    fft_backward = pyfftw.builders.irfft2(
        pyfftw.empty_aligned([m,n//2+1], dtype = DTYPEc),
        axes = [0,1],
    )
    for ind in range(k):
        window_a = frame_a[ind,:,:]
        window_b = frame_b[ind,:,:]
        r = np.conj(fft_forward(window_a))*fft_forward(window_b) # power spectrum of sample a and b
        corr = fft_backward(r).real # cross correlation
        corr = fftshift(corr, axes = [0,1]) # shift Q1 --> Q3, Q2 --> Q4
        # the fftshift could be moved out of the loop, but lets use that as a last resort :)
        out[ind,:,:] = corr
    return out
Test for methods:
import time
# a small test with 14000 24x24px samples; random values are used because
# np.empty would hand the FFTs whatever happens to be in memory
aa = bb = np.random.rand(14000, 24, 24).astype('float32')
print(f'Number of samples: {aa.shape[0]}')
# perf_counter is the clock intended for measuring short durations
start = time.perf_counter()
corr = python_corr(aa, bb)
print(f'Time for Python: {time.perf_counter() - start}')
del corr
start = time.perf_counter()
corr = cython_corr(aa, bb)
print(f'Time for Cython: {time.perf_counter() - start}')
del corr
I have a list D containing 50 sub-lists. The number of elements in these sub-lists are decreasing. I visualize the list D by
# one column of dots per sub-list of D, drawn at x = its position in D
for i, array in enumerate(D):
    plt.scatter([i]*len(array), array)
I have 50 functions taking values from St_Sp, and Y is a list containing 50 elements, each of them is the output of each function. I visualize these functions
fig, ax = plt.subplots()
# one curve per element of Y, all drawn against St_Sp
for curve in Y:
    ax.plot(St_Sp, curve)
I found that too many colors are hard on the eyes. I would like to ask how to alternate the color of the graphs between blue and white. I mean that the colors of the functions, and of the dots in D, should go white > blue > white > blue ...
Could you please elaborate on how to do so?
##### Import packages
import numpy as np
import scipy.linalg as la
import time
import matplotlib
import matplotlib.pyplot as plt
##### Initial conditions
# Half-width of the state space: states run from -N to N.
N = 100
# Rates returned by the generator LL for a step of -1 (lamda) and +1 (mu) in x - y.
lamda = 7
mu = 2
a = np.exp(-0.05)
r = -np.log(a)
# All states, and how many there are.
St_Sp = np.arange(-N, N + 1)
Card = St_Sp.size
##### Define infintesimal generator
def LL(x, y):
    """Return the (x, y) entry of the infinitesimal generator.

    Rows of the two end states x = N and x = -N are entirely zero. Otherwise
    the entry is lamda when x - y == -1, mu when x - y == 1,
    -(mu + lamda) on the diagonal, and 0 elsewhere.
    """
    if x == N or x == - N:
        return 0
    step = x - y
    if step == - 1:
        return lamda
    if step == 1:
        return mu
    if step == 0:
        return - (mu + lamda)
    return 0
def L(x):
    """Return -LL(x, x), the negated diagonal entry of the generator for state x."""
    return - LL(x, x)
##### Define function Phi
def Phi(x):
    """Return max(x, 0), element-wise for array input."""
    # np.maximum already broadcasts, so no np.vectorize wrapper is needed.
    return np.maximum(x, 0)
##### Define vector b: Phi evaluated on every state of St_Sp
b = np.array(Phi(St_Sp))
##### Define function Psi
def Psi(x):
    """Return L(x) / (L(x) + r) for a single state x."""
    rate = L(x)
    return rate / (rate + r)
# element-wise version, so Psi can also be applied to arrays of states
Psi = np.vectorize(Psi)
##### Generate a Boolean vector whose all elements are False
d = np.zeros(Card, dtype=bool)
##### Define matrix A
A = np.zeros((Card, Card))
for i in range(Card):
    # both values depend on the row only, so compute them once per row
    rate_i = L(St_Sp[i])
    psi_i = Psi(St_Sp[i])
    for j in range(Card):
        if i != j:
            # off-diagonal: generator entry scaled by the rate, 0 when the rate is 0
            A[i, j] = LL(St_Sp[i], St_Sp[j]) / rate_i if rate_i != 0 else 0
        elif psi_i != 0:
            A[i, j] = - 1 / psi_i
        else:
            A[i, j] = 1
##### Row names of A
rows = np.arange(0, Card)
##### Define matrix B
# B is the generator with r subtracted on the diagonal
B = np.zeros((Card, Card))
for i in range(Card):
    for j in range(Card):
        B[i, j] = LL(St_Sp[i], St_Sp[j])
    B[i, i] -= r
start = time.time()
##### Generate I_0
# I[0] and I[1] are Boolean masks over the states; I[0] starts full, I[1] empty
I = [np.ones(Card, dtype=bool), d.copy()]
Z = b.astype(float)
D = [St_Sp]
index0 = np.matmul(B, Z) <= 0
index1 = ~ index0
Y = [b.copy()]
##### Iterations
for i in range(1, Card):
    # states flagged by index1 move from I[0] to I[1]
    I = [I[0] & index0, I[1] | index1]
    Z = b.astype(float)
    A1 = A[np.ix_(rows[I[1]], rows[I[1]])]
    A2 = A[np.ix_(rows[I[1]], rows[I[0]])]
    # values on I[1] solve the linear system; values on I[0] stay equal to b
    Z[I[1]] = la.solve(A1, - np.matmul(A2, Z[I[0]]))
    Y = np.concatenate((Y, [Z]))
    D.append(St_Sp[I[0]])
    index = np.matmul(B[I[0]], Z) <= 0
    index0, index1 = d.copy(), d.copy()
    index0[I[0]], index1[I[0]] = index, ~ index
    # stop once no state of I[0] is flagged for removal any more
    if (I[0] == index0).all():
        break
# one column of dots per element of D, drawn at x = its position in D
for i, array in enumerate(D):
    plt.scatter([i]*len(array), array)
fig, ax = plt.subplots()
# one curve per row of Y, all drawn against St_Sp
for curve in Y:
    ax.plot(St_Sp, curve)
The easiest approach is to set a custom color cycler. Instead of cycling between the 10 typical colors, the default colors for the plots will cycle through the given colors.
from cycler import cycler
# plots on an axes with this cycler alternate white, blue, white, blue, ...
custom_cycler = cycler(color=['white', 'blue'])
plt.gca().set_prop_cycle(custom_cycler)
for i, array in enumerate(D[:-1]):
    plt.scatter([i] * len(array), array)
# the last element is drawn separately, in its own colour
plt.scatter([len(D) - 1] * len(D[-1]), D[-1], color='crimson')
fig, ax = plt.subplots()
ax.set_prop_cycle(custom_cycler)
for curve in Y[:-1]:
    ax.plot(St_Sp, curve)
ax.plot(St_Sp, Y[-1], color='crimson')
plt.show()
I am trying to run this model of seed predation and population dynamics, but I am new to coding and I am only getting one predation value, which is repeated over the different generations. How can I get a different predation value for each year?
Also, Is there an issue with the normalizing method used?
import numpy as np
import matplotlib.pyplot as plt
def is_odd(year):
    """Return True when `year` is odd."""
    return (year % 2) == 1
def reproduction(p_iter, year, dead):
    """Return the seeds left after predation, plus the population carried over unchanged.

    p_iter -- array of the three current populations.
    year   -- odd years use the seed vector s_oddd and carry over p_iter[2];
              even years use s_even and carry over p_iter[1].
    dead   -- predation value for this year.

    The seeds removed are K*dead*200 in total, shared between the populations
    in proportion to their seed production. Reads the module-level K, s_oddd
    and s_even.
    NOTE(review): the result is NaN when the total seed production is 0.
    """
    if is_odd(year):
        seeds_per_plant = s_oddd
        carried = np.array([0,0,p_iter[2]])
    else:
        seeds_per_plant = s_even
        carried = np.array([0,p_iter[1],0])
    seedsProd = p_iter*seeds_per_plant
    seedsPred = K*dead*200*(seedsProd/np.sum(seedsProd))
    return (seedsProd - seedsPred) + carried
def normalize(p_iter):
    """Rescale two of the three populations so that the total equals K.

    The population left untouched is p_iter[2] in odd years and p_iter[1] in
    even years; the other two are scaled to sum to K minus that population.

    NOTE: `year` is not a parameter. The function reads the module-level
    `year` (the loop variable of the calling script) as well as K.
    NOTE(review): the result is NaN when the two rescaled populations sum to 0.
    """
    fixed = 2 if is_odd(year) else 1
    x = np.copy(p_iter)
    x[fixed] = 0 # leave the fixed population out of the rescaling
    x = (K-p_iter[fixed]) * x / sum(x)
    x[fixed] = p_iter[fixed]
    return x
Predation is defined here:
def predation():
    """Return a random predation value: uniform between 0.4 and 0.6, rounded to 2 decimals, as a 0-d array."""
    return (np.array(np.round(np.random.uniform(0.4,0.6),2)))
#max_years
Y = 100
#carrying capacity
K = 1000
#initial population
p_1, p_2, p_3 = 998., 1., 1.
#seed released per plant
s_1, s_2, s_3 = 200, 260, 260
p_init = np.array([p_1, p_2, p_3],dtype=float)
s_oddd = np.array([s_1, s_2, 0.0])
s_even = np.array([s_1, 0.0, s_3])
n = len(p_init)
# history: one row per year, populations followed by seeds (first row: initial state)
m = np.append(p_init,s_oddd)
p_iter = p_init
dead = 0
norm = 0
for year in range(1,Y+1):
    # a new predation value is drawn every year
    dead = predation()
    seeds = reproduction(p_iter, year, dead)
    # negative seed counts are clipped to zero before normalising
    p_iter = np.maximum(seeds,np.zeros(p_iter.shape))
    p_iter = normalize(p_iter)
    m = np.vstack((m, [*p_iter]+[*seeds] ))
I am trying to use scikit-learn to measure how good a KNeighborsClassifier is on my data.
My code is below (X is a matrix with NUM_MATCHES rows and NUM_FEATURES columns, Y is a column vector with NUM_MATCHES rows). I keep getting the error
TypeError: Partition index must be integer
on this line of the code below
rad_prob = estimator.predict_proba(np.reshape(radiant_query,(1,-1)))[0][1]
I am new to scikit-learn and not sure what the issue is.
from sklearn.neighbors import KNeighborsClassifier
from sklearn import cross_validation
import numpy as np
K=2  # number of cross-validation folds; also divides NUM_MATCHES to get n_neighbors
FOLDS_FINISHED=0  # incremented by score() each time it is called
NUM_HEROES = 78
NUM_FEATURES = NUM_HEROES*2  # score() swaps the two NUM_HEROES-wide halves of a feature row
def score(estimator, X, y):
    """Return the accuracy of `estimator` on (X, y), averaging each query with its side-swapped twin.

    For every row of X a second query is built by swapping its two
    NUM_HEROES-wide halves. The probability of class index 1 for the original
    row and of class index 0 for the swapped row are averaged; the prediction
    is 1 when that average exceeds 0.5 and -1 otherwise.

    Prints the accuracy, increments the global FOLDS_FINISHED and returns the
    accuracy as a float.
    """
    global FOLDS_FINISHED
    correct_predictions = 0
    for i, radiant_query in enumerate(X):
        dire_query = np.concatenate((radiant_query[NUM_HEROES:NUM_FEATURES], radiant_query[0:NUM_HEROES]))
        rad_prob = estimator.predict_proba(np.reshape(radiant_query,(1,-1)))[0][1]
        dire_prob = estimator.predict_proba(np.reshape(dire_query,(1,-1)))[0][0]
        overall_prob = (rad_prob + dire_prob) / 2
        prediction = 1 if (overall_prob > 0.5) else -1
        if prediction == y[i]:
            correct_predictions += 1
    FOLDS_FINISHED += 1
    accuracy = float(correct_predictions) / len(X)
    print ('Accuracy: %f' % accuracy)
    return accuracy
preprocessed = np.load('train_9000.npz')
X = preprocessed['X']
Y = preprocessed['Y']
NUM_MATCHES = 3000
X = X[0:NUM_MATCHES]
Y = Y[0:NUM_MATCHES]
# NOTE(review): the sklearn.cross_validation module only exists in old
# scikit-learn releases; newer ones provide sklearn.model_selection instead.
k_fold = cross_validation.KFold(n=NUM_MATCHES, n_folds=K, shuffle=True)
d_tries = [3, 4, 5]
d_accuracy_pairs = []
for d_index, d in enumerate(d_tries):
    # n_neighbors must be an integer: in Python 3 `/` yields a float, which is
    # what raises "Partition index must be integer" inside predict_proba.
    # NOTE(review): my_distance and poly_param are not defined in the visible code.
    model = KNeighborsClassifier(n_neighbors=NUM_MATCHES//K,metric=my_distance,weights=poly_param(d))
    model_accuracies = cross_validation.cross_val_score(model, X, Y, scoring=score, cv=k_fold)
    model_accuracy = model_accuracies.mean()
    d_accuracy_pairs.append((d, model_accuracy))
I need to optimize a non-convex problem (maximum likelihood). When I try local optimization algorithms such as BFGS or Nelder-Mead, they fail to find the extremum; I frequently get a saddle point instead.
You can download data from here.
import numpy as np
import csv
from scipy.stats import norm
# Read data.csv into `column`: one list of floats per header name.
# The with-block closes the file even if a value fails to convert.
with open('data.csv','r') as f:
    reader = csv.reader(f)
    headers = next(reader)
    column = {h: [] for h in headers}
    for row in reader:
        for h,v in zip(headers, row):
            column[h].append(float(v))
ini=[-0.0002,-0.01,.002,-0.09,-0.04,0.01,-0.02,-.0004]
# NOTE(review): `x` and `Coef_headers` are not defined anywhere in the
# visible code; they presumably come from an earlier fit -- confirm.
for i in range(0,len(x[0])):
    ini.append(float(x[0][i]))
x_header = list(Coef_headers)
N = 19 # no of observations
I = 4 # number of alternatives (columns of U in myfunction)
P =7 # number of regressors, constant included
Yobs=np.zeros(N)
Yobs[:] = column['size']
X=np.zeros((N,P))
X[:,0] = column['costTon']
X[:,1] = column['com1']
X[:,2] = column['com3']
X[:,3] = column['com4']
X[:,4] = column['com5']
X[:,5] = column['night']
X[:,6] = 1 #constant
def myfunction(B):
    """Return the negative log10 of the objective accumulated over the N observations.

    B -- parameter sequence; entries B[0] .. B[10] are read.

    Reads the module-level data: column, X, Yobs, N, I and P.
    """
    # Fixed regression coefficients, one per column of X.
    # NOTE(review): the original read `B[0.299, ...]`, which indexes B with a
    # tuple of floats and cannot run; a constant vector is presumably what
    # was meant -- confirm.
    beta = np.array([0.299,18.495,2.181,2.754,3.59,2.866,-12.846])
    theta = 30
    U=np.zeros((N,I))
    mm=np.zeros(I)
    u = np.zeros((N,I))
    F = np.zeros((N,I))
    G = np.zeros(N)
    l = 0
    s1 = np.expm1(-theta)
    for n in range (0,N):
        U[n,0] = B[0]*column['cost_van'][n]+ B[4]*column['cap_van'][n]
        U[n,1] = B[1]+ B[5]*column['ex'][n]+ B[8]*column['dist'][n]+ B[0]*column['cost_t'][n]+ B[4]*column['cap_t'][n]
        U[n,2] = B[2]+ B[6]*column['ex'][n]+ B[9]*column['dist'][n] + B[0]*column['cost_Ht'][n]+ B[4]*column['cap_Ht'][n]
        U[n,3] = B[3]+ B[7]*column['ex'][n]+ B[10]*column['dist'][n]+ B[0]*column['cost_tr'][n]+ B[4]*column['cap_tr'][n]
        for i in range(0,I):
            mm[i]=np.exp(U[n,i])
        m= sum(mm)
        for i in range(0,I):
            # algebraically 1 - exp(U[n,i]) / m
            u[n,i]=1/(1+ np.exp(U[n,i]- np.log(m-np.exp(U[n,i]))))
            F[n,i] = np.expm1(-u[n,i]*theta)
    CDF = np.zeros(N)
    Y = X.dot(beta)
    resid = 0
    for n in range (0,N):
        resid = resid + (np.square(Yobs[n]-Y[n]))
    SSR = resid / N
    dof = N - P - 1
    s2 = resid/dof # MSE, or variance: the mean squared error of residuals
    for n in range(0,N):
        # NOTE(review): norm.cdf(x, loc, scale) is called with loc=SSR and
        # scale=s2; confirm these are the intended mean and standard deviation.
        CDF[n] = norm.cdf((Yobs[n]+1),SSR,s2) - norm.cdf((Yobs[n]-1),SSR,s2)
        G[n] = np.expm1(-CDF[n]*theta)
        # the CSV values were stored as floats; an array index must be an int
        k = int(column['Choice_Veh'][n])-1
        l = l + (np.log10(1+(F[n,k]*G[n]/s1))/(-theta))
    loglikelihood = np.log10(l)
    return -loglikelihood
# Eleven identical search ranges, one per parameter read by myfunction.
# NOTE(review): slice(-10, 10, 1) gives 20 grid values per parameter, so the
# full grid has 20**11 points; this is presumably what makes optimize.brute
# fail with "array is too big". A coarser grid, fewer free parameters, or a
# sampling-based global optimizer would be needed -- confirm.
rranges = np.repeat(slice(-10, 10, 1),11, axis = 0)
a = rranges
from scipy import optimize
# brute evaluates myfunction on the whole grid, then refines the best point with fmin
resbrute = optimize.brute(myfunction, rranges, full_output=True,finish=optimize.fmin)
print("# global minimum:", resbrute[0])
print("function value at global minimum :", resbrute[1])
So I decided to go for a grid search and tried scipy.optimize.brute, but I get the error below. In fact, my real problem has 47 variables; I reduced it to 31 to try to make it work, but it still doesn't. Please help.
File "C:\...\site-packages\numpy\core\numeric.py", line 1906, in indices
res = empty((N,)+dimensions, dtype=dtype)
ValueError: array is too big.