Calculating entropy in ID3 log2(0) in formula - python-3.x

import numpy as np
udacity_set = np.array(
[[1,1,1,0],
[1,0,1,0],
[0,1,0,1],
[1,0,0,1]])
label = udacity_set[:,udacity_set.shape[1]-1]
fx = label.size
positive = label[label == 1].shape[0]
positive_probability = positive/fx
negative = label[label == 0].shape[0]
negative_probability = negative/fx
entropy = -negative_probability*np.log2(negative_probability) - positive_probability*np.log2(positive_probability)
atribute = 0
V = 1
attribute_set = udacity_set[np.where(udacity_set[:,atribute] == 1)] #selecting positive instance of occurance in attribute 14
instances = attribute_set.shape[0]
negative_labels = attribute_set[np.where(attribute_set[:,attribute_set.shape[1]-1]== 0)].shape[0]
positive_labels = attribute_set[np.where(attribute_set[:,attribute_set.shape[1]-1]== 1)].shape[0]
p0 = negative_labels/instances
p1 = positive_labels/instances
entropy2 = - p0*np.log2(p0) - p1*np.log2(p1)
attribute_set2 = udacity_set[np.where(udacity_set[:,atribute] == 0)] #selecting positive instance of occurance in attribute 14
instances2 = attribute_set2.shape[0]
negative_labels2 = attribute_set[np.where(attribute_set2[:,attribute_set2.shape[1]-1]== 0)].shape[0]
positive_labels2 = attribute_set[np.where(attribute_set2[:,attribute_set2.shape[1]-1]== 1)].shape[0]
p02 = negative_labels2/instances2
p12 = positive_labels2/instances2
entropy22 = - p02*np.log2(p02) - p12*np.log2(p12)
Problem is when attribute is pure and entropy is meant to be 0. But when i put this into a formula i get NaN. I know how to code workaround, but why is this formula rigged?

Related

Discrete Laplacian on a non-regular mesh (python)

I have coded the laplacien function for a non-regular mesh (created with the scipy.spatial.Delaunay function).
I have not errors but the results are not correct : the eigenvectors are correct but the eigenvalues ​​are too high (in absolute value).
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial
def rect_drum(L,H,U):
vals = []
val = 0
k = 1
l = 1
while val >= -U:
while val >= -U:
val = -np.pi**2*((k/L)**2+(l/H)**2)
if val >= -U:
vals.append(val)
l += 1
l = 1
k += 1
val = -np.pi**2*((k/L)**2+(l/H)**2)
return np.array(vals)
def count_vp(tab,U):
#count the n eigenvalues ​​greater than equal to -U in the array tab
return tab[tab>=-U]
def in_curve(f,fargs,shape,a):
points = [] # the points inside the curve
for j in range(shape[0]):
for i in range(shape[1]):
if f(i*a,j*a,*fargs) < 0:
points.append([i*a,j*a])
return np.array(points)
def triang(points,a,f,fargs,bord):
tri_points = points.copy()
tri_points[:,1] *= np.sqrt(3)
tri_points2 = np.vstack((points,bord))
tri_points2[:,1] *= np.sqrt(3)
tri_points2[:,0] += a/2
tri_points2[:,1] += np.sqrt(3)/2*a
fin = np.vstack((tri_points,tri_points2))
i = 0
eps = 0.01
while i < len(fin):
if f(fin[i,0]+eps,fin[i,1]+eps,*fargs) > 0:
fin = np.delete(fin,i,0)
i -= 1
i += 1
return np.vstack((fin,bord)),len(fin),len(bord)
def tri_ang(points,ind,p0):
# sort the points in trigonometric order
vec=np.arctan2((points-p0)[:,1],(points-p0)[:,0])
values = []
dtype = [('val',float),('n',int)]
for i in range(len(vec)):
values.append((vec[i],i))
values = np.sort(np.array(values,dtype),order='val')
new_points = []
new_ind = []
for tup in values:
new_points.append(points[tup[1]])
new_ind.append(ind[tup[1]])
return np.array(new_points),np.array(new_ind)
def M(points,tri,Nint):
indptr,ind = tri.vertex_neighbor_vertices
W = np.zeros((Nint,Nint)) # cotangents matrix
A = np.zeros((Nint,1)) # surfaces vertex array for each point i (A[i])
for i in range(Nint):
tot = 0
nhb_ind = ind[indptr[i]:indptr[i+1]] # indices of the points close to the point of index k
nhb = points[nhb_ind] # their coordinates
nhb,nhb_ind = tri_ang(nhb,nhb_ind,points[i]) #the coordinates (nhb) and (nhb_ind) of each neighbor of i
for j in range(len(nhb_ind)):
vec = nhb[j]-points[i] # a vector connecting the point to his neighbor of index 0
vec_av = nhb[j-1]-points[i] # another vector but with the Vosin from before
if j+1 >= len(nhb_ind):
k = 0
else:
k = j+1
vec_ap = nhb[k]-points[i] # another vector but with the next neighbor
# another vector but with the next neighbor
A[i] += 0.5/3*np.linalg.norm(np.cross(vec,vec_av))
if nhb_ind[j] < Nint:
# we use the vector and scalar product to calculate the cotangents: A.B/||AxB||
cotan_alpha = np.dot(vec_av,vec_av-vec)/np.linalg.norm(np.cross(vec_av,vec_av-vec))
cotan_beta = np.dot(vec_ap,vec_ap-vec)/np.linalg.norm(np.cross(vec_ap,vec_ap-vec))
# Wij value :
W[i,nhb_ind[j]] = -0.5*(cotan_alpha+cotan_beta)
tot += cotan_alpha+cotan_beta
W[i,i] = -0.5*tot # diagonal values
return (1/A)*W
def rect(x,y,L,H,x0=0,y0=0):
if 0<x-x0<L and 0<y-y0<H:
return -1
else:
return 1
def rect_rim(L,H,a,x0=0,y0=0):
tab1 = np.arange(x0,L+x0,a)[:,np.newaxis]
h = np.hstack((tab1,H*np.ones((len(tab1),1))+y0))
b = np.hstack((tab1,np.zeros((len(tab1),1))+y0))
tab2 = np.arange(y0+a,H+y0,a)[:,np.newaxis]
g = np.hstack((np.zeros((len(tab2),1))+x0,tab2))
d = np.hstack((L*np.ones((len(tab2),1))+x0,tab2))
hp = np.array([[L+x0,H+y0]])
bp = np.array([[L+x0,0]])
return np.vstack((h,b,g,d,hp,bp))
# sample with a square 1*1
L = 1
H = 1
dl = 0.05
sol = in_curve(rect,[L,H],(100,100),dl)
sol_tri,Nint,Nbord = triang(sol,dl,rect,[L,H],rect_rim(L,H,dl))
# plt.plot(sol_tri[:,0],sol_tri[:,1],linestyle="",marker="+",label="tri")
# plt.plot(sol[:,0],sol[:,1],linestyle="",marker="x")
# plt.legend()
# plt.show()
# triangulation
tri = scipy.spatial.Delaunay(sol_tri)
# plt.triplot(sol_tri[:,0],sol_tri[:,1],tri.simplices)
# plt.show()
M = M(sol_tri,tri,Nint)
valp,vecp = np.linalg.eig(M) # eigenvalues and eigenvectors
vecp = np.real(vecp)
# comparison with the exact solution:
T = 1000
U = np.arange(0,T,1)
NUsim = np.array([len(count_vp(valp,u)) for u in U])
NU = np.array([len(rect_drum(L,H,u)) for u in U])
plt.plot(U,NUsim,label='simulation')
plt.plot(U,NU,label='exacts')
plt.legend()
plt.show()
# 3D plot of an eigenvector
vecp_tot = np.vstack((vecp,np.zeros((Nbord,Nint))))
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_trisurf(sol_tri[:,0],sol_tri[:,1],vecp_tot[:,0],triangles=tri.simplices)
plt.show()
The laplacian is the function named "M".
The "in_curve function" return the points inside a curve defined by f(x,y,*fargs) < 0 (a square in the sample).
The "triang" function return points with added points (triangle meshs). The fonction uses an another function for the rim of the curve (for most precision), in the sample it is the "rect_rim" function.
I used the formula given at https://en.wikipedia.org/wiki/Discrete_Laplace_operator ("mesh laplacians").
I have solve my problem : it's a sign and a rim problems.
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import scipy.spatial
def rect_drum(L,H,U):
vals = []
val = 0
k = 1
l = 1
while val >= -U:
while val >= -U:
val = -np.pi**2*((k/L)**2+(l/H)**2)
if val >= -U:
vals.append(val)
l += 1
l = 1
k += 1
val = -np.pi**2*((k/L)**2+(l/H)**2)
return np.array(vals)
def count_vp(tab,U):
#count the n eigenvalues ​​greater than equal to -U in the array tab
return tab[tab>=-U]
def in_curve(f,fargs,shape,a):
points = [] # the points inside the curve
for j in range(shape[0]):
for i in range(shape[1]):
if f(i*a,j*a,*fargs) < 0:
points.append([i*a,j*a])
return np.array(points)
def triang(points,a,f,fargs,bord):
tri_points = points.copy()
tri_points[:,1] *= np.sqrt(3)
tri_points2 = np.vstack((points,bord))
tri_points2[:,1] *= np.sqrt(3)
tri_points2[:,0] += a/2
tri_points2[:,1] += np.sqrt(3)/2*a
fin = np.vstack((tri_points,tri_points2))
i = 0
eps = 0.01
while i < len(fin):
if f(fin[i,0]+eps,fin[i,1]+eps,*fargs) > 0:
fin = np.delete(fin,i,0)
i -= 1
i += 1
return np.vstack((fin,bord)),len(fin),len(bord)
def tri_ang(points,ind,p0):
# sort the points in trigonometric order
vec=np.arctan2((points-p0)[:,1],(points-p0)[:,0])
values = []
dtype = [('val',float),('n',int)]
for i in range(len(vec)):
values.append((vec[i],i))
values = np.sort(np.array(values,dtype),order='val')
new_points = []
new_ind = []
for tup in values:
new_points.append(points[tup[1]])
new_ind.append(ind[tup[1]])
return np.array(new_points),np.array(new_ind)
def Laplacian(points,tri,Nint):
indptr,ind = tri.vertex_neighbor_vertices
W = np.zeros((Nint,Nint)) # cotangents matrix
A = np.zeros((Nint,1)) # surfacesvertex aray of point i (A[i])
for i in range(Nint):
tot = 0
nhb_ind = ind[indptr[i]:indptr[i+1]] # indices of the points close to the point of index k
nhb = points[nhb_ind] # their coordinates
nhb,nhb_ind = tri_ang(nhb,nhb_ind,points[i]) #the coordinates (nhb) and (nhb_ind) of each neighbor of i
for j in range(len(nhb_ind)):
vec = nhb[j]-points[i] # a vector connecting the point to his neighbor of index 0
vec_av = nhb[j-1]-points[i] # another vector but with the Vosin from before
if j+1 >= len(nhb_ind):
k = 0
else:
k = j+1
vec_ap = nhb[k]-points[i] # another vector but with the next neighbor
# we use the cross product to calculate the areas of the triangles: ||AxB||/2:
A[i] += 0.5/3*np.linalg.norm(np.cross(vec,vec_av))
# we use the cross product and scalar product to calculate the cotangents: A.B/||AxB||
cotan_alpha = np.dot(vec_av,vec_av-vec)/np.linalg.norm(np.cross(vec_av,vec_av-vec))
cotan_beta = np.dot(vec_ap,vec_ap-vec)/np.linalg.norm(np.cross(vec_ap,vec_ap-vec))
tot += cotan_alpha+cotan_beta
if nhb_ind[j] < Nint:
W[i,nhb_ind[j]] = 0.5*(cotan_alpha+cotan_beta)
W[i,i] = -0.5*tot # diagonal values
return (1/A)*W
def rect(x,y,L,H,x0=0,y0=0):
if 0<x-x0<L and 0<y-y0<H:
return -1
else:
return 1
def rect_rim(L,H,a,x0=0,y0=0):
tab1 = np.arange(x0,L+x0,a)[:,np.newaxis]
h = np.hstack((tab1,H*np.ones((len(tab1),1))+y0))
b = np.hstack((tab1,np.zeros((len(tab1),1))+y0))
tab2 = np.arange(y0+a,H+y0,a)[:,np.newaxis]
g = np.hstack((np.zeros((len(tab2),1))+x0,tab2))
d = np.hstack((L*np.ones((len(tab2),1))+x0,tab2))
hp = np.array([[L+x0,H+y0]])
bp = np.array([[L+x0,0]])
return np.vstack((h,b,g,d,hp,bp))
# sample with a square 1*1
L = 1
H = 1
dl = 0.04
sol = in_curve(rect,[L,H],(100,100),dl)
sol_tri,Nint,Nbord = triang(sol,dl,rect,[L,H],rect_rim(L,H,dl))
# triangulation
tri = scipy.spatial.Delaunay(sol_tri)
M = Laplacian(sol_tri,tri,Nint)
valp,vecp = np.linalg.eig(M) # eigenvalues and eigenvectors
vecp = np.real(vecp)
# comparison with the exact solution:
T = 1000
U = np.arange(0,T,1)
NUsim = np.array([len(count_vp(valp,u)) for u in U])
NU = np.array([len(rect_drum(L,H,u)) for u in U])
plt.plot(U,NUsim,label='simulation')
plt.plot(U,NU,label='exacts')
plt.legend()
plt.show()
# 3D plot of an eigenvector
mode = 0 # change this for an another mode
vecp_tot = np.vstack((vecp,np.zeros((Nbord,Nint))))
fig = plt.figure()
ax = fig.gca(projection='3d')
ax.plot_trisurf(sol_tri[:,0],sol_tri[:,1],vecp_tot[:,mode],triangles=tri.simplices)
plt.show()
Notes :
1- The hight eigenvalues are false : it's an effect of discretisation.
2- If dl is too small, we have false eigenvectors and eigenvalues (at the top of valp and firsts vectors of vecp), it's probably due to the quality of the meshing.

Scipy optimize.minimize with multi- parameters

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import linalg, optimize
%matplotlib inline
Data load
data = pd.read_csv("D:/Stat/TimeSeries/KRW_month_0617_1.csv",index_col="Date") / 100
para = open("D:/Stat/TimeSeries/KRW_month_0617_1.txt").readlines()[0:2]
data.index = pd.to_datetime(data.index)
Parameters
cond = []
params = []
time = []
for i in para:
j = i.split()
for k in j:
cond.append(k)
cond = cond[1:]
for i in range(len(cond)):
cond[i] = round(float(cond[i]),4)
params = cond[0:23]
time = cond[23:]
maturity = np.array(time[1:])
timegap = 1/cond[23]
Functions We need
def Paramcheck(Params, checkStationary = 1):
result = 0
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0], [params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
for i in range(len(Sigma)):
if Sigma[i][i] < 0:
result = 1
for j in SigmaEps:
if np.any(SigmaEps) < 0:
result = 1
if Lambda < 0.05 or Lambda > 2:
result = 1
elif State[0] < 0:
result = 1
elif Kappa[0][0] < 0:
result = 1
if result == 0 and checkStationary > 0:
if max(np.linalg.eigvals(-Kappa).real) > 0:
result = 2
return result
def CheckDet(x):
if x == np.inf or x == np.nan:
result = 1
elif x < 0:
result = 2
elif abs(x) < 10**-250:
result = 3
else:
result = 0
return result
def NS_factor(lambda_val, maturity):
col1 = np.ones(len(maturity))
col2 = (1 - np.exp(-lambda_val*maturity))/(lambda_val*maturity)
col3 = col2 - np.exp(-lambda_val*maturity)
factor = np.array([col1,col2,col3]).transpose()
return factor
def DNS_Kalman_filter(Params, *args):
N = Paramcheck(Params)
if N == 0:
Kappa = np.array([[params[20],0,0], [0,params[21],0], [0,0,params[22]]])
Sigma = np.array([[params[1],0,0], [params[2],params[3],0],
[params[4],params[5],params[6]]])
State = np.array([params[7], params[8], params[9]])
Lambda = params[0]
SigmaEps = np.identity(10)
for i in range(10):
SigmaEps[i][i] = params[i+10]
Obs_Yield = args[0]
Obs_Date = args[1]
Timegap = args[2]
Obs_Mty = args[3]
Finalstate = args[4]
Mty_length = len(Obs_Mty)
B = NS_factor(lambda_val = Lambda,maturity = Obs_Mty)
H_large = SigmaEps **2
N_obs = len(Obs_Date)
LLH_vec = np.zeros(N_obs)
phi1 = linalg.expm(-Kappa*Timegap)
phi0 = (np.identity(3)-phi1) # State
Eigenvalues = np.linalg.eig(Kappa)[0]
Eigen_vec = np.linalg.eig(Kappa)[1]
Eigen_vec_inv = np.linalg.inv(Eigen_vec)
S = Eigen_vec_inv # Sigma # Sigma.transpose() # Eigen_vec_inv.transpose()
Atilde = np.dot(Sigma[0], Sigma[0])
Btilde = np.dot(Sigma[1], Sigma[1])
Ctilde = np.dot(Sigma[2], Sigma[2])
Dtilde = np.dot(Sigma[0], Sigma[1])
Etilde = np.dot(Sigma[0], Sigma[2])
Ftilde = np.dot(Sigma[1], Sigma[2])
res1= Atilde* Obs_Mty* Obs_Mty/6
res2= Btilde*(1/(2*Lambda**2) - (1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + (1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
res3= Ctilde*(1/(2*Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2)-
Obs_Mty*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda) -
3*np.exp(-2*Lambda*Obs_Mty)/(4*Lambda**2) - 2*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 5*(1-
np.exp(-2*Lambda*Obs_Mty))/(8*Lambda**3*Obs_Mty))
res4= Dtilde*(Obs_Mty/(2*Lambda) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) - (1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res5= Etilde*(3*np.exp(-Lambda*Obs_Mty)/(Lambda**2) + Obs_Mty/(2*Lambda)+Obs_Mty*np.exp(-
Lambda*Obs_Mty)/(Lambda) - 3*(1-np.exp(-Lambda*Obs_Mty))/(Lambda**3*Obs_Mty))
res6= Ftilde*(1/(Lambda**2) + np.exp(-Lambda*Obs_Mty)/(Lambda**2) -
np.exp(-2*Lambda*Obs_Mty)/(2*Lambda**2) - 3*(1-np.exp(-
Lambda*Obs_Mty))/(Lambda**3*Obs_Mty) + 3*(1-
np.exp(-2*Lambda*Obs_Mty))/(4*Lambda**3*Obs_Mty))
val = res1 + res2 + res3 + res4 + res5 + res6
V_mat = np.zeros([3,3])
V_lim = np.zeros([3,3])
for i in range(3):
for j in range(3):
V_mat[i][j] = S[i][j]*(1-np.exp(-(Eigenvalues[i] +
Eigenvalues[j])*Timegap))/(Eigenvalues[i] + Eigenvalues[j])
V_lim[i][j] = S[i][j]/(Eigenvalues[i] + Eigenvalues[j])
Q = (Eigen_vec # V_mat # Eigen_vec.transpose()).real
Sigma_lim = (Eigen_vec # V_lim # Eigen_vec.transpose()).real
for i in range(N_obs):
y = Obs_Yield[i]
xhat = phi0 + phi1 # State
y_implied = B # xhat
v = y - y_implied + val
Sigmahat = phi1 # Sigma_lim # phi1.transpose() + Q
F = B # Sigmahat # B.transpose() + H_large
detF = np.linalg.det(F)
if CheckDet(detF) > 0:
N = 3
break
Finv = np.linalg.inv(F)
State = xhat + Sigmahat # B.transpose() # Finv # v
Sigma_lim = Sigmahat - Sigmahat # B.transpose() # Finv # B # Sigmahat
LLH_vec[i] = np.log(detF) + v.transpose() # Finv # v
if N == 0:
if Finalstate:
yDate = Obs_Date[-1]
result = np.array([yDate,State])
else:
result = 0.5 * (sum(LLH_vec) + Mty_length*N_obs*np.log(2*np.pi))
else:
result = 7000000
return result
I made a code that does Arbitrage Free Nelson-Siegel model. Data is return rates of bond (1Y,1.5Y, ... ,20Y). I wanna optimize that function with scipy optimize.minimize function with fixed *args.
Suppose that Initial parmas are verified that it's close to optimized params from empirical experiments using Dynamic Nelson-Siegel Model.
LLC_new = 0
while True:
LLC_old = LLC_new
OPT = optimize.minimize(x0=params,fun=DNS_Kalman_filter, args=
(data.values,data.index,timegap,maturity,0))
params = OPT.x
LLC_new = round(OPT.fun,5)
print("Current LLC: %0.5f" %LLC_new)
if LLC_old == LLC_new:
OPT_para = params
FinalState = DNS_Kalman_filter(params,data.values,data.index,timegap,maturity,True)
break
Result is
Current LLC: -7613.70146
Current LLC: -7613.70146
LLC(log-likelihood value) isn't maximized. It's not a result I desire using Optimizer.
Is there any solution for that?
In R, there is optim() function works as similar as scipy.optimize.minimize() which works really well. I also have a R code for that very similar to this Python code.

How can I do test wald in Python?

I want to test a hypothesis that "intercept = 0, beta = 1" so I should do wald test and used module 'statsmodel.formula.api'.
But I'm not sure which code is correct when doing wald test.
from statsmodels.datasets import longley
import statsmodels.formula.api as smf
data = longley.load_pandas().data
hypothesis_0 = '(Intercept = 0, GNP = 0)'
hypothesis_1 = '(GNP = 0)'
hypothesis_2 = '(GNP = 1)'
hypothesis_3 = '(Intercept = 0, GNP = 1)'
results = smf.ols('TOTEMP ~ GNP', data).fit()
wald_0 = results.wald_test(hypothesis_0)
wald_1 = results.wald_test(hypothesis_1)
wald_2 = results.wald_test(hypothesis_2)
wald_3 = results.wald_test(hypothesis_3)
print(wald_0)
print(wald_1)
print(wald_2)
print(wald_3)
results.summary()
I thought hypothesis_3 is right at first.
But the result of hypothesis_1 is same with F-test of regression, which represent that the hypothesis 'intercept = 0 and beta = 0'.
So, I thought that the module,'wald_test' set 'intercept = 0' by default.
I'm not sure which one is correct.
Could you please give me an answer which one is right?
Hypothesis 3 is the correct joint null hypothesis for the wald test.
Hypothesis 1 is the same as the F-test in the summary output which is the hypothesis that all slope coefficients are zero.
I changed the example to use artificial data, so we can see the effect of different "true" beta coefficients.
import numpy as np
import pandas as pd
nobs = 100
np.random.seed(987125)
yx = np.random.randn(nobs, 2)
beta0 = 0
beta1 = 1
yx[:, 0] += beta0 + beta1 * yx[:, 1]
data = pd.DataFrame(yx, columns=['TOTEMP', 'GNP'])
hypothesis_0 = '(Intercept = 0, GNP = 0)'
hypothesis_1 = '(GNP = 0)'
hypothesis_2 = '(GNP = 1)'
hypothesis_3 = '(Intercept = 0, GNP = 1)'
results = smf.ols('TOTEMP ~ GNP', data).fit()
wald_0 = results.wald_test(hypothesis_0)
wald_1 = results.wald_test(hypothesis_1)
wald_2 = results.wald_test(hypothesis_2)
wald_3 = results.wald_test(hypothesis_3)
print('H0:', hypothesis_0)
print(wald_0)
print()
print('H0:', hypothesis_1)
print(wald_1)
print()
print('H0:', hypothesis_2)
print(wald_2)
print()
print('H0:', hypothesis_3)
print(wald_3)
In this case with beta0=0 and beta1=1, both hypothesis 2 and 3 hold. Hypothesis 0 and 1 are not consistent with the simulated data.
The wald test results reject the false and do not reject the true hypotheses, given sample size and effect size should result in high power.
H0: (Intercept = 0, GNP = 0)
<F test: F=array([[ 58.22023709]]), p=2.167936332972888e-17, df_denom=98, df_num=2>
H0: (GNP = 0)
<F test: F=array([[ 116.33149937]]), p=2.4054199668085043e-18, df_denom=98, df_num=1>
H0: (GNP = 1)
<F test: F=array([[ 0.1205935]]), p=0.7291363441993846, df_denom=98, df_num=1>
H0: (Intercept = 0, GNP = 1)
<F test: F=array([[ 0.0623734]]), p=0.9395692694166834, df_denom=98, df_num=2>
Similar results can be checked by changing beta0 and beta1.

How to get different predation values every year for my simulation?

I am trying to run this model of seed predation and population dynamics but I am new to coding and I am only getting one predation value that gets repeated over different generations. How can I get different predation values for different year?
Also, Is there an issue with the normalizing method used?
import numpy as np
import matplotlib.pyplot as plt
def is_odd(year):
return ((year % 2) == 1)
def reproduction(p_iter, year, dead):
if is_odd(year):
predation = dead
seedsProd = p_iter*s_oddd
seedsPred = K*predation*200*(seedsProd/np.sum(seedsProd))
return (seedsProd - seedsPred) + np.array([0,0,p_iter[2]])
else:
predation = dead
seedsProd = p_iter*s_even
seedsPred = K*predation*200*(seedsProd/np.sum(seedsProd))
return (seedsProd - seedsPred) +np.array([0,p_iter[1],0])
def normalize(p_iter):
if is_odd(year):
x = np.copy(p_iter)
x[2] = 0
x = (K-p_iter[2]) * x / sum(x)
x[2] = p_iter[2]
return x
else:
x = np.copy(p_iter)
x[1] = 0
x = (K-p_iter[1]) * x / sum(x)
x[1] = p_iter[1]
return x
Predation is defined here:
def predation():
return (np.array(np.round(np.random.uniform(0.4,0.6),2)))
#max_years
Y = 100
#carrying capacity
K = 1000
#initial populaton
p_1, p_2, p_3 = 998., 1., 1.
#seed released per plant
s_1, s_2, s_3 = 200, 260, 260
p_init = np.array([p_1, p_2, p_3],dtype=float)
s_oddd = np.array([s_1, s_2, 0.0])
s_even = np.array([s_1, 0.0, s_3])
n = len(p_init)
m = np.append(p_init,s_oddd)
p_iter = p_init
dead = 0
norm = 0
for year in range(1,Y+1):
dead = predation()
seeds = reproduction(p_iter, year, dead)
p_iter = np.maximum(seeds,np.zeros(p_iter.shape))
p_iter = normalize(p_iter)
m = np.vstack((m, [*p_iter]+[*seeds] ))

optimize.brute: ValueError: array is too big

I need to optimize a non-convex problem (max likelihood), and when I try quadratic optmiziation algorithms such as bfgs, Nelder-Mead, it fails to find the extremum, I frequently get saddle point, instead.
You can download data from here.
import numpy as np
import csv
from scipy.stats import norm
f=open('data.csv','r')
reader = csv.reader(f)
headers = next(reader)
column={}
for h in headers:
column[h] = []
for row in reader:
for h,v in zip(headers, row):
column[h].append(float(v))
ini=[-0.0002,-0.01,.002,-0.09,-0.04,0.01,-0.02,-.0004]
for i in range(0,len(x[0])):
ini.append(float(x[0][i]))
x_header = list(Coef_headers)
N = 19 # no of observations
I = 4
P =7
Yobs=np.zeros(N)
Yobs[:] = column['size']
X=np.zeros((N,P))
X[:,0] = column['costTon']
X[:,1] = column['com1']
X[:,2] = column['com3']
X[:,3] = column['com4']
X[:,4] = column['com5']
X[:,5] = column['night']
X[:,6] = 1 #constant
def myfunction(B):
beta = B[0.299,18.495,2.181,2.754,3.59,2.866,-12.846]
theta = 30
U=np.zeros((N,I))
mm=np.zeros(I)
u = np.zeros((N,I))
F = np.zeros((N,I))
G = np.zeros(N)
l = 0
s1 = np.expm1(-theta)
for n in range (0,N):
m = 0
U[n,0] = B[0]*column['cost_van'][n]+ B[4]*column['cap_van'][n]
U[n,1] = B[1]+ B[5]*column['ex'][n]+ B[8]*column['dist'][n]+ B[0]*column['cost_t'][n]+ B[4]*column['cap_t'][n]
U[n,2] = B[2]+ B[6]*column['ex'][n]+ B[9]*column['dist'][n] + B[0]*column['cost_Ht'][n]+ B[4]*column['cap_Ht'][n]
U[n,3] = B[3]+ B[7]*column['ex'][n]+ B[10]*column['dist'][n]+ B[0]*column['cost_tr'][n]+ B[4]*column['cap_tr'][n]
for i in range(0,I):
mm[i]=np.exp(U[n,i])
m= sum(mm)
for i in range(0,I):
u[n,i]=1/(1+ np.exp(U[n,i]- np.log(m-np.exp(U[n,i]))))
F[n,i] = np.expm1(-u[n,i]*theta)
CDF = np.zeros(N)
Y = X.dot(beta)
resid = 0
for n in range (0,N):
resid = resid + (np.square(Yobs[n]-Y[n]))
SSR = resid / N
dof = N - P - 1
s2 = resid/dof # MSE, or variance: the mean squarred error of residuals
for n in range(0,N):
CDF[n] = norm.cdf((Yobs[n]+1),SSR,s2) - norm.cdf((Yobs[n]-1),SSR,s2)
G[n] = np.expm1(-CDF[n]*theta)
k = column['Choice_Veh'][n]-1
l = l + (np.log10(1+(F[n,k]*G[n]/s1))/(-theta))
loglikelihood = np.log10(l)
return -loglikelihood
rranges = np.repeat(slice(-10, 10, 1),11, axis = 0)
a = rranges
from scipy import optimize
resbrute = optimize.brute(myfunction, rranges, full_output=True,finish=optimize.fmin)
print("# global minimum:", resbrute[0])
print("function value at global minimum :", resbrute[1])
Now, I decided to go for grid search and tried scipy.optimize.brute, but I get this error. In fact, my real variables are 47, I decreased it to 31 to work, but still doesn't. please help.
File "C:\...\site-packages\numpy\core\numeric.py", line 1906, in indices
res = empty((N,)+dimensions, dtype=dtype)
ValueError: array is too big.

Resources