I want to test a hypothesis that "intercept = 0, beta = 1" so I should do wald test and used module 'statsmodel.formula.api'.
But I'm not sure which code is correct when doing wald test.
from statsmodels.datasets import longley
import statsmodels.formula.api as smf
data = longley.load_pandas().data
hypothesis_0 = '(Intercept = 0, GNP = 0)'
hypothesis_1 = '(GNP = 0)'
hypothesis_2 = '(GNP = 1)'
hypothesis_3 = '(Intercept = 0, GNP = 1)'
results = smf.ols('TOTEMP ~ GNP', data).fit()
wald_0 = results.wald_test(hypothesis_0)
wald_1 = results.wald_test(hypothesis_1)
wald_2 = results.wald_test(hypothesis_2)
wald_3 = results.wald_test(hypothesis_3)
print(wald_0)
print(wald_1)
print(wald_2)
print(wald_3)
results.summary()
I thought hypothesis_3 is right at first.
But the result of hypothesis_1 is same with F-test of regression, which represent that the hypothesis 'intercept = 0 and beta = 0'.
So, I thought that the module,'wald_test' set 'intercept = 0' by default.
I'm not sure which one is correct.
Could you please give me an answer which one is right?
Hypothesis 3 is the correct joint null hypothesis for the wald test.
Hypothesis 1 is the same as the F-test in the summary output which is the hypothesis that all slope coefficients are zero.
I changed the example to use artificial data, so we can see the effect of different "true" beta coefficients.
import numpy as np
import pandas as pd
nobs = 100
np.random.seed(987125)
yx = np.random.randn(nobs, 2)
beta0 = 0
beta1 = 1
yx[:, 0] += beta0 + beta1 * yx[:, 1]
data = pd.DataFrame(yx, columns=['TOTEMP', 'GNP'])
hypothesis_0 = '(Intercept = 0, GNP = 0)'
hypothesis_1 = '(GNP = 0)'
hypothesis_2 = '(GNP = 1)'
hypothesis_3 = '(Intercept = 0, GNP = 1)'
results = smf.ols('TOTEMP ~ GNP', data).fit()
wald_0 = results.wald_test(hypothesis_0)
wald_1 = results.wald_test(hypothesis_1)
wald_2 = results.wald_test(hypothesis_2)
wald_3 = results.wald_test(hypothesis_3)
print('H0:', hypothesis_0)
print(wald_0)
print()
print('H0:', hypothesis_1)
print(wald_1)
print()
print('H0:', hypothesis_2)
print(wald_2)
print()
print('H0:', hypothesis_3)
print(wald_3)
In this case with beta0=0 and beta1=1, both hypothesis 2 and 3 hold. Hypothesis 0 and 1 are not consistent with the simulated data.
The wald test results reject the false and do not reject the true hypotheses, given sample size and effect size should result in high power.
H0: (Intercept = 0, GNP = 0)
<F test: F=array([[ 58.22023709]]), p=2.167936332972888e-17, df_denom=98, df_num=2>
H0: (GNP = 0)
<F test: F=array([[ 116.33149937]]), p=2.4054199668085043e-18, df_denom=98, df_num=1>
H0: (GNP = 1)
<F test: F=array([[ 0.1205935]]), p=0.7291363441993846, df_denom=98, df_num=1>
H0: (Intercept = 0, GNP = 1)
<F test: F=array([[ 0.0623734]]), p=0.9395692694166834, df_denom=98, df_num=2>
Similar results can be checked by changing beta0 and beta1.
Related
I am trying to track multiple set-points in the case of interacting quadruple tank system process. Here, the upper limits of tanks are 25 and lower limits are 0. I want to track the set-point values of 5,12,7 and 5. Although, I am able to track the initial 3 set-points (i.e. 5,12 and 7), I am not able to track the last set-point due to solver exceeding max. iterations. I have attached the code below->
#MHE+MPC model
#to measure computational time of the code
start=time.time()
#Process Model
p = GEKKO(remote=False)
process=0
p.time = [0,0.5]
noise = 0.25
#Constants
g = 981
g1 = .9
g2 = .9
A1=32
A3=32
A2=32
A4=32
a1=0.057
a3=0.057
a2=0.057
a4=0.057
init_h=5
#Controlled process variables
p.h1=p.SV(lb=0,ub=25)
p.h2=p.SV(lb=0,ub=25)
p.h3=p.SV(lb=0,ub=25)
p.h4=p.SV(lb=0,ub=25)
#Manipulated process variables
p.v1=p.MV(value=3.15,lb=0.1,ub=8)
p.v2=p.MV(value=3.15,lb=0.1,ub=8)
#Parameters of process
p.k1=p.Param(value=3.14,lb=0,ub=10)
p.k2=p.Param(value=3.14,lb=0,ub=10)
#Equations process
p.Equation(A1*p.h1.dt()==a3*((2*g*p.h3)**0.5)-(a1*((2*g*p.h1)**0.5))+(g1*p.k1*p.v1))
p.Equation(A2*p.h2.dt()==a4*((2*g*p.h4)**0.5)-(a2*((2*g*p.h2)**0.5))+(g2*p.k2*p.v2))
p.Equation(A3*p.h3.dt()==-a3*((2*g*p.h3)**0.5)+((1-g2)*p.k2*p.v2))
p.Equation(A4*p.h4.dt()==-a4*((2*g*p.h4)**0.5)+((1-g1)*p.k1*p.v1))
#options
p.options.IMODE = 4
#p.h1.TAU=-10^10
#p.h2.TAU=-10^10
#%% MHE Model
m = GEKKO(remote=False)
#prediction horizon
m.time = np.linspace(0,40,41) #0-20 by 0.5 -- discretization must match simulation
#MHE control, manipulated variables and parameters
m.h1=m.CV(lb=0,ub=25)
m.h2=m.CV(lb=0,ub=25)
m.h3=m.SV(lb=0,ub=25)
m.h4=m.SV(lb=0,ub=25)
m.v1=m.MV(value=3.15,lb=0.10,ub=8)
m.v2=m.MV(value=3.15,lb=0.10,ub=8)
m.k1=m.FV(value=3.14,lb=0,ub=10)
m.k2=m.FV(value=3.14,lb=0,ub=10)
#m.h1.TAU=0
#m.h2.TAU=0
#Equations
m.Equation(A1*m.h1.dt()==a3*((2*g*m.h3)**0.5)-(a1*((2*g*m.h1)**0.5))+(g1*m.k1*m.v1))
m.Equation(A2*m.h2.dt()==a4*((2*g*m.h4)**0.5)-(a2*((2*g*m.h2)**0.5))+(g2*m.k2*m.v2))
m.Equation(A3*m.h3.dt()==-a3*((2*g*m.h3)**0.5)+((1-g2)*m.k2*m.v2))
m.Equation(A4*m.h4.dt()==-a4*((2*g*m.h4)**0.5)+((1-g1)*m.k1*m.v1))
#Options
m.options.IMODE = 5 #MHE
m.options.EV_TYPE = 2
# STATUS = 0, optimizer doesn't adjust value
# STATUS = 1, optimizer can adjust
m.v1.STATUS = 0
m.v2.STATUS = 0
m.k1.STATUS=1
m.k2.STATUS=1
m.h1.STATUS = 1
m.h2.STATUS = 1
#m.h3.STATUS = 0
#m.h4.STATUS = 0
# FSTATUS = 0, no measurement
# FSTATUS = 1, measurement used to update model
m.v1.FSTATUS = 1
m.v2.FSTATUS = 1
m.k1.FSTATUS=0
m.k2.FSTATUS=0
m.h1.FSTATUS = 1
m.h2.FSTATUS = 1
m.h3.FSTATUS = 1
m.h4.FSTATUS = 1
#m.options.MAX_ITER=1000
m.options.SOLVER=3
m.options.NODES=3
#%% MPC Model
c = GEKKO(remote=False)
c.time = np.linspace(0,10,11) #0-5 by 0.5 -- discretization must match simulation
c.v1=c.MV(value=3.15,lb=0.10,ub=8)
c.v2=c.MV(value=3.15,lb=0.10,ub=8)
c.k1=c.FV(value=3.14,lb=0,ub=10)
c.k2=c.FV(value=3.14,lb=0,ub=10)
#Variables
c.h1=c.CV(lb=0,ub=25)
c.h2=c.CV(lb=0,ub=25)
c.h3=c.SV(lb=0,ub=25)
c.h4=c.SV(lb=0,ub=25)
#Equations
c.Equation(A1*c.h1.dt()==a3*((2*g*c.h3)**0.5)-(a1*((2*g*c.h1)**0.5))+(g1*c.k1*c.v1))
c.Equation(A2*c.h2.dt()==a4*((2*g*c.h4)**0.5)-(a2*((2*g*c.h2)**0.5))+(g2*c.k2*c.v2))
c.Equation(A3*c.h3.dt()==-a3*((2*g*c.h3)**0.5)+((1-g2)*c.k2*c.v2))
c.Equation(A4*c.h4.dt()==-a4*((2*g*c.h4)**0.5)+((1-g1)*c.k1*c.v1))
#Options
c.options.IMODE = 6 #MPC
c.options.CV_TYPE = 2
# STATUS = 0, optimizer doesn't adjust value
# STATUS = 1, optimizer can adjust
c.v1.STATUS = 1
c.v2.STATUS = 1
c.k1.STATUS=0
c.k2.STATUS=0
c.h1.STATUS = 1
c.h2.STATUS = 1
#c.h3.STATUS = 0
#c.h4.STATUS = 0
# FSTATUS = 0, no measurement
# FSTATUS = 1, measurement used to update model
c.v1.FSTATUS = 0
c.v2.FSTATUS = 0
c.k1.FSTATUS=1
c.k2.FSTATUS=1
c.h1.FSTATUS = 1
c.h2.FSTATUS = 1
c.h3.FSTATUS = 1
c.h4.FSTATUS = 1
sp=5
c.h1.SP=sp
c.h2.SP=sp
p1 = GEKKO(remote=False)
p1.time = [0,0.5]
#Parameters
p1.h1=p1.CV(lb=0,ub=25)
p1.h2=p1.CV(lb=0,ub=25)
p1.h3=p1.CV(lb=0,ub=25)
p1.h4=p1.CV(lb=0,ub=25)
p1.v1=p1.MV(value=3.15,lb=0.1,ub=8)
p1.v2=p1.MV(value=3.15,lb=0.1,ub=8)
p1.k1=p1.Param(lb=0,ub=10,value=3.14)
p1.k2=p1.Param(lb=0,ub=10,value=3.14)
#Equations
p1.Equation(A1*p1.h1.dt()==a3*((2*g*p1.h3)**0.5)-a1*((2*g*p1.h1)**0.5)+g1*p1.k1*p1.v1)
p1.Equation(A2*p1.h2.dt()==a4*((2*g*p1.h4)**0.5)-a2*((2*g*p1.h2)**0.5)+g2*p1.k2*p1.v2)
p1.Equation(A3*p1.h3.dt()==-a3*((2*g*p1.h3)**0.5)+(1-g2)*p1.k2*p1.v2)
p1.Equation(A4*p1.h4.dt()==-a4*((2*g*p1.h4)**0.5)+(1-g1)*p1.k1*p1.v1)
#options
p1.options.IMODE = 4
#%% problem configuration
# number of cycles
cycles = 480
# noise level
#%% run process, estimator and control for cycles
h1_meas = np.empty(cycles)
h2_meas =np.empty(cycles)
h3_meas =np.empty(cycles)
h4_meas=np.empty(cycles)
h1_est = np.empty(cycles)
h2_est = np.empty(cycles)
h3_est = np.empty(cycles)
h4_est = np.empty(cycles)
h1_plant=np.empty(cycles)
h2_plant=np.empty(cycles)
h3_plant=np.empty(cycles)
h4_plant=np.empty(cycles)
h1_measured=np.empty(cycles)
h2_measured=np.empty(cycles)
h3_measured=np.empty(cycles)
h4_measured=np.empty(cycles)
v1_est = np.empty(cycles)
v2_est = np.empty(cycles)
k1_est = np.empty(cycles)
k2_est = np.empty(cycles)
u_cont_k1 = np.empty(cycles)
u_cont_k2 = np.empty(cycles)
sp_store = np.empty(cycles)
sum_est=np.empty(cycles)
sum_model=np.empty(cycles)
# Create plot
plt.figure(figsize=(10,7))
plt.ion()
plt.show()
p.MAX_ITER=20
c.MAX_ITER=20
m.MAX_ITER=20
p1.MAX_ITER=20
for i in range(cycles):
print(i)
# set point changes
if i==cycles/4:
sp = 12
elif i==2*cycles/4:
sp = 7
elif i==3*cycles/4:
sp = 5
sp_store[i] = sp
c.h1.SP=sp
c.h2.SP=sp
c.k1.MEAS = m.k1.NEWVAL
c.k2.MEAS = m.k2.NEWVAL
if p.options.SOLVESTATUS == 1:
# print("going:",i)
c.h1.MEAS = p.h1.MODEL
c.h2.MEAS = p.h2.MODEL
c.h3.MEAS = p.h3.MODEL
c.h4.MEAS = p.h4.MODEL
print(i,'Plant Model:',p.h1.MODEL,p.h2.MODEL,p.h3.MODEL,p.h4.MODEL)
c.solve(disp=False,debug=0)
#print("NEWVAL:",i,c.u,c.u.NEWVAL)
u_cont_k1[i] = c.v1.NEWVAL
u_cont_k2[i] = c.v2.NEWVAL
#print("Horizon:",i,c.h1[0:],c.h2[0:])
#print("Move:",i,c.v1.NEWVAL,c.v2.NEWVAL)
## process simulator
#load control move
p.v1.MEAS = u_cont_k1[i]
p.v2.MEAS = u_cont_k2[i]
#simulate
p.solve(disp=False,debug=0)
#plant model
p1.k1=3.14
p1.k2=3.14
p1.v1.MEAS = u_cont_k1[i]
p1.v2.MEAS = u_cont_k2[i]
p1.solve(disp=False,debug=0)
h1_plant[i]=p1.h1.MODEL
h2_plant[i]=p1.h2.MODEL
h3_plant[i]=p1.h3.MODEL
h4_plant[i]=p1.h4.MODEL
h1_measured[i]=p1.h1.MODEL+(random()*2)*noise
h2_measured[i]=p1.h2.MODEL+(random()*2)*noise
h3_measured[i]=p1.h3.MODEL+(random()*2)*noise
h4_measured[i]=p1.h4.MODEL+(random()*2)*noise
#print("Model process output:",i,p.h1.MODEL,p.h2.MODEL,p.h3.MODEL,p.h4.MODEL)
#load output with white noise
h1_meas[i] = p.h1.MODEL+(random()-0.5)*noise
h2_meas[i] = p.h2.MODEL+(random()-0.5)*noise
h3_meas[i] = p.h3.MODEL+(random()-0.5)*noise
h4_meas[i] = p.h4.MODEL+(random()-0.5)*noise
#Only MPC
## estimator
#load input and measured output
m.v1.MEAS = u_cont_k1[i]
m.v2.MEAS = u_cont_k2[i]
#m.h1.MEAS = h1_meas[i]+(random()*2)*noise
#m.h2.MEAS = h2_meas[i]+(random()*2)*noise
#m.h3.MEAS = h3_meas[i]+(random()*2)*noise
#m.h4.MEAS = h4_meas[i]+(random()*2)*noise
m.h1.MEAS = h1_meas[i]
m.h2.MEAS = h2_meas[i]
m.h3.MEAS = h3_meas[i]
m.h4.MEAS = h4_meas[i]
#m.COLDSTART=2
#optimize parameters
m.solve(disp=False,debug=0)
#store results
if i>=process:
h1_est[i] = m.h1.MODEL
h2_est[i] = m.h2.MODEL
h3_est[i] = m.h3.MODEL
h4_est[i] = m.h4.MODEL
v1_est[i] = m.v1.NEWVAL
v2_est[i] = m.v2.NEWVAL
k1_est[i]= m.k1.NEWVAL
k2_est[i] = m.k2.NEWVAL
print("Estimated h:",i,h1_est[i],h2_est[i],h3_est[i],h4_est[i])
print("Estimated k:",i,k1_est[i],k2_est[i],p.k1[0],p.k2[0])
print("Estimated v:",i,v1_est[i],v2_est[i])
print("dh1/dt:",(a3*((2*g*h3_est[i])**0.5)-(a1*((2*g*h3_est[i])**0.5))+(g1*k1_est[i]*v1_est[i]))/A3)
print("dh2/dt:",(a4*((2*g*h4_est[i])**0.5)-(a2*((2*g*h2_est[i])**0.5))+(g2*k2_est[i]*v2_est[i]))/A2)
print("dh3/dt:",(-a3*((2*g*h3_est[i])**0.5)+((1-g2)*k2_est[i]*v2_est[i]))/A3)
print("dh4/dt:",(-a4*((2*g*h4_est[i])**0.5)+((1-g1)*k1_est[i]*v1_est[i]))/A4)
if i%1==0:
plt.clf()
plt.subplot(4,1,1)
#plt.plot(h1_meas[0:i])
#plt.plot(h2_meas[0:i])
#plt.plot(h3_meas[0:i])
#plt.plot(h4_meas[0:i])
plt.plot(h1_est[0:i])
plt.plot(h2_est[0:i])
plt.plot(sp_store[0:i])
plt.subplot(4,1,2)
plt.plot(h3_est[0:i])
plt.plot(h4_est[0:i])
#plt.legend(('h1_pred','h2_pred','h3_pred','h4_pred'))
plt.subplot(4,1,3)
plt.plot(k1_est[0:i])
plt.plot(k2_est[0:i])
plt.subplot(4,1,4)
plt.plot(v1_est[0:i])
plt.plot(v2_est[0:i])
plt.draw()
plt.pause(0.05)
end=time.time()
print("total time:",end-start)
I feel there is some issue with my MHE+MPC code. However, I am not able to realize the mistake?
Nice application. I needed a few imports to make the script work. These may be loaded automatically for you.
from gekko import GEKKO
import time
import numpy as np
import matplotlib.pyplot as plt
from random import random
The script solves successfully if a lower bound is included on all the level variables (1e-6). There is a problem when the level goes below zero or is at zero when using m.sqrt(). This small adjustment helps it solve successfully so it doesn't get into a region where it is undefined. Gekko solvers can't deal with imaginary numbers.
Although the solution is successful, it appears that the control performance oscillates. There may need to be some tuning of the application.
I am trying to implement parts of Facebook's prophet with some help from this example.
https://github.com/luke14free/pm-prophet/blob/master/pmprophet/model.py
This goes well :), but I am having some problems with the dot product I don't understand. Note that I am implementing the linear trends.
ds = pd.to_datetime(df['dagindex'], format='%d-%m-%y')
m = pm.Model()
changepoint_prior_scale = 0.05
n_changepoints = 25
changepoints = pd.date_range(
start=pd.to_datetime(ds.min()),
end=pd.to_datetime(ds.max()),
periods=n_changepoints + 2
)[1: -1]
with m:
# priors
sigma = pm.HalfCauchy('sigma', 10, testval=1)
#trend
growth = pm.Normal('growth', 0, 10)
prior_changepoints = pm.Laplace('changepoints', 0, changepoint_prior_scale, shape=len(changepoints))
y = np.zeros(len(df))
# indexes x_i for the changepoints.
s = [np.abs((ds - i).values).argmin() for i in changepoints]
g = growth
x = np.arange(len(ds))
# delta
d = prior_changepoints
regression = x * g
base_piecewise_regression = []
for i in s:
local_x = x.copy()[:-i]
local_x = np.concatenate([np.zeros(i), local_x])
base_piecewise_regression.append(local_x)
piecewise_regression = np.array(base_piecewise_regression)
# this dot product doesn't work?
piecewise_regression = pm.math.dot(theano.shared(piecewise_regression).T, d)
# If I comment out this line and use that one as dot product. It works fine
# piecewise_regression = (piecewise_regression.T * d[None, :]).sum(axis=-1)
regression += piecewise_regression
y += regression
obs = pm.Normal('y',
mu=(y - df.gebruikers.mean()) / df.gebruikers.std(),
sd=sigma,
observed=(df.gebruikers - df.gebruikers.mean()) / df.gebruikers.std())
start = pm.find_MAP(maxeval=10000)
trace = pm.sample(500, step=pm.NUTS(), start=start)
If I run the snippet above with
piecewise_regression = (piecewise_regression.T * d[None, :]).sum(axis=-1)
the model works as expected. However I cannot get it to work with a dot product. The NUTS sampler doesn't sample at all.
piecewise_regression = pm.math.dot(theano.shared(piecewise_regression).T, d)
EDIT
Ive got a minimal working example
The problem still occurs with theano.shared. I’ve got a minimal working example:
np.random.seed(5)
n_changepoints = 10
t = np.arange(1000)
s = np.sort(np.random.choice(t, size=n_changepoints, replace=False))
a = (t[:, None] > s) * 1
real_delta = np.random.normal(size=n_changepoints)
y = np.dot(a, real_delta) * t
with pm.Model():
sigma = pm.HalfCauchy('sigma', 10, testval=1)
delta = pm.Laplace('delta', 0, 0.05, shape=n_changepoints)
g = tt.dot(a, delta) * t
obs = pm.Normal('obs',
mu=(g - y.mean()) / y.std(),
sd=sigma,
observed=(y - y.mean()) / y.std())
trace = pm.sample(500)
It seems to have something to do with the size of matrix a. NUTS doesnt’t sample if I start with
t = np.arange(1000)
however the example above does sample when I reduce the size of t to:
t = np.arange(100)
I am trying to run this model of seed predation and population dynamics but I am new to coding and I am only getting one predation value that gets repeated over different generations. How can I get different predation values for different year?
Also, Is there an issue with the normalizing method used?
import numpy as np
import matplotlib.pyplot as plt
def is_odd(year):
return ((year % 2) == 1)
def reproduction(p_iter, year, dead):
if is_odd(year):
predation = dead
seedsProd = p_iter*s_oddd
seedsPred = K*predation*200*(seedsProd/np.sum(seedsProd))
return (seedsProd - seedsPred) + np.array([0,0,p_iter[2]])
else:
predation = dead
seedsProd = p_iter*s_even
seedsPred = K*predation*200*(seedsProd/np.sum(seedsProd))
return (seedsProd - seedsPred) +np.array([0,p_iter[1],0])
def normalize(p_iter):
if is_odd(year):
x = np.copy(p_iter)
x[2] = 0
x = (K-p_iter[2]) * x / sum(x)
x[2] = p_iter[2]
return x
else:
x = np.copy(p_iter)
x[1] = 0
x = (K-p_iter[1]) * x / sum(x)
x[1] = p_iter[1]
return x
Predation is defined here:
def predation():
return (np.array(np.round(np.random.uniform(0.4,0.6),2)))
#max_years
Y = 100
#carrying capacity
K = 1000
#initial populaton
p_1, p_2, p_3 = 998., 1., 1.
#seed released per plant
s_1, s_2, s_3 = 200, 260, 260
p_init = np.array([p_1, p_2, p_3],dtype=float)
s_oddd = np.array([s_1, s_2, 0.0])
s_even = np.array([s_1, 0.0, s_3])
n = len(p_init)
m = np.append(p_init,s_oddd)
p_iter = p_init
dead = 0
norm = 0
for year in range(1,Y+1):
dead = predation()
seeds = reproduction(p_iter, year, dead)
p_iter = np.maximum(seeds,np.zeros(p_iter.shape))
p_iter = normalize(p_iter)
m = np.vstack((m, [*p_iter]+[*seeds] ))
import numpy as np
udacity_set = np.array(
[[1,1,1,0],
[1,0,1,0],
[0,1,0,1],
[1,0,0,1]])
label = udacity_set[:,udacity_set.shape[1]-1]
fx = label.size
positive = label[label == 1].shape[0]
positive_probability = positive/fx
negative = label[label == 0].shape[0]
negative_probability = negative/fx
entropy = -negative_probability*np.log2(negative_probability) - positive_probability*np.log2(positive_probability)
atribute = 0
V = 1
attribute_set = udacity_set[np.where(udacity_set[:,atribute] == 1)] #selecting positive instance of occurance in attribute 14
instances = attribute_set.shape[0]
negative_labels = attribute_set[np.where(attribute_set[:,attribute_set.shape[1]-1]== 0)].shape[0]
positive_labels = attribute_set[np.where(attribute_set[:,attribute_set.shape[1]-1]== 1)].shape[0]
p0 = negative_labels/instances
p1 = positive_labels/instances
entropy2 = - p0*np.log2(p0) - p1*np.log2(p1)
attribute_set2 = udacity_set[np.where(udacity_set[:,atribute] == 0)] #selecting positive instance of occurance in attribute 14
instances2 = attribute_set2.shape[0]
negative_labels2 = attribute_set[np.where(attribute_set2[:,attribute_set2.shape[1]-1]== 0)].shape[0]
positive_labels2 = attribute_set[np.where(attribute_set2[:,attribute_set2.shape[1]-1]== 1)].shape[0]
p02 = negative_labels2/instances2
p12 = positive_labels2/instances2
entropy22 = - p02*np.log2(p02) - p12*np.log2(p12)
Problem is when attribute is pure and entropy is meant to be 0. But when i put this into a formula i get NaN. I know how to code workaround, but why is this formula rigged?
I need to optimize a non-convex problem (max likelihood), and when I try quadratic optmiziation algorithms such as bfgs, Nelder-Mead, it fails to find the extremum, I frequently get saddle point, instead.
You can download data from here.
import numpy as np
import csv
from scipy.stats import norm
f=open('data.csv','r')
reader = csv.reader(f)
headers = next(reader)
column={}
for h in headers:
column[h] = []
for row in reader:
for h,v in zip(headers, row):
column[h].append(float(v))
ini=[-0.0002,-0.01,.002,-0.09,-0.04,0.01,-0.02,-.0004]
for i in range(0,len(x[0])):
ini.append(float(x[0][i]))
x_header = list(Coef_headers)
N = 19 # no of observations
I = 4
P =7
Yobs=np.zeros(N)
Yobs[:] = column['size']
X=np.zeros((N,P))
X[:,0] = column['costTon']
X[:,1] = column['com1']
X[:,2] = column['com3']
X[:,3] = column['com4']
X[:,4] = column['com5']
X[:,5] = column['night']
X[:,6] = 1 #constant
def myfunction(B):
beta = B[0.299,18.495,2.181,2.754,3.59,2.866,-12.846]
theta = 30
U=np.zeros((N,I))
mm=np.zeros(I)
u = np.zeros((N,I))
F = np.zeros((N,I))
G = np.zeros(N)
l = 0
s1 = np.expm1(-theta)
for n in range (0,N):
m = 0
U[n,0] = B[0]*column['cost_van'][n]+ B[4]*column['cap_van'][n]
U[n,1] = B[1]+ B[5]*column['ex'][n]+ B[8]*column['dist'][n]+ B[0]*column['cost_t'][n]+ B[4]*column['cap_t'][n]
U[n,2] = B[2]+ B[6]*column['ex'][n]+ B[9]*column['dist'][n] + B[0]*column['cost_Ht'][n]+ B[4]*column['cap_Ht'][n]
U[n,3] = B[3]+ B[7]*column['ex'][n]+ B[10]*column['dist'][n]+ B[0]*column['cost_tr'][n]+ B[4]*column['cap_tr'][n]
for i in range(0,I):
mm[i]=np.exp(U[n,i])
m= sum(mm)
for i in range(0,I):
u[n,i]=1/(1+ np.exp(U[n,i]- np.log(m-np.exp(U[n,i]))))
F[n,i] = np.expm1(-u[n,i]*theta)
CDF = np.zeros(N)
Y = X.dot(beta)
resid = 0
for n in range (0,N):
resid = resid + (np.square(Yobs[n]-Y[n]))
SSR = resid / N
dof = N - P - 1
s2 = resid/dof # MSE, or variance: the mean squarred error of residuals
for n in range(0,N):
CDF[n] = norm.cdf((Yobs[n]+1),SSR,s2) - norm.cdf((Yobs[n]-1),SSR,s2)
G[n] = np.expm1(-CDF[n]*theta)
k = column['Choice_Veh'][n]-1
l = l + (np.log10(1+(F[n,k]*G[n]/s1))/(-theta))
loglikelihood = np.log10(l)
return -loglikelihood
rranges = np.repeat(slice(-10, 10, 1),11, axis = 0)
a = rranges
from scipy import optimize
resbrute = optimize.brute(myfunction, rranges, full_output=True,finish=optimize.fmin)
print("# global minimum:", resbrute[0])
print("function value at global minimum :", resbrute[1])
Now, I decided to go for grid search and tried scipy.optimize.brute, but I get this error. In fact, my real variables are 47, I decreased it to 31 to work, but still doesn't. please help.
File "C:\...\site-packages\numpy\core\numeric.py", line 1906, in indices
res = empty((N,)+dimensions, dtype=dtype)
ValueError: array is too big.