I am trying to solve the problem as follows with Gekko in python.
I_s is an indicator variable in the problem whose value is 1 if theta is positive and 0 if theta is zero.
I wrote the problem in a code using Gekko, python.
In contrast to my previous posts, I add some constraints with respect to I, which is an indicator variable.
If I set N=10, the solution, theta is all zero, which is the result that I want.
But if I set N=100 or 200, the solution cannot be found. I cannot understand why this happens.
I want to check if theta is also zero in larger N (200).
Is there any way to solve this issue?
My code is as belows.
# Import package
from gekko import GEKKO
import numpy as np
# Define parameters
P_CO = 600 # $/tonCO
beta_CO2 = 1 # no unit
P_CO2 = 80 # $/tonCO2eq
E_ref = 3.1022616 # tonCO2eq/tonCO
E_dir = -1.600570692 # tonCO2eq/tonCO
E_indir_others = 0.3339226804 # tonCO2eq/tonCO
E_indir_elec_cons = 18.46607256 # GJ/tonCO
C1_CAPEX = 285695 # no unit
C2_CAPEX = 188.42 # no unit
C1_FOX = 82282 # no unit
C2_FOX = 24.094 # no unit
C1_ROX = 4471.5 # no unit
C2_ROX = 96.034 # no unit
C1_UOX = 7934.9 # no unit
C2_UOX = 986.9 # no unit
r = 0.08 # discount rate
N = 10 # number of scenarios
T = 30 # total time period
GWP_init = 0.338723235 # 2020 Electricity GWP in EU 27 countries
theta_max = 1600000 # Max capacity
# Function to make GWP_EU matrix (TxN matrix)
def Electricity_GWP(GWP_init, n_years, num_episodes):
GWP_mean = 0.36258224*np.exp(-0.16395611*np.arange(1, n_years+2)) + 0.03091272
GWP_mean = GWP_mean.reshape(-1,1)
GWP_Yearly = np.tile(GWP_mean, num_episodes)
noise = np.zeros((n_years+1, num_episodes))
stdev2050 = GWP_mean[-1] * 0.25
stdev = np.arange(0, stdev2050 * (1 + 1/n_years), stdev2050/n_years)
for i in range(n_years+1):
noise[i,:] = np.random.normal(0, stdev[i], num_episodes)
GWP_forecast = GWP_Yearly + noise
return GWP_forecast
GWP_EU = Electricity_GWP(GWP_init, T, N) # (T+1)*N matrix
GWP_EU = GWP_EU[1:,:] # T*N matrix
print(np.shape(GWP_EU))
# Build Gekko model
m = GEKKO(remote=False)
theta = m.Array(m.Var, N, lb=0, ub=theta_max)
I = m.Array(m.Var, N, lb=0, ub=1, integer=True)
demand = np.ones((T,1))
demand[0] = 8031887.589
for k in range(1,11):
demand[k] = demand[k-1] * 1.026
for k in range(11,21):
demand[k] = demand[k-1] * 1.016
for k in range(21,T):
demand[k] = demand[k-1] * 1.011
demand = 0.12 * demand
demand = np.tile(demand, N) # T*N matrix
print(np.shape(demand))
m3 = [[m.min3(demand[t,s],theta[s]) for t in range(T)] for s in range(N)]
obj = m.sum([sum([((1/(1+r))**(t+1))*((P_CO*m3[s][t]) \
+ (beta_CO2*P_CO2*m3[s][t]*(E_ref-E_dir-E_indir_others-E_indir_elec_cons*GWP_EU[t,s])) \
- (C1_CAPEX*I[s]+C2_CAPEX*theta[s]+C1_FOX*I[s]+C2_FOX*theta[s])\
- (C1_ROX*I[s]+C2_ROX*m3[s][t]+C1_UOX*I[s]+C2_UOX*m3[s][t])) for t in range(T)]) for s in range(N)])
for i in range(N):
m.Equation(theta[i]<=1000000*I[i])
m.Equation(-theta[i]<1000000*(1-I[i]))
# obj = m.sum([m.sum([((1/(1+r))**(t+1))*((P_CO*m.min3(demand[t,s], theta[s])) \
# + (beta_CO2*P_CO2*m.min3(demand[t,s], theta[s])*(E_ref-E_dir-E_indir_others-E_indir_elec_cons*GWP_EU[t,s])) \
# - (C1_CAPEX+C2_CAPEX*theta[s]+C1_FOX+C2_FOX*theta[s])-(C1_ROX+C2_ROX*m.min3(demand[t,s], theta[s])+C1_UOX+C2_UOX*m.min3(demand[t,s], theta[s]))) for t in range(T)]) for s in range(N)])
m.Maximize(obj/N)
m.solve(disp=True)
# s = m.sum(m.sum(((1/(1+r))**(t+1))*((P_CO*m.min3(demand[t,s], theta[s])) \
# + beta_CO2*P_CO2*m.min3(demand[t,s], theta[s])*(E_ref-E_dir-E_indir_others-E_indir_elec_cons*GWP_EU[t,s]) \
# - (C1_CAPEX + C2_CAPEX*theta[s]) - (C1_FOX + C2_FOX*theta[s]) - (C1_ROX + C2_ROX*m.min3(demand[t,s], theta[s])) - (C1_UOX + C2_UOX*m.min3(demand[t,s], theta[s])))
# for s in range(N)) for t in range(T))/N
print(theta)
I solved this issue by increasing the big M in the constraint for an indicator variable I, 1000000 to 10000000.
for i in range(N):
m.Equation(theta[i]<=10000000*I[i])
m.Equation(-theta[i]<10000000*(1-I[i]))
I didn't understand why this worked, but the result gave me the solution of 200*1 array with all zero.
I'm trying to solve the optimization problem as above.
And my code is as belows.
It worked, but I got the negative degrees of freedom problem.
And the objective value was also negative, which I did not expect to be. I expected the positive one.
I can't understand why this happened and don't know how this problem can be solved.
Can somebody give me a suggestion?
Code
# Import package
from gekko import GEKKO
import numpy as np
# Define parameters
P_CO = 600 # $/tonCO
beta_CO2 = 1 # no unit
P_CO2 = 60 # $/tonCO2eq
E_ref = 3.1022616 # tonCO2eq/tonCO
E_dir = -1.600570692 # tonCO2eq/tonCO
E_indir_others = 0.3339226804 # tonCO2eq/tonCO
E_indir_elec_cons = 18.46607256 # GJ/tonCO
C1_CAPEX = 285695 # no unit
C2_CAPEX = 188.42 # no unit
C1_FOX = 82282 # no unit
C2_FOX = 24.094 # no unit
C1_ROX = 4471.5 # no unit
C2_ROX = 96.034 # no unit
C1_UOX = 1983.7 # no unit
C2_UOX = 249.79 # no unit
r = 0.08 # discount rate
N = 10 # number of scenarios
T = 30 # total time period
GWP_init = 0.338723235 # 2020 Electricity GWP in EU 27 countries
theta_max = 1600000 # Max capacity
# Function to make GWP_EU matrix (TxN matrix)
def Electricity_GWP(GWP_init, n_years, num_episodes):
GWP_mean = 0.36258224*np.exp(-0.16395611*np.arange(1, n_years+2)) + 0.03091272
GWP_mean = GWP_mean.reshape(-1,1)
GWP_Yearly = np.tile(GWP_mean, num_episodes)
noise = np.zeros((n_years+1, num_episodes))
stdev2050 = GWP_mean[-1] * 0.25
stdev = np.arange(0, stdev2050 * (1 + 1/n_years), stdev2050/n_years)
for i in range(n_years+1):
noise[i,:] = np.random.normal(0, stdev[i], num_episodes)
GWP_forecast = GWP_Yearly + noise
return GWP_forecast
GWP_EU = Electricity_GWP(GWP_init, T, N) # (T+1)*N matrix
GWP_EU = GWP_EU[1:,:] # T*N matrix
print(np.shape(GWP_EU))
# Build Gekko model
m = GEKKO(remote=False)
theta = m.Array(m.Var, N, lb=0, ub=theta_max)
demand = np.ones((T,1))
demand[0] = 8031887.589
for k in range(1,11):
demand[k] = demand[k-1] * 1.026
for k in range(11,21):
demand[k] = demand[k-1] * 1.016
for k in range(21,T):
demand[k] = demand[k-1] * 1.011
demand = 0.12 * demand
demand = np.tile(demand, N) # T*N matrix
print(np.shape(demand))
obj = m.sum([m.sum([((1/(1+r))**(t+1))*((P_CO*m.min3(demand[t,s], theta[s])) \
+ (beta_CO2*P_CO2*m.min3(demand[t,s], theta[s])*(E_ref-E_dir-E_indir_others-E_indir_elec_cons*GWP_EU[t,s])) \
- (C1_CAPEX+C2_CAPEX*theta[s]+C1_FOX+C2_FOX*theta[s])-(C1_ROX+C2_ROX*m.min3(demand[t,s], theta[s])+C1_UOX+C2_UOX*m.min3(demand[t,s], theta[s]))) for t in range(T)]) for s in range(N)])
m.Maximize(obj/N)
m.solve()
Output message
(30, 10)
(30, 10)
----------------------------------------------------------------
APMonitor, Version 1.0.0
APMonitor Optimization Suite
----------------------------------------------------------------
--------- APM Model Size ------------
Each time step contains
Objects : 11
Constants : 0
Variables : 5121
Intermediates: 0
Connections : 321
Equations : 3901
Residuals : 3901
Number of state variables: 5121
Number of total equations: - 3911
Number of slack variables: - 2400
---------------------------------------
Degrees of freedom : -1190
* Warning: DOF <= 0
----------------------------------------------
Steady State Optimization with APOPT Solver
----------------------------------------------
Iter: 1 I: 0 Tm: 18.61 NLPi: 5 Dpth: 0 Lvs: 0 Obj: -1.87E+09 Gap: 0.00E+00
Successful solution
---------------------------------------------------
Solver : APOPT (v1.0)
Solution time : 18.619200000000003 sec
Objective : -1.8677021320161405E+9
Successful solution
---------------------------------------------------
The negative DOF warning is because of the slack variables that are created when using the min3() function. It is only a warning that if all of the inequalities are active then this could lead to an over-specified system of equations (more equations than variables). If there is a successful solution then this warning can be ignored.
The negative objective is because most solvers require a minimization of the objective. Gekko automatically converts m.Maximize(obj) to m.Minimize(-obj). This is an equivalent objective. If you'd like to report the maximization and the positive objective, use the following at the end:
print('Objective: ',-m.options.OBJFCNVAL)
I made a small block of code that - given n objects of specified masses and vector coordinates over time - will calculate the center of mass. I think the code looks clunky (it uses 3 for-loops), and was wondering if there were numpy methods to vectorize (or at least speed up) this method. As a note, the use of the class Body could probably be averted for this task, but is used in other relevant code not shown here.
import numpy as np
class Body():
def __init__(self, mass, position):
self.mass = mass
self.position = position
def __str__(self):
return '\n .. mass:\n{}\n\n .. position:\n{}\n'.format(self.mass, self.position)
Three objects are initialized.
mass = 100 # same for all 3 objects
ndim = 3 # 3 dimensional space
nmoments = 10 # 10 moments in time
## initialize bodies
nelems = ndim * nmoments
x = np.arange(nelems).astype(int).reshape((nmoments, ndim))
A = Body(mass, position=x)
B = Body(mass, position=x / 2.)
C = Body(mass, position=x * 2.)
bodies = [A, B, C]
total_mass = sum([body.mass for body in bodies])
# print("\n ** A **\n{}\n".format(A))
# print("\n ** B **\n{}\n".format(B))
# print("\n ** C **\n{}\n".format(C))
## get center of mass
center_of_mass = []
for dim in range(ndim):
coms = []
for moment in range(nmoments):
numerator = 0
for body in bodies:
numerator += body.mass * body.position[moment, dim]
com = numerator / total_mass
coms.append(com)
center_of_mass.append(coms)
center_of_mass = np.array(center_of_mass).T
# print("\n .. center of mass:\n{}\n".format(center_of_mass))
As verification that the code works, the print statements in the code above output the following:
** A **
.. mass:
100
.. position:
[[ 0 1 2]
[ 3 4 5]
[ 6 7 8]
[ 9 10 11]
[12 13 14]
[15 16 17]
[18 19 20]
[21 22 23]
[24 25 26]
[27 28 29]]
** B **
.. mass:
100
.. position:
[[ 0. 0.5 1. ]
[ 1.5 2. 2.5]
[ 3. 3.5 4. ]
[ 4.5 5. 5.5]
[ 6. 6.5 7. ]
[ 7.5 8. 8.5]
[ 9. 9.5 10. ]
[10.5 11. 11.5]
[12. 12.5 13. ]
[13.5 14. 14.5]]
** C **
.. mass:
100
.. position:
[[ 0. 2. 4.]
[ 6. 8. 10.]
[12. 14. 16.]
[18. 20. 22.]
[24. 26. 28.]
[30. 32. 34.]
[36. 38. 40.]
[42. 44. 46.]
[48. 50. 52.]
[54. 56. 58.]]
.. center of mass:
[[ 0. 1.16666667 2.33333333]
[ 3.5 4.66666667 5.83333333]
[ 7. 8.16666667 9.33333333]
[10.5 11.66666667 12.83333333]
[14. 15.16666667 16.33333333]
[17.5 18.66666667 19.83333333]
[21. 22.16666667 23.33333333]
[24.5 25.66666667 26.83333333]
[28. 29.16666667 30.33333333]
[31.5 32.66666667 33.83333333]]
Using numpy will speed things up and make the code cleaner. I'm not an expert in n-body problems so I've hopefully followed the algorithm OK,the results look to be the same. All the loops become implicit in numpy.
# ***** From the question *****
import numpy as np
class Body():
def __init__(self, mass, position):
self.mass = mass
self.position = position
def __str__(self):
return '\n .. mass:\n{}\n\n .. position:\n{}\n'.format(self.mass, self.position)
mass = 100 # same for all 3 objects
ndim = 3 # 3 dimensional space
nmoments = 10 # 10 moments in time
## initialize bodies
nelems = ndim * nmoments
x = np.arange(nelems).astype(int).reshape((nmoments, ndim))
A = Body(mass, position=x)
B = Body(mass, position=x / 2.)
C = Body(mass, position=x * 2.)
bodies = [A, B, C]
# **** End of code from the question ****
# Fill the numpy arrays
np_mass = np.array( [ body.mass for body in bodies ])[ :,None, None ]
# the [:, None, None] turns np_mass into a 3D array for correct broadcasting
np_pos = np.array( [ body.position for body in bodies ]) # 3D
np_mass.shape
# (3, 1, 1) # (n_bodies, 1, 1 ) - The two 'spare' dimensions force the broadcasting to be along the correct axes
np_pos.shape
# (3, 10, 3) # ( n_bodies, nmoments, ndims )
total_mass = np_mass.sum() # Sum the three masses
numerator = (np_mass * np_pos).sum(axis=0) # sum np_mass * np_pos along the body (0) axis.
com = numerator / total_mass # divide by total_mass
# Could be a oneliner
# com = (np_mass * np_pos).sum(axis=0) / np.mass.sum()
print(com)
# array([[ 0. , 1.16666667, 2.33333333],
# [ 3.5 , 4.66666667, 5.83333333],
# [ 7. , 8.16666667, 9.33333333],
# [10.5 , 11.66666667, 12.83333333],
# [14. , 15.16666667, 16.33333333],
# [17.5 , 18.66666667, 19.83333333],
# [21. , 22.16666667, 23.33333333],
# [24.5 , 25.66666667, 26.83333333],
# [28. , 29.16666667, 30.33333333],
# [31.5 , 32.66666667, 33.83333333]])
center_of_mass = (A.mass * A.position + B.mass * B.position + C.mass * C.position) / total_mass
I need to use Backpropagation Neural Netwrok for multiclass classification purposes in my application. I have found this code and try to adapt it to my needs. It is based on the lections of Machine Learning in Coursera from Andrew Ng.
I have tested it in IRIS dataset and achieved good results (accuracy of classification around 0.96), whereas on my real data I get terrible results. I assume there is some implementation error, because the data is very simple. But I cannot figure out what exactly is the problem.
What are the parameters that it make sense to adjust?
I tried with:
number of units in hidden layer
generalization parameter (lambda)
number of iterations for minimization function
Built-in minimization function used in this code is pretty much confusing me. It is used just once, as #goncalopp has mentioned in comment. Shouldn't it iteratively update the weights? How it can be implemented?
Here is my training data (target class is in the last column):
65535, 3670, 65535, 3885, -0.73, 1
65535, 3962, 65535, 3556, -0.72, 1
65535, 3573, 65535, 3529, -0.61, 1
3758, 3123, 4117, 3173, -0.21, 0
3906, 3119, 4288, 3135, -0.28, 0
3750, 3073, 4080, 3212, -0.26, 0
65535, 3458, 65535, 3330, -0.85, 2
65535, 3315, 65535, 3306, -0.87, 2
65535, 3950, 65535, 3613, -0.84, 2
65535, 32576, 65535, 19613, -0.35, 3
65535, 16657, 65535, 16618, -0.37, 3
65535, 16657, 65535, 16618, -0.32, 3
The dependencies are so obvious, I think it should be so easy to classify it...
But results are terrible. I get accuracy of 0.6 to 0.8. This is absolutely inappropriate for my application. Can someone please point out possible improvements I could make in order to achieve better results.
Here is the code:
import numpy as np
from scipy import optimize
from sklearn import cross_validation
from sklearn.metrics import accuracy_score
import math
class NN_1HL(object):
def __init__(self, reg_lambda=0, epsilon_init=0.12, hidden_layer_size=25, opti_method='TNC', maxiter=500):
self.reg_lambda = reg_lambda
self.epsilon_init = epsilon_init
self.hidden_layer_size = hidden_layer_size
self.activation_func = self.sigmoid
self.activation_func_prime = self.sigmoid_prime
self.method = opti_method
self.maxiter = maxiter
def sigmoid(self, z):
return 1 / (1 + np.exp(-z))
def sigmoid_prime(self, z):
sig = self.sigmoid(z)
return sig * (1 - sig)
def sumsqr(self, a):
return np.sum(a ** 2)
def rand_init(self, l_in, l_out):
self.epsilon_init = (math.sqrt(6))/(math.sqrt(l_in + l_out))
return np.random.rand(l_out, l_in + 1) * 2 * self.epsilon_init - self.epsilon_init
def pack_thetas(self, t1, t2):
return np.concatenate((t1.reshape(-1), t2.reshape(-1)))
def unpack_thetas(self, thetas, input_layer_size, hidden_layer_size, num_labels):
t1_start = 0
t1_end = hidden_layer_size * (input_layer_size + 1)
t1 = thetas[t1_start:t1_end].reshape((hidden_layer_size, input_layer_size + 1))
t2 = thetas[t1_end:].reshape((num_labels, hidden_layer_size + 1))
return t1, t2
def _forward(self, X, t1, t2):
m = X.shape[0]
ones = None
if len(X.shape) == 1:
ones = np.array(1).reshape(1,)
else:
ones = np.ones(m).reshape(m,1)
# Input layer
a1 = np.hstack((ones, X))
# Hidden Layer
z2 = np.dot(t1, a1.T)
a2 = self.activation_func(z2)
a2 = np.hstack((ones, a2.T))
# Output layer
z3 = np.dot(t2, a2.T)
a3 = self.activation_func(z3)
return a1, z2, a2, z3, a3
def function(self, thetas, input_layer_size, hidden_layer_size, num_labels, X, y, reg_lambda):
t1, t2 = self.unpack_thetas(thetas, input_layer_size, hidden_layer_size, num_labels)
m = X.shape[0]
Y = np.eye(num_labels)[y]
_, _, _, _, h = self._forward(X, t1, t2)
costPositive = -Y * np.log(h).T
costNegative = (1 - Y) * np.log(1 - h).T
cost = costPositive - costNegative
J = np.sum(cost) / m
if reg_lambda != 0:
t1f = t1[:, 1:]
t2f = t2[:, 1:]
reg = (self.reg_lambda / (2 * m)) * (self.sumsqr(t1f) + self.sumsqr(t2f))
J = J + reg
return J
def function_prime(self, thetas, input_layer_size, hidden_layer_size, num_labels, X, y, reg_lambda):
t1, t2 = self.unpack_thetas(thetas, input_layer_size, hidden_layer_size, num_labels)
m = X.shape[0]
t1f = t1[:, 1:]
t2f = t2[:, 1:]
Y = np.eye(num_labels)[y]
Delta1, Delta2 = 0, 0
for i, row in enumerate(X):
a1, z2, a2, z3, a3 = self._forward(row, t1, t2)
# Backprop
d3 = a3 - Y[i, :].T
d2 = np.dot(t2f.T, d3) * self.activation_func_prime(z2)
Delta2 += np.dot(d3[np.newaxis].T, a2[np.newaxis])
Delta1 += np.dot(d2[np.newaxis].T, a1[np.newaxis])
Theta1_grad = (1 / m) * Delta1
Theta2_grad = (1 / m) * Delta2
if reg_lambda != 0:
Theta1_grad[:, 1:] = Theta1_grad[:, 1:] + (reg_lambda / m) * t1f
Theta2_grad[:, 1:] = Theta2_grad[:, 1:] + (reg_lambda / m) * t2f
return self.pack_thetas(Theta1_grad, Theta2_grad)
def fit(self, X, y):
num_features = X.shape[0]
input_layer_size = X.shape[1]
num_labels = len(set(y))
theta1_0 = self.rand_init(input_layer_size, self.hidden_layer_size)
theta2_0 = self.rand_init(self.hidden_layer_size, num_labels)
thetas0 = self.pack_thetas(theta1_0, theta2_0)
options = {'maxiter': self.maxiter}
_res = optimize.minimize(self.function, thetas0, jac=self.function_prime, method=self.method,
args=(input_layer_size, self.hidden_layer_size, num_labels, X, y, 0), options=options)
self.t1, self.t2 = self.unpack_thetas(_res.x, input_layer_size, self.hidden_layer_size, num_labels)
np.savetxt("weights_t1.txt", self.t1, newline="\n")
np.savetxt("weights_t2.txt", self.t2, newline="\n")
def predict(self, X):
return self.predict_proba(X).argmax(0)
def predict_proba(self, X):
_, _, _, _, h = self._forward(X, self.t1, self.t2)
return h
##################
# IR data #
##################
values = np.loadtxt('infrared_data.txt', delimiter=', ', usecols=[0,1,2,3,4])
targets = np.loadtxt('infrared_data.txt', delimiter=', ', dtype=(int), usecols=[5])
X_train, X_test, y_train, y_test = cross_validation.train_test_split(values, targets, test_size=0.4)
nn = NN_1HL()
nn.fit(values, targets)
print("Accuracy of classification: "+str(accuracy_score(y_test, nn.predict(X_test))))
The most obvious problem is that your training dataset is very small.
Since you're using scipy.optimize.minimize instead of the usual iterative gradient descent, I think it's also likely you're overfitting your model to your training data. Possibly a iterative algorithm works better, here. Don't forget to carefully monitor the validation error.
If you try backpropagation with gradient descent, notice that, depending on the parameters used on backpropagation, neural networks take a while to converge
You can try to feed the network the same training data multiple times or tweak the learning rate but ideally you should use more diverse data.
Correctly normalizing the data solved the problem. I used preprocessing module from sklearn. Here is example:
from sklearn import preprocessing
import numpy as np
X_train = np.array([[ 1., -1., 2.],
[ 2., 0., 0.],
[ 0., 1., -1.]])
min_max_scaler = preprocessing.MinMaxScaler()
X_train_minmax = min_max_scaler.fit_transform(X_train)
print(X_train_minmax)
X_test = np.array([[ -3., -1., 4.]])
X_test_minmax = min_max_scaler.transform(X_test)
print(X_test_minmax)
And the output is:
[[ 0.5 0. 1. ]
[ 1. 0.5 0.3333]
[ 0. 1. 0. ]]
[[-1.5 0. 1.6667]]