'length not know' in theano.function - theano

I want to make some changes to logistic_sgd.py from the Deep Learning Tutorial. Details are below.
The original code:
index = T.lscalar()
x = T.matrix('x')
y = T.ivector('y')

train_model = theano.function(
    inputs=[index],
    outputs=classifier.errors(y),
    givens={
        x: test_set_x[index * train_batch_size: (index + 1) * train_batch_size],
        y: test_set_y[index * train_batch_size: (index + 1) * train_batch_size]
    }
)
My code:
index = T.lscalar()
idx_list = T.lvector()
x = T.matrix('x')
y = T.ivector('y')

train_model = theano.function(
    inputs=[idx_list],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[[i for i in idx_list]],
        y: train_set_y[[i for i in idx_list]]
    }
)
I want to take the indices for train_set_x and train_set_y from a vector idx_list, instead of the scalar index used in the original code, but I got the following error:
Traceback (most recent call last):
File "Y:/ARBM/code/logistic_sgd_rand.py", line 169, in <module>
train_batch_size=5, select_batch_size=10)
File "Y:/ARBM/code/logistic_sgd_rand.py", line 92, in sgd_optimization_mnist
x: train_set_x[[i for i in idx_list]],
File "C:\Anaconda\lib\site-packages\theano\tensor\var.py", line 433, in __iter__
for i in xrange(theano.tensor.basic.get_vector_length(self)):
File "C:\Anaconda\lib\site-packages\theano\tensor\basic.py", line 3773, in get_vector_length
raise ValueError("length not known")
ValueError: length not known

The problem is that you're mixing Python with symbolic Theano code in an unsupported way.
Instead of
x: train_set_x[[i for i in idx_list]],
y: train_set_y[[i for i in idx_list]]
you need
x: train_set_x[idx_list],
y: train_set_y[idx_list]
Here's a full example that demonstrates the change in a bit more detail:
import numpy
import theano
import theano.tensor as T


def v1(all_x):
    batch_size = 3
    index = T.lscalar()
    x_part = T.vector()
    f = theano.function(
        inputs=[index],
        outputs=x_part,
        givens={
            x_part: all_x[index * batch_size: (index + 1) * batch_size]
        }
    )
    print f(1)


def v2_broken(all_x):
    idx_list = T.lvector()
    x_part = T.vector()
    f = theano.function(
        inputs=[idx_list],
        outputs=x_part,
        givens={
            x_part: all_x[[i for i in idx_list]]
        }
    )
    print f([2, 4, 6, 8])


def v2_fixed(all_x):
    idx_list = T.lvector()
    x_part = T.vector()
    f = theano.function(
        inputs=[idx_list],
        outputs=x_part,
        givens={
            x_part: all_x[idx_list]
        }
    )
    print f([2, 4, 6, 8])


def main():
    all_x = theano.shared(-numpy.arange(10, dtype=theano.config.floatX))
    v1(all_x)
    # v2_broken(all_x)  # raises ValueError: length not known
    v2_fixed(all_x)


main()
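Applied to the code in the question, the same fix would look roughly like this (a sketch, assuming cost, updates, x, y, train_set_x and train_set_y are defined as in the tutorial):
idx_list = T.lvector()

train_model = theano.function(
    inputs=[idx_list],
    outputs=cost,
    updates=updates,
    givens={
        x: train_set_x[idx_list],  # advanced indexing with an integer vector
        y: train_set_y[idx_list]
    }
)

# called with any batch of row indices, e.g.
# train_model([3, 17, 42, 8, 95])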

Related

How to handle JAX reshape with JIT

I am trying to implement entmax-alpha as described here.
Here is the code.
import jax
import jax.numpy as jnp
from jax import custom_jvp
from jax import jit
from jax import lax
from jax import vmap


@jax.partial(jit, static_argnums=(2,))
def p_tau(z, tau, alpha=1.5):
    return jnp.clip((alpha - 1) * z - tau, a_min=0) ** (1 / (alpha - 1))


@jit
def get_tau(tau, tau_max, tau_min, z_value):
    return lax.cond(z_value < 1,
                    lambda _: (tau, tau_min),
                    lambda _: (tau_max, tau),
                    operand=None
                    )


@jit
def body(kwargs, x):
    tau_min = kwargs['tau_min']
    tau_max = kwargs['tau_max']
    z = kwargs['z']
    alpha = kwargs['alpha']

    tau = (tau_min + tau_max) / 2
    z_value = p_tau(z, tau, alpha).sum()
    taus = get_tau(tau, tau_max, tau_min, z_value)
    tau_max, tau_min = taus[0], taus[1]
    return {'tau_min': tau_min, 'tau_max': tau_max, 'z': z, 'alpha': alpha}, None


@jax.partial(jit, static_argnums=(1, 2,))
def map_row(z_input, alpha, T):
    z = (alpha - 1) * z_input

    tau_min, tau_max = jnp.min(z) - 1, jnp.max(z) - z.shape[0] ** (1 - alpha)
    result, _ = lax.scan(body, {'tau_min': tau_min, 'tau_max': tau_max, 'z': z, 'alpha': alpha}, xs=None,
                         length=T)
    tau = (result['tau_max'] + result['tau_min']) / 2
    result = p_tau(z, tau, alpha)
    return result / result.sum()


@jax.partial(custom_jvp, nondiff_argnums=(1, 2, 3,))
def entmax(input, axis=-1, alpha=1.5, T=10):
    reduce_length = input.shape[axis]
    input = jnp.swapaxes(input, -1, axis)
    input = input.reshape(input.size / reduce_length, reduce_length)
    result = vmap(jax.partial(map_row, alpha=alpha, T=T), 0)(input)
    return jnp.swapaxes(result, -1, axis)


@jax.partial(jit, static_argnums=(1, 2,))
def _entmax_jvp_impl(axis, alpha, T, primals, tangents):
    input = primals[0]
    Y = entmax(input, axis, alpha, T)
    gppr = Y ** (2 - alpha)
    grad_output = tangents[0]
    dX = grad_output * gppr
    q = dX.sum(axis=axis) / gppr.sum(axis=axis)
    q = jnp.expand_dims(q, axis=axis)
    dX -= q * gppr
    return Y, dX


@entmax.defjvp
def entmax_jvp(axis, alpha, T, primals, tangents):
    return _entmax_jvp_impl(axis, alpha, T, primals, tangents)
When I call it with the following code:
import numpy as np
from jax import value_and_grad

input = jnp.array(np.random.randn(64, 10))
weight = jnp.array(np.random.randn(64, 10))


def toy(input, weight):
    return (weight * entmax(input, axis=-1, alpha=1.5, T=20)).sum()


value_and_grad(toy)(input, weight)
I got the following error.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-3-3a62e54c67d2> in <module>()
7 return (weight*entmax(input, axis=-1, alpha=1.5, T=20)).sum()
8
----> 9 value_and_grad(toy)(input, weight)
35 frames
<ipython-input-1-d85b1daec668> in entmax(input, axis, alpha, T)
49 #jax.partial(custom_jvp, nondiff_argnums=(1, 2, 3,))
50 def entmax(input, axis=-1, alpha=1.5, T=10):
---> 51 reduce_length = input.shape[axis]
52 input = jnp.swapaxes(input, -1, axis)
53 input = input.reshape(input.size / reduce_length, reduce_length)
TypeError: tuple indices must be integers or slices, not DynamicJaxprTracer
It always seems to be connected to the reshape operations. I am not sure why this happens, and any help would be really appreciated.
To recreate the problem, here is the colab notebook
Thanks a lot.
The error comes from the fact that you are attempting to index a Python tuple with a traced quantity, axis. You can fix this error by making axis a static argument:
@jax.partial(jit, static_argnums=(0, 1, 2,))
def _entmax_jvp_impl(axis, alpha, T, primals, tangents):
    ...
Unfortunately, this uncovers another problem: p_tau declares that the alpha parameter is static, but body() calls this with a traced quantity. This quantity cannot be easily marked static in body because it is passed within a dictionary of parameters that contains the input that is being traced.
To fix this, you'll have to rewrite your function signatures, carefully marking in each one which inputs are static and which are not, and making sure the two do not mix across the layers of function calls.
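For example, one common pattern (a rough sketch under my own simplifications, not a drop-in fix for the code above) is to close over the static values so that only traced arrays flow through jit and lax.scan, and to mark the remaining non-array arguments static at the top level:
from functools import partial

import jax.numpy as jnp
from jax import jit, lax


def make_body(alpha):
    # alpha is a plain Python float captured by the closure, so it acts as a
    # compile-time constant instead of travelling inside the traced scan carry
    def body(carry, _):
        tau_min, tau_max, z = carry
        tau = (tau_min + tau_max) / 2
        z_value = jnp.maximum((alpha - 1) * z - tau, 0.0).sum()
        tau_min, tau_max = lax.cond(z_value < 1,
                                    lambda _: (tau_min, tau),  # shrink the upper bound
                                    lambda _: (tau, tau_max),  # raise the lower bound
                                    operand=None)
        return (tau_min, tau_max, z), None
    return body


@partial(jit, static_argnums=(1, 2))  # alpha and T are static; z is traced
def bisect_tau(z, alpha, T):
    carry = (jnp.min(z) - 1, jnp.max(z), z)
    (tau_min, tau_max, _), _ = lax.scan(make_body(alpha), carry, xs=None, length=T)
    return (tau_min + tau_max) / 2

# e.g. bisect_tau(jnp.array([0.3, 0.9, 1.4]), 1.5, 20)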

OverflowError: (34, 'Result too large') in round function

I need to use the round function in my code because at some point the floats become too big and Python can't handle them, so I simply implemented it like this:
def h(x, theta):
    return theta[0] + theta[1] * x


def err(theta, x, y):
    error = []
    i = 0
    for e in x:
        prevision = h(x[i], theta) - y[i]  # part where I put the round function
        prevision = round(prevision, 10)
        print(prevision)
        error.append(prevision)
        i += 1
    return error


def sqrErr(error):
    sqrError = []
    for e in error:
        sqrError.append(e ** 2)
    return sqrError


def errForX(error, x):
    errorForX = []
    i = 0
    for e in error:
        errorForX.append(error[i] * x[i])
        i += 1
    return errorForX


def Theta(theta, error, sqrError, errorForX, lr):
    newThetaList = []
    i = 0
    for e in theta:
        newTheta = 0
        if i == 0:  # theta_0
            newTheta = e - lr * (1/2) * sum(error) * ((1/4) * sum(sqrError))
        elif i == 1:  # theta_1
            newTheta = e - lr * (1/2) * sum(errorForX) * ((1/4) * sum(sqrError))
        newThetaList.append(newTheta)
        i += 1
    return newThetaList


def Train():
    nLoops = 1000000
    y = [5, 11, 21]
    x = [2, 5, 10]
    theta = [0, 0]
    lr = 0.00021
    prediction = []
    for loop in range(nLoops):
        error = err(theta, x, y)
        sqrError = sqrErr(error)
        errorForX = errForX(error, x)
        theta = Theta(theta, error, sqrError, errorForX, lr)
    predictions = []
    for e in x:
        predictions.append(h(e, theta))
    print("Theta: ")
    print(theta)
    print("Targets: ")
    print(y)
    print("Predictions: ")
    print(predictions)


Train()
The numbers become too big and it throws an error.
This is my first ever machine learning script. The squared error becomes a really long number and I don't know how to prevent that; I tried to limit the number that is going to be squared to 10 digits, but it didn't work.
This is the error:
Traceback (most recent call last):
File "C:/Users/flama/OneDrive/Desktop/rete neurale v3.py", line 74, in <module>
Train()
File "C:/Users/flama/OneDrive/Desktop/rete neurale v3.py", line 59, in Train
sqrError = sqrErr(error)
File "C:/Users/flama/OneDrive/Desktop/rete neurale v3.py", line 21, in sqrErr
sqrError.append(e ** 2)
OverflowError: (34, 'Result too large')

TypeError: 'numpy.float64' object is not callable in scipy.optimize.minimize Library

Hi, I am trying to implement the optimization paper "Optimal Kernel Selection in
Kernel Fisher Discriminant Analysis" and I have written the code for it. However, I get this error after trying different methods. I use the scipy.optimize.minimize function from the SciPy library: https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html#rdd2e1855725e-5
My code is below:
def c_func(theta):
    data_pima = pd.read_csv('~/Documents/Uwaterloo_Study_Docs/ECE_602/Project_final/Dataset/PIMA/pima-indians-diabetes.csv')
    data_pima.rename(columns={'1':'Target', '6':'Pregnancies', '148':'Glucose', '72':'BloodPressure', '35':'SkinThickness', '0': 'Insulin', '33.6': 'BMI', '0.627':'DiabeticPedigreeFunction','50':'Age'},inplace=True)
    X = data_pima.loc[:,:'Age'].values
    y = data_pima['Target'].values
    data_pima_positive = data_pima.loc[(data_pima['Target'] > 0)]
    data_pima_negative = data_pima.loc[(data_pima['Target'] < 1)]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,random_state=42)
    sq_dist = pdist(X, 'sqeuclidean')
    sigma = [10**(0.1), 10**(-0.7), 10**(-0.4), 10**(-0.1), 10**(0.2), 10**(0.5), 10**(0.8), 10**(1.1), 10**(1.4), 10**(1.7)]
    G = 0
    for value in range(10):
        gamma = 1/(sigma[value]**2)
        gamma = -gamma * theta[value]
        mat_sqr_dist = squareform(sq_dist)
        g = np.exp(gamma * mat_sqr_dist)
        G = np.add(G, g)
    # number of positive sample from the dataset
    m_plus = len(data_pima_positive.index)
    data_pima_positive = data_pima_positive.values
    m_minus = len(data_pima_negative.index)
    one_plus = np.ones(m_plus)
    one_minus = np.ones(m_minus)
    I_plus = np.identity(m_plus)
    J_plus_1value = np.dot(one_plus, one_plus.T)
    J_plus = (1/np.sqrt(m_plus)) * (I_plus - (1/m_plus) * J_plus_1value)
    I_minus = np.identity(m_minus)
    J_minus_1value = np.dot(one_minus, one_minus.T)
    J_minus = (1/np.sqrt(m_minus)) * (I_minus - (1/m_minus) * J_minus_1value)
    J = linalg.block_diag(J_plus, J_minus)
    a_plus_1 = (1/m_plus)* one_plus
    a_minus_1 = (1/m_minus)* one_minus
    zeros_a_plus = np.zeros(len(a_minus_1))
    a_plus = np.block([a_plus_1, zeros_a_plus])
    zeros_a_minus = np.zeros(len(a_plus_1))
    a_minus = np.block([zeros_a_minus, a_minus_1])
    a = a_plus - a_minus
    lambda_val = 10**(-8)
    I = np.identity(len(J))
    J_G = np.matmul(J,G)
    lambda_I = lambda_val*I
    J_G_J = np.matmul(J_G, J)
    value_1 = (lambda_I + J_G_J)
    J_G_a = np.matmul(J_G,a)
    G_J = np.matmul(G,J)
    aT_G_J = np.matmul(a.T,G_J)
    G_a = np.matmul(G,a)
    aT_G_a = np.matmul(a.T, G_a)
    value_1Inv = linalg.inv(value_1)
    aT_G_J_value1Inv = np.matmul(aT_G_J, value_1Inv)
    aT_G_J_value1Inv_J_G_a = np.matmul(aT_G_J_value1Inv, J_G_a)
    func_val = (1/lambda_val)*(aT_G_J_value1Inv_J_G_a - aT_G_a)
    return func_val


if __name__ == "__main__":
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from scipy.spatial.distance import pdist, squareform
    from scipy import linalg
    from scipy.optimize import linprog
    from scipy import optimize as optimize

    theta_val = np.array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
    value = c_func(theta_val)
    result = optimize.minimize(value, theta_val, method='Newton-CG', jac=True, options={'disp':True})
    print(result)
Here is the detailed error that I get:
Traceback (most recent call last):
File "test_project.py", line 76, in <module>
result = optimize.minimize(value, theta_val, method='Newton-CG', jac=True, options={'disp':True})
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/_minimize.py", line 607, in minimize
**options)
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 1588, in _minimize_newtoncg
old_fval = f(x0)
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 327, in function_wrapper
return function(*(wrapper_args + args))
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 65, in __call__
fg = self.fun(x, *args)
TypeError: 'numpy.float64' object is not callable
Can anyone help me resolve this error?
Log of the IndexError (for context, see the comments discussion beneath):
Traceback (most recent call last):
File "test_project.py", line 120, in <module>
result = optimize.minimize(c_func, theta, method='Newton-CG', jac =True, options={'disp':True})
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/_minimize.py", line 607, in minimize
**options)
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 1588, in _minimize_newtoncg
old_fval = f(x0)
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 327, in function_wrapper
return function(*(wrapper_args + args))
File "/home/somesh/anaconda3/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 66, in __call__
self.jac = fg[1]
IndexError: invalid index to scalar variable.
So your issue here is that you've already evaluated c_func.
The minimize routine expects a callable. c_func itself is callable, but when you call c_func on theta_val you get back a float (or perhaps an array of floats); that return value is what you passed to minimize, not the function.
If you want to find the minimal value on a grid, you can just evaluate the function and take the smallest value. What minimize does for you is accept c_func and search the argument space for an optimal theta_val.
The second argument to minimize should be a starting value for theta_val.
It may be helpful for you to read (if you haven't already) the tutorial for minimize:
https://docs.scipy.org/doc/scipy/reference/tutorial/optimize.html#nelder-mead-simplex-algorithm-method-nelder-mead
to get an idea of how it works.
The call you probably want is not:
result = optimize.minimize(value, theta_val, method='Newton-CG', jac=True, options={'disp':True})
rather:
result = optimize.minimize(c_func, theta_val, method='Newton-CG', jac=True, options={'disp':True})
This should work and return an OptimizeResult object.
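As a minimal illustration of the callable-versus-value distinction (a toy quadratic objective, not the kernel-selection code from the question):
import numpy as np
from scipy.optimize import minimize


def f(x):
    return np.sum((x - 3.0) ** 2)  # minimum at x = [3, 3, 3]


x0 = np.zeros(3)
res = minimize(f, x0)   # pass the function itself, not f(x0)
print(res.x)            # approximately [3. 3. 3.]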
The problem was that, with the Newton-CG (conjugate gradient) method, minimize needs the gradient: either a separate gradient callable passed via jac, or jac=True with the objective returning the gradient alongside the function value. If the gradient is not supplied correctly it gives the error above.
I passed the gradient function and it worked.
The modified code is:
def c_func_opt(theta):
    data_pima = pd.read_csv('~/Documents/Uwaterloo_Study_Docs/ECE_602 Project_final/Dataset/PIMA/pima-indians-diabetes.csv')
    data_pima.rename(columns={'1':'Target', '6':'Pregnancies', '148':'Glucose', '72':'BloodPressure', '35':'SkinThickness', '0': 'Insulin','33.6': 'BMI', '0.627':'DiabeticPedigreeFunction','50':'Age'},inplace=True)
    X = data_pima.loc[:,:'Age'].values
    y = data_pima['Target'].values
    data_pima_positive = data_pima.loc[(data_pima['Target'] > 0)]
    data_pima_negative = data_pima.loc[(data_pima['Target'] < 1)]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,random_state=42)
    sq_dist = pdist(X, 'sqeuclidean')
    sigma = [10**(0.1), 10**(-0.7), 10**(-0.4), 10**(-0.1), 10**(0.2), 10**(0.5), 10**(0.8), 10**(1.1), 10**(1.4), 10**(1.7)]
    mat_sqr_dist = squareform(sq_dist)
    G = 0
    G_list = []
    eq9_value = 0
    for value in range(10):
        gamma = 1/(sigma[value]**2)
        gamma = -gamma
        g = np.exp(gamma * mat_sqr_dist)
        G_g = theta[value] * g
        G_list.append(G_g)
        G = np.add(G, G_g)
    m_plus = len(data_pima_positive.index)
    data_pima_positive = data_pima_positive.values
    m_minus = len(data_pima_negative.index)
    one_plus = np.ones(m_plus)
    one_minus = np.ones(m_minus)
    I_plus = np.identity(m_plus)
    J_plus_1value = np.dot(one_plus, one_plus.T)
    J_plus = (1/np.sqrt(m_plus)) * (I_plus - (1/m_plus) * J_plus_1value)
    I_minus = np.identity(m_minus)
    J_minus_1value = np.dot(one_minus, one_minus.T)
    J_minus = (1/np.sqrt(m_minus)) * (I_minus - (1/m_minus) * J_minus_1value)
    J = linalg.block_diag(J_plus, J_minus)
    a_plus_1 = (1/m_plus)* one_plus
    a_minus_1 = (1/m_minus)* one_minus
    zeros_a_plus = np.zeros(len(a_minus_1))
    a_plus = np.block([a_plus_1, zeros_a_plus])
    zeros_a_minus = np.zeros(len(a_plus_1))
    a_minus = np.block([zeros_a_minus, a_minus_1])
    a = a_plus - a_minus
    lambda_val = 10**(-8)
    I = np.identity(len(J))
    J_G = np.matmul(J,G)
    lambda_I = lambda_val*I
    J_G_J = np.matmul(J_G, J)
    value_1 = (lambda_I + J_G_J)
    J_G_a = np.matmul(J_G,a)
    G_J = np.matmul(G,J)
    aT_G_J = np.matmul(a.T,G_J)
    G_a = np.matmul(G,a)
    aT_G_a = np.matmul(a.T, G_a)
    value_1Inv = linalg.inv(value_1)
    aT_G_J_value1Inv = np.matmul(aT_G_J, value_1Inv)
    aT_G_J_value1Inv_J_G_a = np.matmul(aT_G_J_value1Inv, J_G_a)
    func1_val = (1/lambda_val)*(aT_G_J_value1Inv_J_G_a - aT_G_a)
    eq9_value = 0
    for index_k in range(10):
        gamma = 1/(sigma[value]**2)
        gamma = -gamma
        g = np.exp(gamma * mat_sqr_dist)
        theta_aT = theta[index_k] * a.T
        theta_aT_g = np.matmul(theta_aT, g)
        theta_aT_g_a = np.matmul(theta_aT_g, a)
        eq9_value += theta_aT_g_a
    func2_val = (1/lambda_val) * (aT_G_a - eq9_value)
    return func2_val


def c_func(theta):
    data_pima = pd.read_csv('~/Documents/Uwaterloo_Study_Docs/ECE_602/Project_final/Dataset/PIMA/pima-indians-diabetes.csv')
    data_pima.rename(columns={'1':'Target', '6':'Pregnancies', '148':'Glucose', '72':'BloodPressure', '35':'SkinThickness', '0': 'Insulin','33.6': 'BMI', '0.627':'DiabeticPedigreeFunction','50':'Age'},inplace=True)
    X = data_pima.loc[:,:'Age'].values
    y = data_pima['Target'].values
    data_pima_positive = data_pima.loc[(data_pima['Target'] > 0)]
    data_pima_negative = data_pima.loc[(data_pima['Target'] < 1)]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30,random_state=42)
    sq_dist = pdist(X, 'sqeuclidean')
    sigma = [10**(0.1), 10**(-0.7), 10**(-0.4), 10**(-0.1), 10**(0.2), 10**(0.5), 10**(0.8), 10**(1.1), 10**(1.4), 10**(1.7)]
    mat_sqr_dist = squareform(sq_dist)
    G = 0
    for value in range(10):
        gamma = 1/(sigma[value]**2)
        gamma = -gamma
        g = np.exp(gamma * mat_sqr_dist)
        G_g = theta[value] * g
        G = np.add(G, G_g)
    # number of positive sample from the dataset
    m_plus = len(data_pima_positive.index)
    data_pima_positive = data_pima_positive.values
    m_minus = len(data_pima_negative.index)
    one_plus = np.ones(m_plus)
    one_minus = np.ones(m_minus)
    I_plus = np.identity(m_plus)
    J_plus_1value = np.dot(one_plus, one_plus.T)
    J_plus = (1/np.sqrt(m_plus)) * (I_plus - (1/m_plus) * J_plus_1value)
    I_minus = np.identity(m_minus)
    J_minus_1value = np.dot(one_minus, one_minus.T)
    J_minus = (1/np.sqrt(m_minus)) * (I_minus - (1/m_minus) * J_minus_1value)
    J = linalg.block_diag(J_plus, J_minus)
    a_plus_1 = (1/m_plus)* one_plus
    a_minus_1 = (1/m_minus)* one_minus
    zeros_a_plus = np.zeros(len(a_minus_1))
    a_plus = np.block([a_plus_1, zeros_a_plus])
    zeros_a_minus = np.zeros(len(a_plus_1))
    a_minus = np.block([zeros_a_minus, a_minus_1])
    a = a_plus - a_minus
    lambda_val = 10**(-8)
    I = np.identity(len(J))
    J_G = np.matmul(J,G)
    lambda_I = lambda_val*I
    J_G_J = np.matmul(J_G, J)
    value_1 = (lambda_I + J_G_J)
    J_G_a = np.matmul(J_G,a)
    G_J = np.matmul(G,J)
    aT_G_J = np.matmul(a.T,G_J)
    G_a = np.matmul(G,a)
    aT_G_a = np.matmul(a.T, G_a)
    value_1Inv = linalg.inv(value_1)
    aT_G_J_value1Inv = np.matmul(aT_G_J, value_1Inv)
    aT_G_J_value1Inv_J_G_a = np.matmul(aT_G_J_value1Inv, J_G_a)
    func_val = (1/lambda_val)*(aT_G_J_value1Inv_J_G_a - aT_G_a)
    grad = []
    for value in range(10):
        gamma = 1/(sigma[value]**2)
        gamma = -gamma
        g = np.exp(gamma * mat_sqr_dist)
        aT_g = np.matmul(a.T, g)
        aT_g_a = np.matmul(aT_g, a)
        grad.append(aT_g_a)
    return sq_dist, sigma, a, J, grad


def gradient_value(grad):
    lambda_va = 10**(-8)
    grad = (-1/lambda_va)*grad
    return grad


if __name__ == "__main__":
    import numpy as np
    import pandas as pd
    from sklearn.model_selection import train_test_split
    from scipy.spatial.distance import pdist, squareform
    from scipy import linalg
    from scipy.optimize import LinearConstraint
    from scipy import optimize as optimize
    import cvxpy as cvx

    theta = np.array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
    sq_dist, sigma, a, J, grad = c_func(theta)
    grad = np.array(grad)
    grad_val = gradient_value(grad)
    one_vec = np.ones(len(theta))
    one_vec_t_theta = np.matmul(one_vec.T, theta)
    result = optimize.minimize(c_func_opt, theta, method='Newton-CG', jac=gradient_value, options={'disp':True})  # constraints=cons,
    print(result)
Output:
Warning: Desired error not necessarily achieved due to precision loss.
Current function value: -16171400.005492
Iterations: 1
Function evaluations: 33
Gradient evaluations: 25
Hessian evaluations: 0
fun: -16171400.005492399
jac: array([-1.025e+10, -1.025e+10, -1.025e+10, -1.025e+10, -1.025e+10,
-1.025e+10, -1.025e+10, -1.025e+10, -1.025e+10, -1.025e+10])
message: 'Warning: Desired error not necessarily achieved due to precision loss.'
nfev: 33
nhev: 0
nit: 1
njev: 25
status: 2
success: False
x: array([102.5, 102.5, 102.5, 102.5, 102.5, 102.5, 102.5, 102.5, 102.5,
102.5])
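For reference, a small sketch of the two ways minimize accepts gradients for Newton-CG (again on a toy quadratic, not the code above). With jac=True the objective itself must return the pair (value, gradient), which is what the earlier jac=True call was missing; with jac=<callable> the gradient is supplied separately, as in the modified code:
import numpy as np
from scipy.optimize import minimize


def f_and_grad(x):
    value = np.sum((x - 3.0) ** 2)
    grad = 2.0 * (x - 3.0)
    return value, grad      # jac=True: fun returns (value, gradient)


def f(x):
    return np.sum((x - 3.0) ** 2)


def grad(x):
    return 2.0 * (x - 3.0)  # jac=<callable>: gradient supplied separately


res1 = minimize(f_and_grad, np.zeros(3), method='Newton-CG', jac=True)
res2 = minimize(f, np.zeros(3), method='Newton-CG', jac=grad)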

IndexError: too many indices for array in Scipy.Optimize

I'm trying to debug some code with scipy.optimize.
The bug comes from the constraint: the optimization works fine without it. The constraint itself seems to work fine outside scipy.optimize (the variable testconst is computed normally). The code is the following:
from scipy.optimize import minimize
import numpy as np


def totaldist(dy):
    n = np.shape(dy)[0]
    temp = 0
    for i in range(n):
        temp += dy[i] ** 2
    return -0.5 * temp


def create_bond(dy_max):
    n = np.shape(dy_max)[0]
    bond = np.zeros((n, 2))
    for i in range(n):
        bond[i, :] = [0, dy_max[i]]
    tot = tuple([tuple(row) for row in bond])
    return tot


# def create_const(type_x, dx, gamma, P):
def create_const(dy, *args):
    arg = np.asarray(args)
    n = np.shape(dy)[0]
    dx = np.zeros((n, 2))
    bnd = np.zeros((n, 2))
    # from args to numpy array
    type_x = np.zeros(n)
    dP = 0
    delta1 = np.zeros(n)
    delta2 = np.zeros(n)
    gamma = np.zeros((n, n))
    for i in range(n):
        a, b = bndr(arg[0, i])
        delta1[i] = arg[0, i + n + 1]
        delta2[i] = arg[0, i + 2*n + 1]
        dx[i, 0] = (b - a) * dy[i]
    gamma = GammaApprox(delta1, delta2, dx[:, 1], dx[:, 0])
    d = np.dot(delta2, dx[:, 0])
    g = np.dot(dx[:, 0], gamma)
    g = np.dot(g, dx[:, 0])
    dP = float(arg[0, n])
    return d + 0.5 * g - dP


def GammaApprox(delta1, delta2, x1, x2):
    n = np.shape(delta1)[0]
    gamma = np.zeros((n, n))
    for i in range(n):
        if x2[i] == x1[i]:
            gamma[i, i] = 0
        else:
            gamma[i, i] = (delta2[i] - delta1[i]) / (x2[i] - x1[i])
    return gamma


def GetNewPoint(x1, x2, delta1, delta2, type_x, P):
    n = np.shape(delta1)[0]
    dmax = np.zeros(n)
    dy0 = np.zeros(n)
    # create the inequality data and initial points
    for i in range(n):
        a, b = bndr(type_x[i])
        if x2[i] > x1[i]:
            dmax[i] = (x2[i] - x1[i])/(b - a)
            dy0[i] = 1 / (b - a) * (x2[i] - x1[i]) / 2
        else:
            dmax[i] = (x1[i] - x2[i])/(b - a)
            dy0[i] = 1 / (b - a) * (x1[i] - x2[i]) / 2
    bond = create_bond(dmax)
    # create the args tuple
    arg = ()
    # type x
    for i in range(n):
        arg = arg + (type_x[i],)
    # dP
    arg = arg + (abs(P[0] - P[1]), )
    # delta1
    for i in range(n):
        arg = arg + (delta1[i], )
    # delta2
    for i in range(n):
        arg = arg + (delta2[i], )
    testconst = create_const(dy0, arg)
    # create the equality constraint
    con1 = {'type': 'eq', 'fun': create_const}
    cons = ([con1, ])
    solution = minimize(totaldist, dy0, args=arg, method='SLSQP', bounds=bond, constraints=cons, options={'disp': True})
    x = solution.x
    print(x)
    return x


def bndr(type_x):
    if type_x == 'normal':
        x_0 = -5
        x_f = 1.5
    if type_x == 'lognorm':
        x_0 = 0.0001
        x_f = 5
    if type_x == 'chisquare':
        x_0 = 0.0001
        x_f = (0.8 * (10 ** .5))
    return x_0, x_f


def test():
    x1 = np.array([0.0001, 0.0001, -5])
    x2 = np.array([1.6673, 0.84334, -5])
    delta1 = np.array([0, 0, 0])
    delta2 = np.array([2.44E-7, 2.41E-6, 4.07E-7])
    type_x = np.array(['lognorm', 'chisquare', 'normal'])
    P = (0, 6.54E-8)
    f = GetNewPoint(x1, x2, delta1, delta2, type_x, P)
    return f


test()
The error message is the following:
Traceback (most recent call last):
File "D:/Anaconda Project/TestQP - Simplified/QP.py", line 134, in <module>
test()
File "D:/Anaconda Project/TestQP - Simplified/QP.py", line 130, in test
f = GetNewPoint(x1, x2, delta1, delta2, type_x, P)
File "D:/Anaconda Project/TestQP - Simplified/QP.py", line 103, in GetNewPoint
solution = minimize(totaldist, dy0, args=arg, method='SLSQP', bounds=bond, constraints=cons, options={'disp': True})
File "C:\Program Files\Anaconda\lib\site-packages\scipy\optimize\_minimize.py", line 458, in minimize
constraints, callback=callback, **options)
File "C:\Program Files\Anaconda\lib\site-packages\scipy\optimize\slsqp.py", line 311, in _minimize_slsqp
meq = sum(map(len, [atleast_1d(c['fun'](x, *c['args'])) for c in cons['eq']]))
File "C:\Program Files\Anaconda\lib\site-packages\scipy\optimize\slsqp.py", line 311, in <listcomp>
meq = sum(map(len, [atleast_1d(c['fun'](x, *c['args'])) for c in cons['eq']]))
File "D:/Anaconda Project/TestQP - Simplified/QP.py", line 40, in create_const
a, b = bndr(arg[0, i])
IndexError: too many indices for array
I found roughly similar errors on the site, like "IndexError: index 1 is out of bounds for axis 0 with size 1/ForwardEuler",
...but I fail to see that it's really the same problem.
args is not passed to constraint-functions (automatically)!
This is indicated in the docs:
args : tuple, optional
Extra arguments passed to the objective function and its derivatives (Jacobian, Hessian).
You can see the problem easily by adding a print:
def create_const(dy, *args):
    print('args:')
    print(args)
    arg = np.asarray(args)
    ...
which will output something like:
args:
(('lognorm', 'chisquare', 'normal', 6.54e-08, 0, 0, 0, 2.4400000000000001e-07, 2.4099999999999998e-06, 4.0699999999999998e-07),)
args:
()
ERROR...
If you remove your test call testconst = create_const(dy0, arg) (which passes args manually and therefore works), you will see only the non-working output:
args:
()
ERROR...
Constraints have their own mechanism of passing args as described in the docs:
constraints : dict or sequence of dict, optional
Constraints definition (only for COBYLA and SLSQP). Each constraint is defined in a dictionary with fields:
type : str
Constraint type: ‘eq’ for equality, ‘ineq’ for inequality.
fun : callable
The function defining the constraint.
jac : callable, optional
The Jacobian of fun (only for SLSQP).
args : sequence, optional
Extra arguments to be passed to the function and Jacobian.
Equality constraint means that the constraint function result is to be zero whereas inequality means that it is to be non-negative. Note that COBYLA only supports inequality constraints.
In your case:
con1 = {'type': 'eq', 'fun': create_const}                  # incomplete!
con1 = {'type': 'eq', 'fun': create_const, 'args': (arg,)}  # note the (arg,) tuple;
                                                            # this makes it behave as needed
                                                            # for your code!
This will make it run until some other problem occurs!
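A minimal, self-contained illustration of the constraint args mechanism (a toy problem, unrelated to the model in the question):
import numpy as np
from scipy.optimize import minimize


def objective(x, scale):        # extra argument supplied via minimize(..., args=...)
    return scale * np.sum(x ** 2)


def constraint(x, target):      # extra argument supplied via the constraint dict's 'args'
    return np.sum(x) - target   # 'eq' constraint: must be zero at the solution


cons = ({'type': 'eq', 'fun': constraint, 'args': (1.0,)},)
res = minimize(objective, x0=np.array([0.2, 0.8]), args=(2.0,),
               method='SLSQP', constraints=cons)
print(res.x)                    # roughly [0.5, 0.5], i.e. the entries sum to 1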

A weird error with updates in theano

I designed a variable net, but ran into some problems with Theano. The general idea is that different inputs get different nets with the same parameters, something like a recursive neural network with an auto-encoder.
There are two cases in my code: one case runs combine_feat_gt1_1() if c > 1, the other runs combine_feat_gt1_0().
It is weird that the code runs without errors if I comment out updates=updates, which is not what I want (the train_test Theano function in the code). However, if I uncomment updates=updates, an error occurs (the train_test_bug Theano function in the code). The latter is what I'd like to implement.
I have already spent some days on this bug. Can anyone help? I would appreciate it.
import os
import sys

import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.ifelse import ifelse


class Test(object):
    def __init__(
        self,
        numpy_rng,
        input=None,
        output=None,
        n_output=6,
        n_input=3,
        n_group=2,
        W_r=None,
        b_r=None
    ):
        self.n_output = n_output
        self.n_input = n_input
        self.n_group = n_group

        if not W_r:
            initial_W_r = numpy.asarray(
                numpy_rng.uniform(
                    low=-4 * numpy.sqrt(6. / (n_input + n_input)),
                    high=4 * numpy.sqrt(6. / (n_input + n_input)),
                    size=(n_input, n_input)
                ),
                dtype=theano.config.floatX
            )
            W_r = theano.shared(value=initial_W_r, name='W_r', borrow=True)

        if not b_r:
            b_r = theano.shared(
                value=numpy.zeros(
                    n_input,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )

        self.W_r = W_r
        self.b_r = b_r

        if input is None:
            self.x = T.tensor4(name='input', dtype=theano.config.floatX)
        else:
            self.x = input

        if output is None:
            self.y = T.matrix(name='output', dtype=theano.config.floatX)
        else:
            self.y = output

        self.params = [self.W_r, self.b_r]

    def get_output_values(self, input):
        a, b, c, d = input.shape

        def recusive(x_t, h_tm1, wr, hr):
            h_t = T.dot(h_tm1, wr) + T.dot(x_t, wr) + hr
            return h_t

        def combine_recusive(data):
            hidden, _ = theano.scan(fn=recusive,
                                    sequences=data[1:],
                                    outputs_info=data[0],
                                    non_sequences=[self.W_r, self.b_r],
                                    n_steps=data[1:].shape[0],
                                    strict=True)
            return hidden[-1]

        def combine_feat_gt1_1(input):
            feats, _ = theano.scan(fn=combine_recusive,
                                   sequences=input[0],
                                   outputs_info=None,
                                   n_steps=input[0].shape[0])
            recusive_flag = T.ones(1)
            return T.reshape(feats, (1, -1))  # concatenation

        def combine_feat_gt1_0(input):
            feats = input[0]
            recusive_flag = T.zeros(1)
            return T.reshape(feats, (1, -1))  # concatenation

        feat = ifelse(T.gt(c, 1), combine_feat_gt1_1(input), combine_feat_gt1_0(input))

        # debug code snippet
        self.debug_ifelse = theano.function([input], T.gt(c, 1))
        self.debug_1_0 = theano.function([input], ifelse(T.gt(c, 1), 1, 0))

        return feat

    def get_cost_updates(self):
        learning_rate = 0.1
        self.y_given_x = self.get_output_values(self.x)
        cost = T.sum((self.y_given_x - self.y) ** 2)

        gparams = T.grad(cost, self.params)
        updates = [
            (param, param - learning_rate * gparam)
            for param, gparam in zip(self.params, gparams)
        ]
        return (cost, updates)


if __name__ == "__main__":
    toy_data = numpy.array([[[[1,1,1],[2,2,2]], [[3, 4,5],[4,5,6]]]], dtype=theano.config.floatX)
    lable = numpy.array([[1,2,3,4,5,6]], dtype=theano.config.floatX)
    toy_data2 = numpy.array([[[[1,1,1]], [[3,4,5]]]], dtype=theano.config.floatX)
    lable2 = numpy.array([[6,5,4,3,2,1]], dtype=theano.config.floatX)

    x = T.tensor4('x', dtype=theano.config.floatX)
    y = T.matrix('y', dtype=theano.config.floatX)
    newX = T.tensor4(dtype=x.dtype)
    newY = T.matrix(dtype=y.dtype)

    rng = numpy.random.RandomState(123)
    test = Test(
        numpy_rng=rng,
        input=x,
        output=y,
        n_group=2,
        n_input=3,
        n_output=6
    )
    cost, updates = test.get_cost_updates()

    train_test = theano.function(
        [newX, newY],
        cost,
        # updates=updates,
        givens={
            x: newX,
            y: newY
        }
    )

    train_test_bug = theano.function(
        [newX, newY],
        cost,
        updates=updates,
        givens={
            x: newX,
            y: newY
        }
    )

    print train_test(toy_data, lable)
    print train_test(toy_data2, lable2)

    # code with bug
    # print train_test_bug(toy_data, lable)
    # print train_test_bug(toy_data2, lable2)
EDIT (by @danielrenshaw)
I've cut the code down to a simpler demonstration of the problem.
The cause is in the gradient computation of a double-nested scan expression. The problem disappears when a modified inner-most recursive expression is used (see comments in first function below).
import numpy
import theano
import theano.tensor as tt
import theano.ifelse


def inner_scan_step(x_t_t, h_tm1, w):
    # Fails when using this recursive expression
    h_t = tt.dot(h_tm1, w) + x_t_t

    # No failure when using this recursive expression
    # h_t = h_tm1 + tt.dot(x_t_t, w)

    return h_t


def outer_scan_step(x_t, w):
    h, _ = theano.scan(inner_scan_step,
                       sequences=[x_t[1:]],
                       outputs_info=[x_t[0]],
                       non_sequences=[w],
                       strict=True)
    return h[-1]


def get_outputs(x, w):
    features, _ = theano.scan(outer_scan_step,
                              sequences=[x],
                              non_sequences=[w],
                              strict=True)
    return tt.grad(features.sum(), w)


def main():
    theano.config.compute_test_value = 'raise'
    x_value = numpy.arange(12, dtype=theano.config.floatX).reshape((2, 2, 3))
    x = tt.tensor3()
    x.tag.test_value = x_value
    w = theano.shared(value=numpy.ones((3, 3), dtype=theano.config.floatX), borrow=True)
    f = theano.function(inputs=[x], outputs=get_outputs(x, w))
    print f(x_value)


if __name__ == "__main__":
    main()
I solved the problem in the version edited by danielrenshaw: when I add h0 as outputs_info, it works. Before that I used the first element of the sequence as outputs_info, which I think caused the error. But I still cannot solve my original problem.
import numpy
import theano
import theano.tensor as tt
import theano.ifelse


def inner_scan_step(x_t_t, h_tm1, w):
    # Fails when using this recursive expression
    h_t = tt.dot(h_tm1, w) + x_t_t

    # No failure when using this recursive expression
    # h_t = h_tm1 + tt.dot(x_t_t, w)

    return h_t


def outer_scan_step(x_t, w, h0):
    h, _ = theano.scan(inner_scan_step,
                       sequences=[x_t],
                       outputs_info=[h0],
                       non_sequences=[w],
                       strict=True)
    return h[-1]


def get_outputs(x, w, h0):
    features, _ = theano.scan(outer_scan_step,
                              sequences=[x],
                              non_sequences=[w, h0],
                              strict=True)
    return tt.grad(features.sum(), w)


def main():
    theano.config.compute_test_value = 'raise'
    x_value = numpy.arange(12, dtype=theano.config.floatX).reshape((2, 2, 3))
    x = tt.tensor3()
    x.tag.test_value = x_value
    w = theano.shared(value=numpy.ones((3, 3), dtype=theano.config.floatX), borrow=True)
    h0 = theano.shared(value=numpy.zeros(3, dtype=theano.config.floatX), borrow=True)
    f = theano.function(inputs=[x], outputs=get_outputs(x, w, h0))
    print f(x_value)


if __name__ == "__main__":
    main()
I've encountered the same issue and fixed it by setting optimizer=fast_compile in THEANO_FLAGS. I guess that is a bug in Theano.
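For reference, a couple of common ways to set that flag (assuming a standard Theano install; fast_compile skips most graph optimizations, so this is more a diagnostic workaround than a real fix):
# from the shell, for a single run:
#   THEANO_FLAGS='optimizer=fast_compile' python my_script.py

# or in Python, before any theano.function is compiled:
import theano
theano.config.optimizer = 'fast_compile'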
