Why isn’t NUTS sampling with tt.dot or pm.math.dot? - theano

I am trying to implement parts of Facebook's prophet with some help from this example.
https://github.com/luke14free/pm-prophet/blob/master/pmprophet/model.py
This goes well :), but I am having some problems with the dot product I don't understand. Note that I am implementing the linear trends.
ds = pd.to_datetime(df['dagindex'], format='%d-%m-%y')
m = pm.Model()
changepoint_prior_scale = 0.05
n_changepoints = 25
changepoints = pd.date_range(
start=pd.to_datetime(ds.min()),
end=pd.to_datetime(ds.max()),
periods=n_changepoints + 2
)[1: -1]
with m:
# priors
sigma = pm.HalfCauchy('sigma', 10, testval=1)
#trend
growth = pm.Normal('growth', 0, 10)
prior_changepoints = pm.Laplace('changepoints', 0, changepoint_prior_scale, shape=len(changepoints))
y = np.zeros(len(df))
# indexes x_i for the changepoints.
s = [np.abs((ds - i).values).argmin() for i in changepoints]
g = growth
x = np.arange(len(ds))
# delta
d = prior_changepoints
regression = x * g
base_piecewise_regression = []
for i in s:
local_x = x.copy()[:-i]
local_x = np.concatenate([np.zeros(i), local_x])
base_piecewise_regression.append(local_x)
piecewise_regression = np.array(base_piecewise_regression)
# this dot product doesn't work?
piecewise_regression = pm.math.dot(theano.shared(piecewise_regression).T, d)
# If I comment out this line and use that one as dot product. It works fine
# piecewise_regression = (piecewise_regression.T * d[None, :]).sum(axis=-1)
regression += piecewise_regression
y += regression
obs = pm.Normal('y',
mu=(y - df.gebruikers.mean()) / df.gebruikers.std(),
sd=sigma,
observed=(df.gebruikers - df.gebruikers.mean()) / df.gebruikers.std())
start = pm.find_MAP(maxeval=10000)
trace = pm.sample(500, step=pm.NUTS(), start=start)
If I run the snippet above with
piecewise_regression = (piecewise_regression.T * d[None, :]).sum(axis=-1)
the model works as expected. However I cannot get it to work with a dot product. The NUTS sampler doesn't sample at all.
piecewise_regression = pm.math.dot(theano.shared(piecewise_regression).T, d)
EDIT
Ive got a minimal working example
The problem still occurs with theano.shared. I’ve got a minimal working example:
np.random.seed(5)
n_changepoints = 10
t = np.arange(1000)
s = np.sort(np.random.choice(t, size=n_changepoints, replace=False))
a = (t[:, None] > s) * 1
real_delta = np.random.normal(size=n_changepoints)
y = np.dot(a, real_delta) * t
with pm.Model():
sigma = pm.HalfCauchy('sigma', 10, testval=1)
delta = pm.Laplace('delta', 0, 0.05, shape=n_changepoints)
g = tt.dot(a, delta) * t
obs = pm.Normal('obs',
mu=(g - y.mean()) / y.std(),
sd=sigma,
observed=(y - y.mean()) / y.std())
trace = pm.sample(500)
It seems to have something to do with the size of matrix a. NUTS doesnt’t sample if I start with
t = np.arange(1000)
however the example above does sample when I reduce the size of t to:
t = np.arange(100)

Related

Gekko feasible in smaller problem while infeasible in larger problem

I am trying to solve the problem as follows with Gekko in python.
I_s is an indicator variable in the problem whose value is 1 if theta is positive and 0 if theta is zero.
I wrote the problem in a code using Gekko, python.
In contrast to my previous posts, I add some constraints with respect to I, which is an indicator variable.
If I set N=10, the solution, theta is all zero, which is the result that I want.
But if I set N=100 or 200, the solution cannot be found. I cannot understand why this happens.
I want to check if theta is also zero in larger N (200).
Is there any way to solve this issue?
My code is as belows.
# Import package
from gekko import GEKKO
import numpy as np
# Define parameters
P_CO = 600 # $/tonCO
beta_CO2 = 1 # no unit
P_CO2 = 80 # $/tonCO2eq
E_ref = 3.1022616 # tonCO2eq/tonCO
E_dir = -1.600570692 # tonCO2eq/tonCO
E_indir_others = 0.3339226804 # tonCO2eq/tonCO
E_indir_elec_cons = 18.46607256 # GJ/tonCO
C1_CAPEX = 285695 # no unit
C2_CAPEX = 188.42 # no unit
C1_FOX = 82282 # no unit
C2_FOX = 24.094 # no unit
C1_ROX = 4471.5 # no unit
C2_ROX = 96.034 # no unit
C1_UOX = 7934.9 # no unit
C2_UOX = 986.9 # no unit
r = 0.08 # discount rate
N = 10 # number of scenarios
T = 30 # total time period
GWP_init = 0.338723235 # 2020 Electricity GWP in EU 27 countries
theta_max = 1600000 # Max capacity
# Function to make GWP_EU matrix (TxN matrix)
def Electricity_GWP(GWP_init, n_years, num_episodes):
GWP_mean = 0.36258224*np.exp(-0.16395611*np.arange(1, n_years+2)) + 0.03091272
GWP_mean = GWP_mean.reshape(-1,1)
GWP_Yearly = np.tile(GWP_mean, num_episodes)
noise = np.zeros((n_years+1, num_episodes))
stdev2050 = GWP_mean[-1] * 0.25
stdev = np.arange(0, stdev2050 * (1 + 1/n_years), stdev2050/n_years)
for i in range(n_years+1):
noise[i,:] = np.random.normal(0, stdev[i], num_episodes)
GWP_forecast = GWP_Yearly + noise
return GWP_forecast
GWP_EU = Electricity_GWP(GWP_init, T, N) # (T+1)*N matrix
GWP_EU = GWP_EU[1:,:] # T*N matrix
print(np.shape(GWP_EU))
# Build Gekko model
m = GEKKO(remote=False)
theta = m.Array(m.Var, N, lb=0, ub=theta_max)
I = m.Array(m.Var, N, lb=0, ub=1, integer=True)
demand = np.ones((T,1))
demand[0] = 8031887.589
for k in range(1,11):
demand[k] = demand[k-1] * 1.026
for k in range(11,21):
demand[k] = demand[k-1] * 1.016
for k in range(21,T):
demand[k] = demand[k-1] * 1.011
demand = 0.12 * demand
demand = np.tile(demand, N) # T*N matrix
print(np.shape(demand))
m3 = [[m.min3(demand[t,s],theta[s]) for t in range(T)] for s in range(N)]
obj = m.sum([sum([((1/(1+r))**(t+1))*((P_CO*m3[s][t]) \
+ (beta_CO2*P_CO2*m3[s][t]*(E_ref-E_dir-E_indir_others-E_indir_elec_cons*GWP_EU[t,s])) \
- (C1_CAPEX*I[s]+C2_CAPEX*theta[s]+C1_FOX*I[s]+C2_FOX*theta[s])\
- (C1_ROX*I[s]+C2_ROX*m3[s][t]+C1_UOX*I[s]+C2_UOX*m3[s][t])) for t in range(T)]) for s in range(N)])
for i in range(N):
m.Equation(theta[i]<=1000000*I[i])
m.Equation(-theta[i]<1000000*(1-I[i]))
# obj = m.sum([m.sum([((1/(1+r))**(t+1))*((P_CO*m.min3(demand[t,s], theta[s])) \
# + (beta_CO2*P_CO2*m.min3(demand[t,s], theta[s])*(E_ref-E_dir-E_indir_others-E_indir_elec_cons*GWP_EU[t,s])) \
# - (C1_CAPEX+C2_CAPEX*theta[s]+C1_FOX+C2_FOX*theta[s])-(C1_ROX+C2_ROX*m.min3(demand[t,s], theta[s])+C1_UOX+C2_UOX*m.min3(demand[t,s], theta[s]))) for t in range(T)]) for s in range(N)])
m.Maximize(obj/N)
m.solve(disp=True)
# s = m.sum(m.sum(((1/(1+r))**(t+1))*((P_CO*m.min3(demand[t,s], theta[s])) \
# + beta_CO2*P_CO2*m.min3(demand[t,s], theta[s])*(E_ref-E_dir-E_indir_others-E_indir_elec_cons*GWP_EU[t,s]) \
# - (C1_CAPEX + C2_CAPEX*theta[s]) - (C1_FOX + C2_FOX*theta[s]) - (C1_ROX + C2_ROX*m.min3(demand[t,s], theta[s])) - (C1_UOX + C2_UOX*m.min3(demand[t,s], theta[s])))
# for s in range(N)) for t in range(T))/N
print(theta)
I solved this issue by increasing the big M in the constraint for an indicator variable I, 1000000 to 10000000.
for i in range(N):
m.Equation(theta[i]<=10000000*I[i])
m.Equation(-theta[i]<10000000*(1-I[i]))
I didn't understand why this worked, but the result gave me the solution of 200*1 array with all zero.

Optimizing asymmetrically reweighted penalized least squares smoothing (from matlab to python)

I'm trying to apply the method for baselinining vibrational spectra, which is announced as an improvement over asymmetric and iterative re-weighted least-squares algorithms in the 2015 paper (doi:10.1039/c4an01061b), where the following matlab code was provided:
function z = baseline(y, lambda, ratio)
% Estimate baseline with arPLS in Matlab
N = length(y);
D = diff(speye(N), 2);
H = lambda*D'*D;
w = ones(N, 1);
while true
W = spdiags(w, 0, N, N);
% Cholesky decomposition
C = chol(W + H);
z = C \ (C' \ (w.*y) );
d = y - z;
% make d-, and get w^t with m and s
dn = d(d<0);
m = mean(d);
s = std(d);
wt = 1./ (1 + exp( 2* (d-(2*s-m))/s ) );
% check exit condition and backup
if norm(w-wt)/norm(w) < ratio, break; end
end
that I rewrote into python:
def baseline_arPLS(y, lam, ratio):
# Estimate baseline with arPLS
N = len(y)
k = [numpy.ones(N), -2*numpy.ones(N-1), numpy.ones(N-2)]
offset = [0, 1, 2]
D = diags(k, offset).toarray()
H = lam * numpy.matmul(D.T, D)
w_ = numpy.ones(N)
while True:
W = spdiags(w_, 0, N, N, format='csr')
# Cholesky decomposition
C = cholesky(W + H)
z_ = spsolve(C.T, w_ * y)
z = spsolve(C, z_)
d = y - z
# make d- and get w^t with m and s
dn = d[d<0]
m = numpy.mean(dn)
s = numpy.std(dn)
wt = 1. / (1 + numpy.exp(2 * (d - (2*s-m)) / s))
# check exit condition and backup
norm_wt, norm_w = norm(w_-wt), norm(w_)
if (norm_wt / norm_w) < ratio:
break
w_ = wt
return(z)
Except for the input vector y the method requires parameters lam and ratio and it runs ok for values lam<1.e+07 and ratio>1.e-01, but outputs poor results. When values are changed outside this range, for example lam=1e+07, ratio=1e-02 the CPU starts heating up and job never finishes (I interrupted it after 1min). Also in both cases the following warning shows up:
/usr/local/lib/python3.9/site-packages/scipy/sparse/linalg/dsolve/linsolve.py: 144: SparseEfficencyWarning: spsolve requires A to be CSC or CSR matrix format warn('spsolve requires A to be CSC or CSR format',
although I added the recommended format='csr' option to the spdiags call.
And here's some synthetic data (similar to one in the paper) for testing purposes. The noise was added along with a 3rd degree polynomial baseline The method works well for parameters bl_1 and fails to converge for bl_2:
import numpy
from matplotlib import pyplot
from scipy.sparse import spdiags, diags, identity
from scipy.sparse.linalg import spsolve
from numpy.linalg import cholesky, norm
import sys
x = numpy.arange(0, 1000)
noise = numpy.random.uniform(low=0, high = 10, size=len(x))
poly_3rd_degree = numpy.poly1d([1.2e-06, -1.23e-03, .36, -4.e-04])
poly_baseline = poly_3rd_degree(x)
y = 100 * numpy.exp(-((x-300)/15)**2)+\
200 * numpy.exp(-((x-750)/30)**2)+ \
100 * numpy.exp(-((x-800)/15)**2) + noise + poly_baseline
bl_1 = baseline_arPLS(y, 1e+07, 1e-01)
bl_2 = baseline_arPLS(y, 1e+07, 1e-02)
pyplot.figure(1)
pyplot.plot(x, y, 'C0')
pyplot.plot(x, poly_baseline, 'C1')
pyplot.plot(x, bl_1, 'k')
pyplot.show()
sys.exit(0)
All this is telling me that I'm doing something very non-optimal in my python implementation. Since I'm not knowledgeable enough about the intricacies of scipy computations I'm kindly asking for suggestions on how to achieve convergence in this calculations.
(I encountered an issue in running the "straight" matlab version of the code because the line D = diff(speye(N), 2); truncates the last two rows of the matrix, creating dimension mismatch later in the function. Following the description of matrix D's appearance I substituted this line by directly creating a tridiagonal matrix using the diags function.)
Guided by the comment #hpaulj made, and suspecting that the loop exit wasn't coded properly, I re-visited the paper and found out that the authors actually implemented an exit condition that was not featured in their matlab script. Changing the while loop condition provides an exit for any set of parameters; my understanding is that algorithm is not guaranteed to converge in all cases, which is why this condition is necessary but was omitted by error. Here's the edited version of my python code:
def baseline_arPLS(y, lam, ratio):
# Estimate baseline with arPLS
N = len(y)
k = [numpy.ones(N), -2*numpy.ones(N-1), numpy.ones(N-2)]
offset = [0, 1, 2]
D = diags(k, offset).toarray()
H = lam * numpy.matmul(D.T, D)
w_ = numpy.ones(N)
i = 0
N_iterations = 100
while i < N_iterations:
W = spdiags(w_, 0, N, N, format='csr')
# Cholesky decomposition
C = cholesky(W + H)
z_ = spsolve(C.T, w_ * y)
z = spsolve(C, z_)
d = y - z
# make d- and get w^t with m and s
dn = d[d<0]
m = numpy.mean(dn)
s = numpy.std(dn)
wt = 1. / (1 + numpy.exp(2 * (d - (2*s-m)) / s))
# check exit condition and backup
norm_wt, norm_w = norm(w_-wt), norm(w_)
if (norm_wt / norm_w) < ratio:
break
w_ = wt
i += 1
return(z)

Speed Up a for Loop - Python

I have a code that works perfectly well but I wish to speed up the time it takes to converge. A snippet of the code is shown below:
def myfunction(x, i):
y = x + (min(0, target[i] - data[i, :]x))*data[i]/(norm(data[i])**2))
return y
rows, columns = data.shape
start = time.time()
iterate = 0
iterate_count = []
norm_count = []
res = 5
x_not = np.ones(columns)
norm_count.append(norm(x_not))
iterate_count.append(0)
while res > 1e-8:
for row in range(rows):
y = myfunction(x_not, row)
x_not = y
iterate += 1
iterate_count.append(iterate)
norm_count.append(norm(x_not))
res = abs(norm_count[-1] - norm_count[-2])
print('Converge at {} iterations'.format(iterate))
print('Duration: {:.4f} seconds'.format(time.time() - start))
I am relatively new in Python. I will appreciate any hint/assistance.
Ax=b is the problem we wish to solve. Here, 'A' is the 'data' and 'b' is the 'target'
Ugh! After spending a while on this I don't think it can be done the way you've set up your problem. In each iteration over the row, you modify x_not and then pass the updated result to get the solution for the next row. This kind of setup can't be vectorized easily. You can learn the thought process of vectorization from the failed attempt, so I'm including it in the answer. I'm also including a different iterative method to solve linear systems of equations. I've included a vectorized version -- where the solution is updated using matrix multiplication and vector addition, and a loopy version -- where the solution is updated using a for loop to demonstrate what you can expect to gain.
1. The failed attempt
Let's take a look at what you're doing here.
def myfunction(x, i):
y = x + (min(0, target[i] - data[i, :] # x)) * (data[i] / (norm(data[i])**2))
return y
You subtract
the dot product of (the ith row of data and x_not)
from the ith row of target,
limited at zero.
You multiply this result with the ith row of data divided my the norm of that row squared. Let's call this part2
Then you add this to the ith element of x_not
Now let's look at the shapes of the matrices.
data is (M, N).
target is (M, ).
x_not is (N, )
Instead of doing these operations rowwise, you can operate on the entire matrix!
1.1. Simplifying the dot product.
Instead of doing data[i, :] # x, you can do data # x_not and this gives an array with the ith element giving the dot product of the ith row with x_not. So now we have data # x_not with shape (M, )
Then, you can subtract this from the entire target array, so target - (data # x_not) has shape (M, ).
So far, we have
part1 = target - (data # x_not)
Next, if anything is greater than zero, set it to zero.
part1[part1 > 0] = 0
1.2. Finding rowwise norms.
Finally, you want to multiply this by the row of data, and divide by the square of the L2-norm of that row. To get the norm of each row of a matrix, you do
rownorms = np.linalg.norm(data, axis=1)
This is a (M, ) array, so we need to convert it to a (M, 1) array so we can divide each row. rownorms[:, None] does this. Then divide data by this.
part2 = data / (rownorms[:, None]**2)
1.3. Add to x_not
Finally, we're adding each row of part1 * part2 to the original x_not and returning the result
result = x_not + (part1 * part2).sum(axis=0)
Here's where we get stuck. In your approach, each call to myfunction() gives a value of part1 that depends on target[i], which was changed in the last call to myfunction().
2. Why vectorize?
Using numpy's inbuilt methods instead of looping allows it to offload the calculation to its C backend, so it runs faster. If your numpy is linked to a BLAS backend, you can extract even more speed by using your processor's SIMD registers
The conjugate gradient method is a simple iterative method to solve certain systems of equations. There are other more complex algorithms that can solve general systems well, but this should do for the purposes of our demo. Again, the purpose is not to have an iterative algorithm that will perfectly solve any linear system of equations, but to show what kind of speedup you can expect if you vectorize your code.
Given your system
data # x_not = target
Let's define some variables:
A = data.T # data
b = data.T # target
And we'll solve the system A # x = b
x = np.zeros((columns,)) # Initial guess. Can be anything
resid = b - A # x
p = resid
while (np.abs(resid) > tolerance).any():
Ap = A # p
alpha = (resid.T # resid) / (p.T # Ap)
x = x + alpha * p
resid_new = resid - alpha * Ap
beta = (resid_new.T # resid_new) / (resid.T # resid)
p = resid_new + beta * p
resid = resid_new + 0
To contrast the fully vectorized approach with one that uses iterations to update the rows of x and resid_new, let's define another implementation of the CG solver that does this.
def solve_loopy(data, target, itermax = 100, tolerance = 1e-8):
A = data.T # data
b = data.T # target
rows, columns = data.shape
x = np.zeros((columns,)) # Initial guess. Can be anything
resid = b - A # x
resid_new = b - A # x
p = resid
niter = 0
while (np.abs(resid) > tolerance).any() and niter < itermax:
Ap = A # p
alpha = (resid.T # resid) / (p.T # Ap)
for i in range(len(x)):
x[i] = x[i] + alpha * p[i]
resid_new[i] = resid[i] - alpha * Ap[i]
# resid_new = resid - alpha * A # p
beta = (resid_new.T # resid_new) / (resid.T # resid)
p = resid_new + beta * p
resid = resid_new + 0
niter += 1
return x
And our original vector method:
def solve_vect(data, target, itermax = 100, tolerance = 1e-8):
A = data.T # data
b = data.T # target
rows, columns = data.shape
x = np.zeros((columns,)) # Initial guess. Can be anything
resid = b - A # x
resid_new = b - A # x
p = resid
niter = 0
while (np.abs(resid) > tolerance).any() and niter < itermax:
Ap = A # p
alpha = (resid.T # resid) / (p.T # Ap)
x = x + alpha * p
resid_new = resid - alpha * Ap
beta = (resid_new.T # resid_new) / (resid.T # resid)
p = resid_new + beta * p
resid = resid_new + 0
niter += 1
return x
Let's solve a simple system to see if this works first:
2x1 + x2 = -5
−x1 + x2 = -2
should give a solution of [-1, -3]
data = np.array([[ 2, 1],
[-1, 1]])
target = np.array([-5, -2])
print(solve_loopy(data, target))
print(solve_vect(data, target))
Both give the correct solution [-1, -3], yay! Now on to bigger things:
data = np.random.random((100, 100))
target = np.random.random((100, ))
Let's ensure the solution is still correct:
sol1 = solve_loopy(data, target)
np.allclose(data # sol1, target)
# Output: False
sol2 = solve_vect(data, target)
np.allclose(data # sol2, target)
# Output: False
Hmm, looks like the CG method doesn't work for badly conditioned random matrices we created. Well, at least both give the same result.
np.allclose(sol1, sol2)
# Output: True
But let's not get discouraged! We don't really care if it works perfectly, the point of this is to demonstrate how amazing vectorization is. So let's time this:
import timeit
timeit.timeit('solve_loopy(data, target)', number=10, setup='from __main__ import solve_loopy, data, target')
# Output: 0.25586539999994784
timeit.timeit('solve_vect(data, target)', number=10, setup='from __main__ import solve_vect, data, target')
# Output: 0.12008900000000722
Nice! A ~2x speedup simply by avoiding a loop while updating our solution!
For larger systems, this will be even better.
for N in [10, 50, 100, 500, 1000]:
data = np.random.random((N, N))
target = np.random.random((N, ))
t_loopy = timeit.timeit('solve_loopy(data, target)', number=10, setup='from __main__ import solve_loopy, data, target')
t_vect = timeit.timeit('solve_vect(data, target)', number=10, setup='from __main__ import solve_vect, data, target')
print(N, t_loopy, t_vect, t_loopy/t_vect)
This gives us:
N t_loopy t_vect speedup
00010 0.002823 0.002099 1.345390
00050 0.051209 0.014486 3.535048
00100 0.260348 0.114601 2.271773
00500 0.980453 0.240151 4.082644
01000 1.769959 0.508197 3.482822

PyMC3- Custom theano Op to do numerical integration

I am using PyMC3 for parameter estimation using a particular likelihood function which has to be defined. I googled it and found out that I should use the densitydist method for implementing the user defined likelihood functions but it is not working. How to incorporate a user defined likelihood function in PyMC3 and to find out the maximum a posteriori (MAP) estimate for my model? My code is given below. Here L is the analytic form of my Likelihood function. I have some observational data for the radial velocity(vr) and postion (r) for some objects, which is imported from excel file.
data_ = np.array(pandas.read_excel('aaa.xlsx',header=None))
gamma=3.77;
G = 4.302*10**-6;
rmin = 3.0;
R = 95.7;
vr=data_[:,1];
r= data_[:,0];
h= np.pi;
class integrateOut(theano.Op):
def __init__(self,f,t,t0,tf,*args,**kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self,*inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f,self.fvars)
except:
self.gradF = None
return theano.Apply(self,self.fvars,[tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t]+self.fvars,self.f)
output_storage[0][0] = quad(f,self.t0,self.tf,args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g,self.t,self.t0,self.tf)(*inputs)*grads[0] \
for g in self.gradF]
basic_model = pm.Model()
with basic_model:
M=[]
beta=[]
interval=0.01*10**12
M=pm.Uniform('M',
lower=0.5*10**12,upper=3.50*10**12,transform='interval')
beta=pm.Uniform('beta',lower=2.001,upper=2.999,transform='interval')
gamma=3.77
logp=[]
arr=[]
vnew=[]
rnew=[]
theta = tt.scalar('theta')
beta = tt.scalar('beta')
z = tt.cos(theta)**(2*( (gamma/(beta - 2)) - 3/2) + 3)
intZ = integrateOut(z,theta,-(np.pi)/2,(np.pi)/2)(beta)
gradIntZ = tt.grad(intZ,[beta])
funcIntZ = theano.function([beta],intZ)
funcGradIntZ = theano.function([beta],gradIntZ)
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
rnew.append(r[j]+(0.05*r[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
vn=np.array(vnew)
rn=np.array(rnew)
for beta in np.arange (2.01,2.99,0.01):
for M in np.arange (0.5,2.50,0.01):
i=np.arange(0,59,1)
q =( gamma/(beta - 2)) - 3/2
B = (G*M*10**12)/((beta -2 )*( R**(3 - beta)))
K = (gamma - 3)/((rmin**(3 - gamma))*funcIntZ(beta)*m.sqrt(2*B))
logp= -np.log(K*((1 -(( 1/(2*B) )*((vn[i]**2)*rn[i]**(beta -
2))))**(q+1))*(rn[i]**(1-gamma +(beta/2))))
arr.append(logp.sum())
def logp_func(rn,vn):
return min(np.array(arr))
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP(model=basic_model)
step = pm.Metropolis()
basicmodeltrace = pm.sample(10000, step=step,
start=start,random_seed=1,progressbar=True)
print(pm.summary(basicmodeltrace))
map_estimate = pm.find_MAP(model=basic_model)
print(map_estimate)
I am getting the following error message:
ValueError: Cannot compute test value: input 0 (theta) of Op
Elemwise{cos,no_inplace}(theta) missing default value.
Backtrace when that variable is created:
I am unable to get the output since the numerical integration is not working. I have used custom theano op for numerical integration code which i got from Custom Theano Op to do numerical integration . The integration works if I run it seperately inputting a particular value of beta, but not within the model.
I made a few changes to your code, this still does not work, but I hope it is closer to a solution. Please check this thread, as someone is trying so solve essentially the same problem.
class integrateOut(theano.Op):
def __init__(self, f, t, t0, tf,*args, **kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self, *inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f, self.fvars)
except:
self.gradF = None
return theano.Apply(self, self.fvars, [tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t] + self.fvars,self.f)
output_storage[0][0] = quad(f, self.t0, self.tf, args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g, self.t, self.t0, self.tf)(*inputs)*grads[0] \
for g in self.gradF]
gamma = 3.77
G = 4.302E-6
rmin = 3.0
R = 95.7
vr = data[:,1]
r = data[:,0]
h = np.pi
interval = 1E10
vnew = []
rnew = []
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
rnew.append(r[j]+(0.05*r[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
vn = np.array(vnew)
rn = np.array(rnew)
def integ(gamma, beta, theta):
z = tt.cos(theta)**(2*((gamma/(beta - 2)) - 3/2) + 3)
return integrateOut(z, theta, -(np.pi)/2, (np.pi)/2)(beta)
with pm.Model() as basic_model:
M = pm.Uniform('M', lower=0.5*10**12, upper=3.50*10**12)
beta = pm.Uniform('beta', lower=2.001, upper=2.999)
theta = pm.Normal('theta', 0, 10**2)
def logp_func(rn,vn):
q = (gamma/(beta - 2)) - 3/2
B = (G*M*1E12) / ((beta -2 )*(R**(3 - beta)))
K = (gamma - 3) / ((rmin**(3 - gamma)) * integ(gamma, beta, theta) * (2*B)**0.5)
logp = - np.log(K*((1 -((1/(2*B))*((vn**2)*rn**(beta -
2))))**(q+1))*(rn**(1-gamma +(beta/2))))
return logp.sum()
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP()
#basicmodeltrace = pm.sample()
print(start)

TypeError when accessing RNN results

I am trying to extract the decisions of an RNN for all tokens in a sequence but receive the error mentioned at the end of the post.
Any help will be appreciated!
Code:
# gated recurrent unit
def gru_step(x, h_prev, W_xm, W_hm, W_xh, W_hh):
m = theano.tensor.nnet.sigmoid(theano.tensor.dot(x, W_xm) + theano.tensor.dot(h_prev, W_hm))
r = _slice(m, 0, 2)
z = _slice(m, 1, 2)
_h = theano.tensor.tanh(theano.tensor.dot(x, W_xh) + theano.tensor.dot(r * h_prev, W_hh))
h = z * h_prev + (1.0 - z) * _h
return h
# return h, theano.scan_module.until(previous_power*2 > max_value)
W_xm = self.create_parameter_matrix('W_xm', (word_embedding_size, recurrent_size*2))
W_hm = self.create_parameter_matrix('W_hm', (recurrent_size, recurrent_size*2))
W_xh = self.create_parameter_matrix('W_xh', (word_embedding_size, recurrent_size))
W_hh = self.create_parameter_matrix('W_hh', (recurrent_size, recurrent_size))
initial_hidden_vector = theano.tensor.alloc(numpy.array(0, dtype=floatX), recurrent_size)
initial_process_vector = theano.tensor.alloc(recurrent_size, n_classes)
hidden_vector, _ = theano.scan(
gru_step,
sequences = input_vectors,
outputs_info = initial_hidden_vector,
non_sequences = [W_xm, W_hm, W_xh, W_hh]
)
W_output = self.create_parameter_matrix('W_output', (n_classes,recurrent_size))
rnn_output = theano.tensor.nnet.softmax([theano.tensor.dot(W_output, hidden_vector[-1])])[0]
rnn+predicted_class = theano.tensor.argmax(output)
# Process hidden_vector to decision vectors
def process_hidden_vector(x, W_dot):
return theano.tensor.dot(W_dot, x)
all_tokens_output_vector = theano.scan(process_hidden_vector, sequences=hidden_vector, non_sequences=W_output)
The results should be the "all_tokens_output_vector", but I get the following error when trying to output it:
TypeError: Outputs must be theano Variable or Out instances. Received (for{cpu,scan_fn}.0, OrderedUpdates()) of type <type 'tuple'>
What are you doing with "all_tokens_output_vector" after that? Your print it directly?
There should be a theano.function to compile the graph

Resources