Theano error when using PyMC3: theano.gof.fg.MissingInputError - python-3.x

I am generating some (noisy) data-points (y) with some known parameters (m,c) that represent the equation of a straight line. Using sampling-based Bayesian methods, I now want to know the true values of parameters (m,c) from the data. Therefore, I am using DE Metropolis (PyMC3) to estimate the true parameters.
I am getting theano error theano.gof.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(c_interval__), was not provided and not given a value.
Theano version: 1.0.4
PyMC3 version: 3.9.1
import matplotlib.pyplot as plt
import numpy as np
import arviz as az
import pymc3
import theano.tensor as tt
from theano.compile.ops import as_op
plt.style.use("ggplot")
# define a theano Op for our likelihood function
class LogLike(tt.Op):
itypes = [tt.dvector] # expects a vector of parameter values when called
otypes = [tt.dscalar] # outputs a single scalar value (the log likelihood)
def __init__(self, loglike, data, x, sigma):
# add inputs as class attributes
self.likelihood = loglike
self.data = data
self.x = x
self.sigma = sigma
def perform(self, node, inputs, outputs):
# the method that is used when calling the Op
theta, = inputs # this will contain my variables
# call the log-likelihood function
logl = self.likelihood(theta, self.x, self.data, self.sigma)
outputs[0][0] = np.array(logl) # output the log-likelihood
def my_model(theta, x):
y = theta[0]*x + theta[1]
return y
def my_loglike(theta, x, data, sigma):
model = my_model(theta, x)
ll = -(0.5/sigma**2)*np.sum((data - model)**2)
return ll
# set up our data
N = 10 # number of data points
sigma = 1. # standard deviation of noise
x = np.linspace(0., 9., N)
mtrue = 0.4 # true gradient
ctrue = 3. # true y-intercept
truemodel = my_model([mtrue, ctrue], x)
# make data
np.random.seed(716742) # set random seed, so the data is reproducible each time
data = sigma*np.random.randn(N) + truemodel
print(data)
ndraws = 3000 # number of draws from the distribution
# create our Op
logl = LogLike(my_loglike, data, x, sigma)
# use PyMC3 to sampler from log-likelihood
with pymc3.Model():
# uniform priors on m and c
m = pymc3.Uniform('m', lower=-10., upper=10.)
c = pymc3.Uniform('c', lower=-10., upper=10.)
# convert m and c to a tensor vector
theta = tt.as_tensor_variable([m, c])
# use a DensityDist (use a lamdba function to "call" the Op)
pymc3.DensityDist('likelihood', lambda v: logl(v), observed={'v': theta})
step = pymc3.DEMetropolis()
trace = pymc3.sample(ndraws, step)
# plot the traces
axes = az.plot_trace(trace)
fig = axes.ravel()[0].figure
fig.savefig('./trace_plots.png')
Find the full trace here:
Population sampling (4 chains)
DEMetropolis: [c, m]
Attempting to parallelize chains to all cores. You can turn this off with `pm.sample(cores=1)`.
Population parallelization failed. Falling back to sequential stepping of chains.---------------------| 0.00% [0/4 00:00<00:00]
Sampling 4 chains for 0 tune and 4_000 draw iterations (0 + 16_000 draws total) took 5 seconds.███████| 100.00% [4000/4000 00:04<00:00]
Traceback (most recent call last):
File "test.py", line 75, in <module>
trace = pymc3.sample(ndraws, step)
File "/home/csl_user/.local/lib/python3.7/site-packages/pymc3/sampling.py", line 599, in sample
idata = arviz.from_pymc3(trace, **ikwargs)
File "/home/csl_user/.local/lib/python3.7/site-packages/arviz/data/io_pymc3.py", line 531, in from_pymc3
save_warmup=save_warmup,
File "/home/csl_user/.local/lib/python3.7/site-packages/arviz/data/io_pymc3.py", line 159, in __init__
self.observations, self.multi_observations = self.find_observations()
File "/home/csl_user/.local/lib/python3.7/site-packages/arviz/data/io_pymc3.py", line 172, in find_observations
multi_observations[key] = val.eval() if hasattr(val, "eval") else val
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/graph.py", line 522, in eval
self._fn_cache[inputs] = theano.function(inputs, self)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function.py", line 317, in function
output_keys=output_keys)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/pfunc.py", line 486, in pfunc
output_keys=output_keys)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function_module.py", line 1839, in orig_function
name=name)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function_module.py", line 1487, in __init__
accept_inplace)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function_module.py", line 181, in std_fgraph
update_mapping=update_mapping)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/fg.py", line 175, in __init__
self.__import_r__(output, reason="init")
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/fg.py", line 346, in __import_r__
self.__import__(variable.owner, reason=reason)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/fg.py", line 391, in __import__
raise MissingInputError(error_msg, variable=r)
theano.gof.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(c_interval__), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.

I've run into the same problem when following the example how to sample from a black box likelihood found here:
https://docs.pymc.io/notebooks/blackbox_external_likelihood.html
This seems to be a version problem. I'm on Manjaro Linux and also ran theano 1.0.4 and pymc3 3.9 using python 3.8. I could solve the issue and make the code work by downgrading to python 3.7 and pymc3 3.8. This seems to be in issue with python 3.8, as simply downgrading pymc3 did not solve the issue for me. I am far from an expert in pymc3 so I don't have a solution how to fix this issue using the newest versions, but for now downgrading makes my simulations run.
Hope this helps.
Edit: The devs seem to be aware of this, there is a an open issue on their github page
https://github.com/pymc-devs/pymc3/issues/4002

Related

scipy.integrate error:only size-1 arrays can be converted to Python scalars [duplicate]

This question already has an answer here:
scipy curve_fit doesn't like math module
(1 answer)
Closed 2 years ago.
I am trying to calculate probabilities from Gaussian Distribution by varying the standard deviation(std).
I am expecting by using the Gaussian Quadrature with 21 points, integrating over a range from -1 to +1 with mean = 0 and setting std= 1 and std =2 will yield p = 0.68 and p = 0.95 respectively (picture attached).
import scipy.integrate as integrate
import math as m
#mean=0
#varying the sigma
def f(sigma,x):
return m.exp(-1*(x**2)/(2*sigma**2))/(sigma*m.sqrt(2*m.pi))
def prob_at_nsigma(sigma):
value = 0.
ans,anserr =integrate.quadrature(f,-1,1,args=(sigma ,),maxiter=21)
value = ans
return value
print(prob_at_nsigma(1))
And I get the following errors which I don't see why the "divide by zero" and "only size-1 arrays can be converted to Python scalars" arise:
runfile('C:/pythonExe/ntu_cp/untitled0.py', wdir='C:/pythonExe/ntu_cp')
C:\pythonExe\ntu_cp\untitled0.py:14: RuntimeWarning: divide by zero encountered in true_divide
return m.exp(-1*(x**2)/(2*sigma**2))/(sigma*m.sqrt(2*m.pi))
C:\pythonExe\ntu_cp\untitled0.py:14: RuntimeWarning: invalid value encountered in true_divide
return m.exp(-1*(x**2)/(2*sigma**2))/(sigma*m.sqrt(2*m.pi))
Traceback (most recent call last):
File "C:\pythonExe\ntu_cp\untitled0.py", line 23, in <module>
print(prob_at_nsigma(1))
File "C:\pythonExe\ntu_cp\untitled0.py", line 19, in prob_at_nsigma
ans,anserr =integrate.quadrature(f,-1,1,args=(sigma ,),maxiter=21)
File "C:\Users\cztee\anaconda3\lib\site-packages\scipy\integrate\_quadrature.py", line 238, in quadrature
newval = fixed_quad(vfunc, a, b, (), n)[0]
File "C:\Users\cztee\anaconda3\lib\site-packages\scipy\integrate\_quadrature.py", line 119, in fixed_quad
return (b-a)/2.0 * np.sum(w*func(y, *args), axis=-1), None
File "C:\Users\cztee\anaconda3\lib\site-packages\scipy\integrate\_quadrature.py", line 149, in vfunc
return func(x, *args)
File "C:\pythonExe\ntu_cp\untitled0.py", line 14, in f
return m.exp(-1*(x**2)/(2*sigma**2))/(sigma*m.sqrt(2*m.pi))
TypeError: only size-1 arrays can be converted to Python scalars
Appreciate any help. Thanks!
Gaussian distribution
Two problems in your code:
As mentioned, you should use numpy instead of math module.
Your f function's argument order is wrong. Scipy assumes that the first variable is x followed by other parameters. So it should be f(x, sigma), not f(sigma, x).
Solution:
import numpy as np
import scipy.integrate as integrate
def f(x, sigma):
return np.exp(-1*(x**2)/(2*sigma**2))/(sigma*np.sqrt(2*np.pi))
def prob_at_nsigma(sigma):
value = 0.
ans, anserr = integrate.quadrature(f, -1, 1, args=(sigma,), maxiter=21)
return ans
print(prob_at_nsigma(1))
# 0.6826894922280757

ValueError: Number of priors must match number of classes

I want to compile my python3 code on ubuntu, and also want to know about the problem, such that i can handle that in future.
It seems there is some problem with the imported library function.
## sample code
1 import numpy as np
2 x = np.array([[-1,-1],[-2,-1],[-3,-2],[1,1],[2,1],[3,2]])
3 y = np.array([1,1,1,2,2,2])
4 from sklearn.naive_bayes import GaussianNB
5 clf = GaussianNB(x, y)
6 clf = clf.fit(x,y) ###showing error on compiling
7 print(clf.predict([[-2,1]]))
## output shown
Traceback (most recent call last):
File "naive.py", line 7, in <module>
clf = clf.fit(x,y)
File "/home/abhihsek/.local/lib/python3.6/site-
packages/sklearn/naive_bayes.py", line 192, in fit
sample_weight=sample_weight)
File "/home/abhihsek/.local/lib/python3.6/site-
packages/sklearn/naive_bayes.py", line 371, in _partial_fit
raise ValueError('Number of priors must match number of'
ValueError: Number of priors must match number of classes.
## code of library function line 192
190 X, y = check_X_y(X, y)
191 return self._partial_fit(X, y, np.unique(y),
_refit=True,
192
sample_weight=sample_weight)
## code of library function line 371
369 # Check that the provide prior match the number of classes
370 if len(priors) != n_classes:
371 raise ValueError('Number of priors must
match
number of'
372 ' classes.')
373 # Check that the sum is 1
As #Suvan Pandey mentioned, then the code won't give any error when writing clf = GaussianNB() instead of clf = GaussianNB(x, y).
If we look at the GaussianNB class then the __init__() can take these parameters:
def __init__(self, priors=None, var_smoothing=1e-9): # <-- these have a default value
self.priors = priors
self.var_smoothing = var_smoothing
The documentation about the two parameters:
priors – Prior probabilities of the classes. If specified the priors are not adjusted according to the data.
var_smoothing – Portion of the largest variance of all features that is added to variances for calculation stability.
As your x and y variables both return an array object then they don't fit the parameters of the __init__(...).

Tensorflow Adagrad optimizer isn't working

When I run the following script, I notice the following couple of errors:
import tensorflow as tf
import numpy as np
import seaborn as sns
import random
#set random seed:
random.seed(42)
def potential(N):
points = np.random.rand(N,2)*10
values = np.array([np.exp((points[i][0]-5.0)**2 + (points[i][1]-5.0)**2) for i in range(N)])
return points, values
def init_weights(shape,var_name):
"""
Xavier initialisation of neural networks
"""
init = tf.contrib.layers.xavier_initializer()
return tf.get_variable(initializer=init,name = var_name,shape=shape)
def neural_net(X):
with tf.variable_scope("model",reuse=tf.AUTO_REUSE):
w_h = init_weights([2,10],"w_h")
w_h2 = init_weights([10,10],"w_h2")
w_o = init_weights([10,1],"w_o")
### bias terms:
bias_1 = init_weights([10],"bias_1")
bias_2 = init_weights([10],"bias_2")
bias_3 = init_weights([1],"bias_3")
h = tf.nn.relu(tf.add(tf.matmul(X, w_h),bias_1))
h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2),bias_2))
return tf.nn.relu(tf.add(tf.matmul(h2, w_o),bias_3))
X = tf.placeholder(tf.float32, [None, 2])
with tf.Session() as sess:
model = neural_net(X)
## define optimizer:
opt = tf.train.AdagradOptimizer(0.0001)
values =tf.placeholder(tf.float32, [None, 1])
squared_loss = tf.reduce_mean(tf.square(model-values))
## define model variables:
model_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,"model")
train_model = opt.minimize(squared_loss,var_list=model_vars)
sess.run(tf.global_variables_initializer())
for i in range(10):
points, val = potential(100)
train_feed = {X : points,values: val.reshape((100,1))}
sess.run(train_model,feed_dict = train_feed)
print(sess.run(model,feed_dict = {X:points}))
### plot the approximating model:
res = 0.1
xy = np.mgrid[0:10:res, 0:10:res].reshape(2,-1).T
values = sess.run(model, feed_dict={X: xy})
sns.heatmap(values.reshape((int(10/res),int(10/res))),xticklabels=False,yticklabels=False)
On the first run I get:
[nan] [nan] [nan] [nan] [nan] [nan] [nan]] Traceback (most
recent call last):
...
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/seaborn/matrix.py",
line 485, in heatmap
yticklabels, mask)
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/seaborn/matrix.py",
line 167, in init
cmap, center, robust)
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/seaborn/matrix.py",
line 206, in _determine_cmap_params
vmin = np.percentile(calc_data, 2) if robust else calc_data.min()
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/numpy/core/_methods.py",
line 29, in _amin
return umr_minimum(a, axis, None, out, keepdims)
ValueError: zero-size array to reduction operation minimum which has
no identity
On the second run I have:
ValueError: Variable model/w_h/Adagrad/ already exists, disallowed.
Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?
It's not clear to me why I get either of these errors. Furthermore, when I use:
for i in range(10):
points, val = potential(10)
train_feed = {X : points,values: val.reshape((10,1))}
sess.run(train_model,feed_dict = train_feed)
print(sess.run(model,feed_dict = {X:points}))
I find that on the first run, I sometimes get a network that has collapsed to the constant function with output 0. Right now my hunch is that this might simply be a numerics problem but I might be wrong.
If so, it's a serious problem as the model I have used here is very simple.
Right now my hunch is that this might simply be a numerics problem
indeed, when running potential(100) I sometimes get values as large as 1E21. The largest points will dominate your loss function and will drive the network parameters.
Even when normalizing your target values e.g. to unit variance, the problem of the largest values dominating the loss would still remain (look e.g. at plt.hist(np.log(potential(100)[1]), bins = 100)).
If you can, try learning the log of val instead of val itself. Note however that then you are changing the assumption of the loss function from 'predictions follow a normal distribution around the target values' to 'log predictions follow a normal distribution around log of the target values'.

TypeError: Image data can not convert to float. How to mitigate this error?

from numpy import mean,cov,cumsum,dot,linalg,size,flipud
import numpy as np
from pylab import imread,subplot,imshow,title,gray,figure,show,NullLocator
def princomp(A,numpc=0):
#computing eigenvalues and eigenvectors of covariance matrix
M = (A-mean(A.T,axis=1)).T # subtract the mean (along columns)
[latent,coeff] = linalg.eig(cov(M))
p = size(coeff,axis=1)
idx = np.argsort(latent) # sorting the eigenvalues
idx = idx[::-1] # in ascending order
#sorting eigenvectors according to the sorted eigenvalues
coeff = coeff[:,idx]
latent = latent[idx] # sorting eigenvalues
if numpc < p and numpc >= 0:
coeff = coeff[:,range(numpc)] # cutting some PCs if needed
score = dot(coeff.T,M) # projection of the data in the new space
return coeff,score,latent
A = imread('beatles.jpg') # load an image
A = mean(A,2) # to get a 2-D array
full_pc = size(A,axis=1) # numbers of all the principal components
i = 1
dist = []
for numpc in range(0,full_pc+10,10): # 0 10 20 ... full_pc
coeff, score, latent = princomp(A,numpc)
Ar = dot(coeff,score).T+mean(A,axis=0) # image reconstruction
# difference in Frobenius norm
dist.append(linalg.norm(A-Ar,'fro'))
# showing the pics reconstructed with less than 50 PCs
if numpc <= 50:
ax = subplot(2,3,i,frame_on=False)
ax.xaxis.set_major_locator(NullLocator()) # remove ticks
ax.yaxis.set_major_locator(NullLocator())
i += 1
imshow(flipud(Ar))
title('PCs # '+str(numpc))
gray()
figure()
imshow(flipud(A))
title('numpc FULL')
gray()
show()
The error is coming as shown above. This is a sample code on PCA image compression that I found on the net. Suggestions would be helpful. The entire error is :
Traceback (most recent call last):
File "try.py", line 36, in <module>
imshow(flipud(Ar))
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/pyplot.py", line 3157, in imshow
**kwargs)
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/__init__.py", line 1898, in inner
return func(ax, *args, **kwargs)
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/axes/_axes.py", line 5124, in imshow
im.set_data(X)
File "/home/user/.virtualenvs/cv/local/lib/python3.5/site-packages/matplotlib/image.py", line 596, in set_data
raise TypeError("Image data can not convert to float")
TypeError: Image data can not convert to float
You cannot plot a complex array with matplotlib imshow. So you need to take the real or imaginary part alone, e.g.
Ar = dot(coeff,score).T+mean(A,axis=0) # image reconstruction
Ar = Ar.real

How do tf.gradients() work?

I am fairly new to tensorflow, i have seen some tutorials but i dont know how tf.gradients() works. if i give it an input of two 2D matrices, how will it compute the partial derivatives? i am really confused ,please help me if you anyone could, it would be of a great help.
import tensorflow as tf
import numpy as np
X = np.random.rand(3,3)
y = np.random.rand(2,2)
grad = tf.gradients(X,y)
with tf.Session() as sess:
sess.run(grad)
print(grad)
this gives an error:
Traceback (most recent call last):
File "C:/Users/Sandeep IPK/PycharmProjects/tests/samples2.py", line 10, in
sess.run(grad)
File "C:\Users\Sandeep IPK\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 767, in run
run_metadata_ptr)
File "C:\Users\Sandeep IPK\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 952, in _run
fetch_handler = _FetchHandler(self._graph, fetches, feed_dict_string)
File "C:\Users\Sandeep IPK\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 408, in init
self._fetch_mapper = _FetchMapper.for_fetch(fetches)
File "C:\Users\Sandeep IPK\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 230, in for_fetch
return _ListFetchMapper(fetch)
File "C:\Users\Sandeep IPK\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 337, in init
self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
File "C:\Users\Sandeep IPK\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 337, in
self._mappers = [_FetchMapper.for_fetch(fetch) for fetch in fetches]
File "C:\Users\Sandeep IPK\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\client\session.py", line 227, in for_fetch
(fetch, type(fetch)))
TypeError: Fetch argument None has invalid type
Process finished with exit code 1
TensorFlow uses reverse accumulation which is based on the chain rule, to compute the gradient value at point. In order to compute gradient of function with respect to a variable you have to define both. Also you have to specify value at which you want to compute the gradient. In this example you compute gradient of y=x**2+x+1 with respect to x at 2:
#!/usr/bin/env python3
import tensorflow as tf
x = tf.Variable(2.0)
y = x**2 + x - 1
grad = tf.gradients(y, x)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
grad_value = sess.run(grad)
print(grad_value)
# output: [5.0]
It is also possible to compute a gradient in case your variable is a matrix. In such case the gradient will be also a matrix. Here we use a simple case when the function depends on the sum of all matrix elements:
#!/usr/bin/env python3
import tensorflow as tf
X = tf.Variable(tf.random_normal([3, 3]))
X_sum = tf.reduce_sum(X)
y = X_sum**2 + X_sum - 1
grad = tf.gradients(y, X)
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
grad_value = sess.run(grad)
print(grad_value)
# output: [array([[ 9.6220665, 9.6220665, 9.6220665],
# [ 9.6220665, 9.6220665, 9.6220665],
# [ 9.6220665, 9.6220665, 9.6220665]], dtype=float32)]

Resources