Cannot calculate gradients using theano - theano

I cannot calculate gradient if I use theano.fft.rfft. Below is a minimal code example which reproduce the problem:
import theano.tensor as tt
import theano.tensor.fft
def fft_real(input):
a = input.dimshuffle('x', 0)
spec = tt.fft.rfft(a, norm='ortho')
return spec[0,:,0]
inp = tt.vector('real_n', dtype='float64')
f = fft_real(inp)
cost = f.sum()
g = tt.grad(cost, inp)
It gives an error:
File "C:\Anaconda3\envs\theano36\lib\site-packages\theano\tensor\elemwise.py", line 197, in __init__
(input_broadcastable, new_order))
ValueError: ('You cannot drop a non-broadcastable dimension.', ((False, False), [1]))
I am using theano version 1.0.2

Related

Theano error when using PyMC3: theano.gof.fg.MissingInputError

I am generating some (noisy) data-points (y) with some known parameters (m,c) that represent the equation of a straight line. Using sampling-based Bayesian methods, I now want to know the true values of parameters (m,c) from the data. Therefore, I am using DE Metropolis (PyMC3) to estimate the true parameters.
I am getting theano error theano.gof.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(c_interval__), was not provided and not given a value.
Theano version: 1.0.4
PyMC3 version: 3.9.1
import matplotlib.pyplot as plt
import numpy as np
import arviz as az
import pymc3
import theano.tensor as tt
from theano.compile.ops import as_op
plt.style.use("ggplot")
# define a theano Op for our likelihood function
class LogLike(tt.Op):
itypes = [tt.dvector] # expects a vector of parameter values when called
otypes = [tt.dscalar] # outputs a single scalar value (the log likelihood)
def __init__(self, loglike, data, x, sigma):
# add inputs as class attributes
self.likelihood = loglike
self.data = data
self.x = x
self.sigma = sigma
def perform(self, node, inputs, outputs):
# the method that is used when calling the Op
theta, = inputs # this will contain my variables
# call the log-likelihood function
logl = self.likelihood(theta, self.x, self.data, self.sigma)
outputs[0][0] = np.array(logl) # output the log-likelihood
def my_model(theta, x):
y = theta[0]*x + theta[1]
return y
def my_loglike(theta, x, data, sigma):
model = my_model(theta, x)
ll = -(0.5/sigma**2)*np.sum((data - model)**2)
return ll
# set up our data
N = 10 # number of data points
sigma = 1. # standard deviation of noise
x = np.linspace(0., 9., N)
mtrue = 0.4 # true gradient
ctrue = 3. # true y-intercept
truemodel = my_model([mtrue, ctrue], x)
# make data
np.random.seed(716742) # set random seed, so the data is reproducible each time
data = sigma*np.random.randn(N) + truemodel
print(data)
ndraws = 3000 # number of draws from the distribution
# create our Op
logl = LogLike(my_loglike, data, x, sigma)
# use PyMC3 to sampler from log-likelihood
with pymc3.Model():
# uniform priors on m and c
m = pymc3.Uniform('m', lower=-10., upper=10.)
c = pymc3.Uniform('c', lower=-10., upper=10.)
# convert m and c to a tensor vector
theta = tt.as_tensor_variable([m, c])
# use a DensityDist (use a lamdba function to "call" the Op)
pymc3.DensityDist('likelihood', lambda v: logl(v), observed={'v': theta})
step = pymc3.DEMetropolis()
trace = pymc3.sample(ndraws, step)
# plot the traces
axes = az.plot_trace(trace)
fig = axes.ravel()[0].figure
fig.savefig('./trace_plots.png')
Find the full trace here:
Population sampling (4 chains)
DEMetropolis: [c, m]
Attempting to parallelize chains to all cores. You can turn this off with `pm.sample(cores=1)`.
Population parallelization failed. Falling back to sequential stepping of chains.---------------------| 0.00% [0/4 00:00<00:00]
Sampling 4 chains for 0 tune and 4_000 draw iterations (0 + 16_000 draws total) took 5 seconds.███████| 100.00% [4000/4000 00:04<00:00]
Traceback (most recent call last):
File "test.py", line 75, in <module>
trace = pymc3.sample(ndraws, step)
File "/home/csl_user/.local/lib/python3.7/site-packages/pymc3/sampling.py", line 599, in sample
idata = arviz.from_pymc3(trace, **ikwargs)
File "/home/csl_user/.local/lib/python3.7/site-packages/arviz/data/io_pymc3.py", line 531, in from_pymc3
save_warmup=save_warmup,
File "/home/csl_user/.local/lib/python3.7/site-packages/arviz/data/io_pymc3.py", line 159, in __init__
self.observations, self.multi_observations = self.find_observations()
File "/home/csl_user/.local/lib/python3.7/site-packages/arviz/data/io_pymc3.py", line 172, in find_observations
multi_observations[key] = val.eval() if hasattr(val, "eval") else val
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/graph.py", line 522, in eval
self._fn_cache[inputs] = theano.function(inputs, self)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function.py", line 317, in function
output_keys=output_keys)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/pfunc.py", line 486, in pfunc
output_keys=output_keys)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function_module.py", line 1839, in orig_function
name=name)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function_module.py", line 1487, in __init__
accept_inplace)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/compile/function_module.py", line 181, in std_fgraph
update_mapping=update_mapping)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/fg.py", line 175, in __init__
self.__import_r__(output, reason="init")
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/fg.py", line 346, in __import_r__
self.__import__(variable.owner, reason=reason)
File "/home/csl_user/.local/lib/python3.7/site-packages/theano/gof/fg.py", line 391, in __import__
raise MissingInputError(error_msg, variable=r)
theano.gof.fg.MissingInputError: Input 0 of the graph (indices start from 0), used to compute sigmoid(c_interval__), was not provided and not given a value. Use the Theano flag exception_verbosity='high', for more information on this error.
I've run into the same problem when following the example how to sample from a black box likelihood found here:
https://docs.pymc.io/notebooks/blackbox_external_likelihood.html
This seems to be a version problem. I'm on Manjaro Linux and also ran theano 1.0.4 and pymc3 3.9 using python 3.8. I could solve the issue and make the code work by downgrading to python 3.7 and pymc3 3.8. This seems to be in issue with python 3.8, as simply downgrading pymc3 did not solve the issue for me. I am far from an expert in pymc3 so I don't have a solution how to fix this issue using the newest versions, but for now downgrading makes my simulations run.
Hope this helps.
Edit: The devs seem to be aware of this, there is a an open issue on their github page
https://github.com/pymc-devs/pymc3/issues/4002

Why i am getting "NotImplementedError()" when building a custom optimizer in Tensorflow

I am working on image classification and i am trying to implement a custom optimizer(based on a paper published on ELSEVIER) in Tensorflow,
I tried to modify the code as below: I have some other functions but it is all related with preprocessing and model architecture etc. My optimizer code follows;
import os
os.environ['TF_KERAS'] = '1'
from tensorflow import keras
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
import cv2
import imutils
import matplotlib.pyplot as plt
from os import listdir
from sklearn.metrics import confusion_matrix,classification_report
import logging, warnings
import numpy as np
from tensorflow.python.training import optimizer
from tensorflow.python.ops import math_ops, state_ops, control_flow_ops, variable_scope
from tensorflow.python.framework import ops
class BPVAM(optimizer.Optimizer):
"""Back-propagation algorithm with variable adaptive momentum.
Variables are updated in two steps:
1) v(t + 1) = alpha * v(t)- lr * g(t)
2) w(t + 1) = w(t) + v(t + 1)
where
- v(t + 1): delta for update at step t + 1
- w(t + 1): weights at step t + 1 (after update)
- g(t): gradients at step t.
- lr: learning rate
- alpha: momentum parameter
In the algorithm alpha is not fixed. It is variable and it is parametrized by:
alpha(t) = lambda / (1 - beta ^ t)
"""
def __init__(
self,
lr: float = 0.001,
lam: float = 0.02,
beta: float = 0.998,
use_locking: bool = False,
name: str = 'BPVAM'
):
"""
Args:
lr: learning rate
lam: momentum parameter
beta: momentum parameter
use_locking:
name:
"""
super(BPVAM, self).__init__(use_locking, name)
self._lr = lr
self._lambda = lam
self._beta = beta
self._lr_tensor = None
self._lambda_tensor = None
self._beta_tensor = None
def _create_slots(self, var_list):
for v in var_list:
self._zeros_slot(v, 'v', self._name)
self._get_or_make_slot(v,
ops.convert_to_tensor(self._beta),
'beta',
self._name)
def _prepare(self):
self._lr_tensor = ops.convert_to_tensor(self._lr, name='lr')
self._lambda_tensor = ops.convert_to_tensor(self._lambda, name='lambda')
def _apply_dense(self, grad, var):
lr_t = math_ops.cast(self._lr_tensor, var.dtype.base_dtype)
lambda_t = math_ops.cast(self._lambda_tensor, var.dtype.base_dtype)
v = self.get_slot(var, 'v')
betas = self.get_slot(var, 'beta')
beta_t = state_ops.assign(betas, betas * betas)
alpha = lambda_t / (1 - beta_t)
v_t = state_ops.assign(v, alpha * v - lr_t * grad)
var_update = state_ops.assign_add(var, v_t, use_locking=self._use_locking)
return control_flow_ops.group(*[beta_t, v_t, var_update])
After i create my optimizer and run;
myopt = BPVAM()
model.compile(optimizer= myopt, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
I got this error message;
Traceback (most recent call last):
File "/Users/classification.py", line 264, in <module>model.fit(x=X_train,y=y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))
File"/Users/ venv/lib/python3.7/sitepackages/tensorflow/python/keras/engine/training.py", line 780, in fit steps_name='steps_per_epoch')
File"/Users/venv/lib/python3.7/sitepackages/tensorflow/python/keras/engine/training_arrays.py", line 157, in model_iteration f = _make_execution_function(model, mode)
File"/Users/ venv/lib/python3.7/sitepackages/tensorflow/python/keras/engine/training_arrays.py", line 532, in _make_execution_function return model._make_execution_function(mode)
File"/Users/ venv/lib/python3.7/sitepackages/tensorflow/python/keras/engine/training.py", line 2276, in _make_execution_function self._make_train_function()
File"/Users/ venv/lib/python3.7/sitepackages/tensorflow/python/keras/engine/training.py", line 2219, in _make_train_function params=self._collected_trainable_weights, loss=self.total_loss)
File "/Users/ venv/lib/python3.7/site-packages/tensorflow/python/keras/optimizers.py", line 753, in get_updates grads, global_step=self.iterations)
File "/Users/ venv/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py", line 614, in apply_gradients update_ops.append(processor.update_op(self, grad))
File "/Users/venv/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py", line 171, in update_op update_op = optimizer._resource_apply_dense(g, self._v)
File "/Users/venv/lib/python3.7/site-packages/tensorflow/python/training/optimizer.py", line 954, in _resource_apply_dense
raise NotImplementedError()
NotImplementedError
I can not understand where the problem is. I am using Tensorflow 1.14.0 version and python 3.7. I created virtual environment and tried other tensorflow and python versions but it still does not work.
In order to use a class that inherits from tensorflow.python.training.optimizer.Optimizer you have to implement at least the following methods:
_apply_dense
_resource_apply_dense
_apply_sparse
Check out the source code of the Optimizer for more information.
Since you try to implement a custom momentum method you might want to subclass MomentumOptimizer directly.

Solving simple chemical network odes in pymc3 with theano

Im trying to solve a simple chemical network A->B(reaction rate k1) and A1->B(reaction rate k2) with Bayesian inference. My hopes are to get sensitivity analysis of k1 and k2. If A, A1 and B are my constant variables only logical thing would be that if for example k1 decreases k2 should increase for some proportional amount and vice versa. But I am having some troubles with ODE's in pymc3. So here is my attempt:
import numpy as np
import matplotlib.pyplot as plt
from scipy.integrate import odeint, solve_ivp
import seaborn
import pymc3 as pm
import theano.tensor as T
from theano.compile.ops import as_op
from sys import exit
time = 10
Nt = 11
tt = np.linspace(0,time, Nt)
y0 = [1,2,0]
k1, k2 = 1, 1
#Actual Solution of the Differential Equation(Used to generate data)
def real(t,c):
da_dt = -k1*c[0]
da1_dt = -k2*c[1]
db_dt = k1*c[0] + k2*c[1]
return da_dt, da1_dt, db_dt
c_est = solve_ivp(real, t_span = [0,time], t_eval = tt, y0 = y0)
#Method For Solving the ODE
def lv(xdata, k1=1, k2=1):
def equat(c,t):
da_dt = -k1*c[0]
da1_dt = -k2*c[1]
db_dt = k1*c[0] + k2*c[1]
return da_dt, da1_dt, db_dt
Y, dict = odeint(equat,y0,xdata,full_output=True)
return Y
#Generating Data for Bayesian Inference
k1, k2 = 1, 1
ydata = c_est.y
# Adding some error to the ydata points
yerror = 10*np.random.rand(Nt)
ydata += np.random.normal(0.0, np.sqrt(yerror))
ydata = np.ravel(ydata)
#as_op(itypes=[T.dscalar, T.dscalar], otypes=[T.dvector])
def func(al,be):
Q = lv(tt, k1=al, k2=be)
return np.ravel(Q)
# Number of Samples and Initial Conditions
nsample = 5000
y0 = 1.0
sd = 0.2
# Model for Bayesian Inference
model = pm.Model()
with model:
# Priors for unknown model parameters
k1 = pm.HalfNormal('k1', sd = sd)
k2 = pm.HalfNormal('k2', sd = sd)
# Expected value of outcome
mu = func(k1,k2)
# Likelihood (sampling distribution) of observations
Y_obs = pm.Normal('Y_obs', mu=mu, sd=yerror, observed=y_data)
trace = pm.sample(nsample, nchains=1)
pm.traceplot(trace)
plt.show()
But it doesn't "loop" through equat function. Output error:
Traceback (most recent call last):
File "<ipython-input-16-14ca425a8735>", line 1, in <module>
runfile('/folder/code.py', wdir='/folder')
File "/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 786, in runfile
execfile(filename, namespace)
File "/anaconda3/lib/python3.7/site-packages/spyder_kernels/customize/spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "/code.py", line 77, in <module>
mu = func(k1,k2)
File "/anaconda3/lib/python3.7/site-packages/theano/gof/op.py", line 674, in __call__
required = thunk()
File "/anaconda3/lib/python3.7/site-packages/theano/gof/op.py", line 892, in rval
r = p(n, [x[0] for x in i], o)
File "/anaconda3/lib/python3.7/site-packages/theano/compile/ops.py", line 555, in perform
outs = self.__fn(*inputs)
File "/code.py", line 60, in func
Q = lv(tt, k1=al, k2=be)
File "/code.py", line 42, in lv
Y, dict = odeint(equat,y0,xdata,full_output=True)
File "/anaconda3/lib/python3.7/site-packages/scipy/integrate/odepack.py", line 233, in odeint
int(bool(tfirst)))
File "/code.py", line 39, in equat
da1_dt = -k2*c[1]
IndexError: index 1 is out of bounds for axis 0 with size 1
I'm going nuts here. :( I don't even know if I am on the right path.
Edit, corrected that but now it shows another error.
If anyone else has difficulty here I solved it!
from scipy.integrate import odeint, solve_ivp
import numpy as np
import matplotlib.pyplot as plt
from theano.compile.ops import as_op
import theano.tensor as T
import pymc3 as pm
import copy
from sys import exit
time = 10
Nt = 11
tt = np.linspace(0,time, Nt+1)
y0 = [1,2,0]
k1, k2 = 1, 1
def real_equat(t,c):
da_dt = -k1*c[0]
da1_dt = -k2*c[1]
db_dt = k1*c[0] + k2*c[1]
return da_dt, da1_dt, db_dt
z = solve_ivp(real_equat, t_span=[0,time], t_eval= tt, y0 = y0)
def lv(xdata, k1=k1, k2=k2):
def equat(c,t):
da_dt = -k1*c[0]
da1_dt = -k2*c[1]
db_dt = k1*c[0] + k2*c[1]
return da_dt, da1_dt, db_dt
Y, dict = odeint(equat,y0,tt,full_output=True)
return Y
a = z.y
ydata = copy.copy(a)
yerror = 10*np.random.rand(Nt+1)
ydata += np.random.normal(0.0, np.sqrt(yerror))
ydata = np.ravel(ydata)
#as_op(itypes=[T.dscalar, T.dscalar], otypes=[T.dvector])
def func(al,be):
Q = lv(tt, k1 = al, k2 = be)
return np.ravel(Q)
niter = 10
model = pm.Model()
with model:
# Priors for unknown model parameters
k1 = pm.Uniform('k1', upper = 1.2, lower = 0.8)
k2 = pm.Uniform('k2', upper = 1.2, lower = 0.8)
# Expected value of outcome
mu = func(k1,k2)
# Likelihood (sampling distribution) of observations
Y_obs = pm.Normal('Y_obs', mu=mu, sd=0.2, observed=ydata)
trace = pm.sample(niter = niter, nchains=4)

Using a `tf.Tensor` as a Python `bool` is not allowed. Use `if t is not None:` instead of `if t:` to test if a tensor is defined,

I am trying to define func(x) in order to use the genetic algs library here:
https://github.com/bobirdmi/genetic-algorithms/tree/master/examples
However, when I try and use sga.init_random_population(population_size, params, interval) the code complains of me using tf.Tensors as python bools.
However, I am only referencing one bool in the entire code (Elitism) so I have no idea why this error is even showing. Asked around others who used sga.init_... and my inputs/setup is fine. Any suggestions would be greatly appreciated.
Full traceback:
Traceback (most recent call last):
File "C:\Users\Eric\eclipse-workspace\hw1\ga2.py", line 74, in <module>
sga.init_random_population(population_size, params, interval)
File "C:\Program Files\Python36\lib\site-packages\geneticalgs\real_ga.py", line 346, in init_random_population
self._sort_population()
File "C:\Program Files\Python36\lib\site-packages\geneticalgs\standard_ga.py", line 386, in _sort_population
self.population.sort(key=lambda x: x.fitness_val, reverse=True)
File "C:\Program Files\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 671, in __bool__
raise TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. "
TypeError: Using a `tf.Tensor` as a Python `bool` is not allowed. Use `if t is not None:` instead of `if t:` to test if a tensor is defined, and use TensorFlow ops such as tf.cond to execute subgraphs conditioned on the value of a tensor.
code
import hw1
#import matplotlib
from geneticalgs import BinaryGA, RealGA, DiffusionGA, MigrationGA
#import numpy as np
#import csv
#import time
#import pickle
#import math
#import matplotlib.pyplot as plt
from keras.optimizers import Adam
from hw1 import x_train, y_train, x_test, y_test
from keras.losses import mean_squared_error
#import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
# GA standard settings
generation_num = 50
population_size = 16
elitism = True
selection = 'rank'
tournament_size = None # in case of tournament selection
mut_type = 1
mut_prob = 0.05
cross_type = 1
cross_prob = 0.95
optim = 'min' # minimize or maximize a fitness value? May be 'min' or 'max'.
interval = (-1, 1)
# Migration GA settings
period = 5
migrant_num = 3
cloning = True
def func(x):
#dimensions of weights and biases
#layer0weights = [10][23]
#layer0biases = [10]
#layer1weights = [10][20]
#layer1biases = [20]
#layer2weights = [1][20]
#layer2biases = [1]
#split up x for weights and biases
lay0 = x[0:230]
bias0 = x[230:240]
lay1 = x[240:440]
bias1 = x[440:460]
lay2 = x[460:480]
bias2 = x[480:481]
#fit to the shape of the actual model
lay0 = lay0.reshape(23,10)
bias0 = bias0.reshape(10,)
lay1 = lay1.reshape(10,20)
bias1 = bias1.reshape(20,)
lay2 = lay2.reshape(20,1)
bias2 = bias2.reshape(1,)
#set the newly shaped object to layers
hw1.model.layers[0].set_weights([lay0, bias0])
hw1.model.layers[1].set_weights([lay1, bias1])
hw1.model.layers[2].set_weights([lay2, bias2])
res = hw1.model.predict(x_train)
error = mean_squared_error(res,y_train)
return error
ga_model = Sequential()
ga_model.add(Dense(10, input_dim=23, activation='relu'))
ga_model.add(Dense(20, activation='relu'))
ga_model.add(Dense(1, activation='sigmoid'))
sga = RealGA(func, optim=optim, elitism=elitism, selection=selection,
mut_type=mut_type, mut_prob=mut_prob,
cross_type=cross_type, cross_prob=cross_prob)
params = 481
sga.init_random_population(population_size, params, interval)
optimal = sga.best_solution[0]
predict = func(optimal)
print(predict)
Tensorflow generates a computational graph of operations to be executed in an Tensorflow session.
geneticalgs.RealGA.init_random_population is an operation that uses the numpy.random.uniform to generate a numpy array. 1
The generated population being a Tensor object could mean maybe:
numpy.random.uniform invoked in geneticalgs.RealGA.init_random_population was decorated to return Tensors
numpy.random.uniform was added in the computation graph to be executed in a session.
I'll try executing the program eagerly by enabling eager execution. 2
tf.enable_execution()
You can also in a way execute the parts that you care about eagerly.
size = tf.placeholder(tf.int64)
dim = tf.placeholder(tf.int64)
interval = tf.placeholder(tf.int64, shape=(2,))
init_random_population = tf.py_func(
sga.init_random_population, [size, dim, interval], [])
with tf.Session() as session:
session.run(
init_random_population,
{size: population_size, dim: params, interval: interval})

Tensorflow Adagrad optimizer isn't working

When I run the following script, I notice the following couple of errors:
import tensorflow as tf
import numpy as np
import seaborn as sns
import random
#set random seed:
random.seed(42)
def potential(N):
points = np.random.rand(N,2)*10
values = np.array([np.exp((points[i][0]-5.0)**2 + (points[i][1]-5.0)**2) for i in range(N)])
return points, values
def init_weights(shape,var_name):
"""
Xavier initialisation of neural networks
"""
init = tf.contrib.layers.xavier_initializer()
return tf.get_variable(initializer=init,name = var_name,shape=shape)
def neural_net(X):
with tf.variable_scope("model",reuse=tf.AUTO_REUSE):
w_h = init_weights([2,10],"w_h")
w_h2 = init_weights([10,10],"w_h2")
w_o = init_weights([10,1],"w_o")
### bias terms:
bias_1 = init_weights([10],"bias_1")
bias_2 = init_weights([10],"bias_2")
bias_3 = init_weights([1],"bias_3")
h = tf.nn.relu(tf.add(tf.matmul(X, w_h),bias_1))
h2 = tf.nn.relu(tf.add(tf.matmul(h, w_h2),bias_2))
return tf.nn.relu(tf.add(tf.matmul(h2, w_o),bias_3))
X = tf.placeholder(tf.float32, [None, 2])
with tf.Session() as sess:
model = neural_net(X)
## define optimizer:
opt = tf.train.AdagradOptimizer(0.0001)
values =tf.placeholder(tf.float32, [None, 1])
squared_loss = tf.reduce_mean(tf.square(model-values))
## define model variables:
model_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,"model")
train_model = opt.minimize(squared_loss,var_list=model_vars)
sess.run(tf.global_variables_initializer())
for i in range(10):
points, val = potential(100)
train_feed = {X : points,values: val.reshape((100,1))}
sess.run(train_model,feed_dict = train_feed)
print(sess.run(model,feed_dict = {X:points}))
### plot the approximating model:
res = 0.1
xy = np.mgrid[0:10:res, 0:10:res].reshape(2,-1).T
values = sess.run(model, feed_dict={X: xy})
sns.heatmap(values.reshape((int(10/res),int(10/res))),xticklabels=False,yticklabels=False)
On the first run I get:
[nan] [nan] [nan] [nan] [nan] [nan] [nan]] Traceback (most
recent call last):
...
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/seaborn/matrix.py",
line 485, in heatmap
yticklabels, mask)
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/seaborn/matrix.py",
line 167, in init
cmap, center, robust)
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/seaborn/matrix.py",
line 206, in _determine_cmap_params
vmin = np.percentile(calc_data, 2) if robust else calc_data.min()
File
"/Users/aidanrockea/anaconda/lib/python3.6/site-packages/numpy/core/_methods.py",
line 29, in _amin
return umr_minimum(a, axis, None, out, keepdims)
ValueError: zero-size array to reduction operation minimum which has
no identity
On the second run I have:
ValueError: Variable model/w_h/Adagrad/ already exists, disallowed.
Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?
It's not clear to me why I get either of these errors. Furthermore, when I use:
for i in range(10):
points, val = potential(10)
train_feed = {X : points,values: val.reshape((10,1))}
sess.run(train_model,feed_dict = train_feed)
print(sess.run(model,feed_dict = {X:points}))
I find that on the first run, I sometimes get a network that has collapsed to the constant function with output 0. Right now my hunch is that this might simply be a numerics problem but I might be wrong.
If so, it's a serious problem as the model I have used here is very simple.
Right now my hunch is that this might simply be a numerics problem
indeed, when running potential(100) I sometimes get values as large as 1E21. The largest points will dominate your loss function and will drive the network parameters.
Even when normalizing your target values e.g. to unit variance, the problem of the largest values dominating the loss would still remain (look e.g. at plt.hist(np.log(potential(100)[1]), bins = 100)).
If you can, try learning the log of val instead of val itself. Note however that then you are changing the assumption of the loss function from 'predictions follow a normal distribution around the target values' to 'log predictions follow a normal distribution around log of the target values'.

Resources