A gradient clipping in Chainer

A gradient clipping in Chainer - pytorch

Can I get a Gradient Clipping function in Chainer?
I found some codes in Pytorch Documentation : https://pytorch.org/docs/stable/_modules/torch/nn/utils/clip_grad.html
Is there anything like alternative function in Chainer?
I just found chainer.optimizer_hooks.GradientClipping, but this is hard to utilize.
Thanks in advance.

how about trying this.
just I just re-wrote pyTorch function in Chainer style.
import cupy
def clip_grad_norm(model, max_norm, norm_type=2):
params = list( filter(lambda p : p.grad is not None , model.params()) )
max_norm = float(max_norm)
norm_type = float(norm_type)
total_norm = 0.0
for p in params:
g = p.grad
norm = cupy.linalg.norm(g)
total_norm += norm**(norm_type)
total_norm = total_norm **(1/norm_type)
clip_coef = max_norm / (total_norm + 1e-6)
if clip_coef < 1:
for p in params:
g = p.grad
p.grad = g * clip_coef

Related

How to remove inplace operation error in Pytorch?

I get this error from the following Pytorch code:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [3]] is at version 10; expected version 9 instead.
As it is seen the code does not have inplace operations.
import torch
device = torch.device('cpu')
class MesNet(torch.nn.Module):
def __init__(self):
super(MesNet, self).__init__()
self.cov_lin = torch.nn.Sequential(torch.nn.Linear(6, 5)).double()
def forward(self, u):
z_cov = self.cov_lin(u.transpose(0, 2).squeeze(-1))
return z_cov
class UpdateModel(torch.nn.Module):
def __init__(self):
torch.nn.Module.__init__(self)
self.P_dim = 18
self.Id3 = torch.eye(3).double()
def run_KF(self):
N = 10
u = torch.randn(N, 6).double()
v = torch.zeros(N, 3).double()
model = MesNet()
measurements_covs_l = model(u.t().unsqueeze(0))
# remember to remove this afterwards
torch.autograd.set_detect_anomaly(True)
for i in range(1, N):
v[i] = self.update_pos(v[i].detach(), measurements_covs_l[i-1])
criterion = torch.nn.MSELoss(reduction="sum")
targ = torch.rand(10, 3).double()
loss = criterion(v, targ)
loss = torch.mean(loss)
loss.backward()
return v, p
def update_pos(self, v, measurement_cov):
Omega = torch.eye(3).double()
H = torch.ones((5, self.P_dim)).double()
R = torch.diag(measurement_cov)
Kt = H.t().mm(torch.inverse(R))
# it is indicating inplace error even with this:
# Kt = H.t().mm(R)
dx = Kt.mv(torch.ones(5).double())
dR = self.trans(dx[:9].clone())
v_up = dR.mv(v)
return v_up
def trans(self, xi):
phi = xi[:3].clone()
angle = torch.norm(phi.clone())
if angle.abs().lt(1e-10):
skew_phi = torch.eye(3).double()
J = self.Id3 + 0.5 * skew_phi
Rot = self.Id3 + skew_phi
else:
axis = phi / angle
skew_axis = torch.eye(3).double()
s = torch.sin(angle)
c = torch.cos(angle)
Rot = c * self.Id3
return Rot
net = UpdateModel()
net.run_KF()

I think the issue is that you are overwriting v[i] elements.
You could instead construct an auxiliary list v_ from the loop, then convert it tensor:
v_ = [v[0]]
for i in range(1, N):
v_.append(self.update_pos(v[i].detach(), measurements_covs_l[i-1]))
v = torch.stack(v_)

convert Tensorflow1 to Tensorflow 2

there is a code written with tensorflow1 on this link.
https://github.com/carlthome/tensorflow-convlstm-cell/blob/master/cell.py
I want to use this class as a layer in TensorFlow.Keras. So it should be written with TensorFlow version 2.
How can do it?
this is this code:
import tensorflow as tf
class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
"""A LSTM cell with convolutions instead of multiplications.
Reference:
Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
"""
def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None):
super(ConvLSTMCell, self).__init__(_reuse=reuse)
self._kernel = kernel
self._filters = filters
self._forget_bias = forget_bias
self._activation = activation
self._normalize = normalize
self._peephole = peephole
if data_format == 'channels_last':
self._size = tf.TensorShape(shape + [self._filters])
self._feature_axis = self._size.ndims
self._data_format = None
elif data_format == 'channels_first':
self._size = tf.TensorShape([self._filters] + shape)
self._feature_axis = 0
self._data_format = 'NC'
else:
raise ValueError('Unknown data_format')
#property
def state_size(self):
return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)
#property
def output_size(self):
return self._size
def call(self, x, state):
c, h = state
x = tf.concat([x, h], axis=self._feature_axis)
n = x.shape[-1].value
m = 4 * self._filters if self._filters > 1 else 4
W = tf.get_variable('kernel', self._kernel + [n, m])
y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)
if not self._normalize:
y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
j, i, f, o = tf.split(y, 4, axis=self._feature_axis)
if self._peephole:
i += tf.get_variable('W_ci', c.shape[1:]) * c
f += tf.get_variable('W_cf', c.shape[1:]) * c
if self._normalize:
j = tf.contrib.layers.layer_norm(j)
i = tf.contrib.layers.layer_norm(i)
f = tf.contrib.layers.layer_norm(f)
f = tf.sigmoid(f + self._forget_bias)
i = tf.sigmoid(i)
c = c * f + i * self._activation(j)
if self._peephole:
o += tf.get_variable('W_co', c.shape[1:]) * c
if self._normalize:
o = tf.contrib.layers.layer_norm(o)
c = tf.contrib.layers.layer_norm(c)
o = tf.sigmoid(o)
h = o * self._activation(c)
state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
return h, state

How to resolve value error in Scipy function fmintnc?

I am trying to implement coursera assignments in python, while doing Scipy optimise for logistic regression. However, I am getting the error below.
Can any one help!
Note: cost, gradient functions are working fine.
#Sigmoid function
def sigmoid(z):
h_of_z = np.zeros([z.shape[0]])
h_of_z = np.divide(1,(1+(np.exp(-z))))
return h_of_z
def cost(x,y,theta):
m = y.shape[0]
h_of_x = sigmoid(np.matmul(x,theta))
term1 = sum(-1 * y.T # np.log(h_of_x) - (1-y.T) # np.log(1-h_of_x))
J = 1/m * term1
return J
def grad(x,y,theta):
grad = np.zeros_like(theta)
m = y.shape[0]
h_of_x = sigmoid(x#theta)
grad = (x.T # (h_of_x - y)) * (1/m)
return grad
#add intercept term for X
x = np.hstack([np.ones_like(y),X[:,0:2]])
#initialise theta
[m,n] = np.shape(x)
initial_theta = np.zeros([n,1])
#optimising theta from given theta and gradient
result = opt.fmin_tnc(func=cost, x0=initial_theta, args=(x, y))
ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 99 is different from 3)

I got it !
so the problem is fmin_tnc function programmed in a way we should parse the the parameter 'theta' before calling arguments x and y .
Since in my function 'cost' I have passed x and y first, it interpreted values differently so thrown ValueError .
Below are the corrected code..
def sigmoid(x):
return 1/(1+np.exp(-x))
def cost(theta,x,y):
J = (-1/m) * np.sum(np.multiply(y, np.log(sigmoid(x # theta)))
+ np.multiply((1-y), np.log(1 - sigmoid(x # theta))))
return J
def gradient(theta,x,y):
h_of_x = sigmoid(x#theta)
grad = 1 / m * (x.T # (h_of_x - y))
return grad
#initialise theta
init_theta = np.zeros([n+1,1])
#optimise theta
from scipy import optimize as op
result = op.fmin_tnc(func=cost,
x0=init_theta.flatten(),
fprime=gradient,
args=(x,y.flatten()))

PyMC3- Custom theano Op to do numerical integration

I am using PyMC3 for parameter estimation using a particular likelihood function which has to be defined. I googled it and found out that I should use the densitydist method for implementing the user defined likelihood functions but it is not working. How to incorporate a user defined likelihood function in PyMC3 and to find out the maximum a posteriori (MAP) estimate for my model? My code is given below. Here L is the analytic form of my Likelihood function. I have some observational data for the radial velocity(vr) and postion (r) for some objects, which is imported from excel file.
data_ = np.array(pandas.read_excel('aaa.xlsx',header=None))
gamma=3.77;
G = 4.302*10**-6;
rmin = 3.0;
R = 95.7;
vr=data_[:,1];
r= data_[:,0];
h= np.pi;
class integrateOut(theano.Op):
def __init__(self,f,t,t0,tf,*args,**kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self,*inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f,self.fvars)
except:
self.gradF = None
return theano.Apply(self,self.fvars,[tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t]+self.fvars,self.f)
output_storage[0][0] = quad(f,self.t0,self.tf,args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g,self.t,self.t0,self.tf)(*inputs)*grads[0] \
for g in self.gradF]
basic_model = pm.Model()
with basic_model:
M=[]
beta=[]
interval=0.01*10**12
M=pm.Uniform('M',
lower=0.5*10**12,upper=3.50*10**12,transform='interval')
beta=pm.Uniform('beta',lower=2.001,upper=2.999,transform='interval')
gamma=3.77
logp=[]
arr=[]
vnew=[]
rnew=[]
theta = tt.scalar('theta')
beta = tt.scalar('beta')
z = tt.cos(theta)**(2*( (gamma/(beta - 2)) - 3/2) + 3)
intZ = integrateOut(z,theta,-(np.pi)/2,(np.pi)/2)(beta)
gradIntZ = tt.grad(intZ,[beta])
funcIntZ = theano.function([beta],intZ)
funcGradIntZ = theano.function([beta],gradIntZ)
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
rnew.append(r[j]+(0.05*r[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
vn=np.array(vnew)
rn=np.array(rnew)
for beta in np.arange (2.01,2.99,0.01):
for M in np.arange (0.5,2.50,0.01):
i=np.arange(0,59,1)
q =( gamma/(beta - 2)) - 3/2
B = (G*M*10**12)/((beta -2 )*( R**(3 - beta)))
K = (gamma - 3)/((rmin**(3 - gamma))*funcIntZ(beta)*m.sqrt(2*B))
logp= -np.log(K*((1 -(( 1/(2*B) )*((vn[i]**2)*rn[i]**(beta -
2))))**(q+1))*(rn[i]**(1-gamma +(beta/2))))
arr.append(logp.sum())
def logp_func(rn,vn):
return min(np.array(arr))
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP(model=basic_model)
step = pm.Metropolis()
basicmodeltrace = pm.sample(10000, step=step,
start=start,random_seed=1,progressbar=True)
print(pm.summary(basicmodeltrace))
map_estimate = pm.find_MAP(model=basic_model)
print(map_estimate)
I am getting the following error message:
ValueError: Cannot compute test value: input 0 (theta) of Op
Elemwise{cos,no_inplace}(theta) missing default value.
Backtrace when that variable is created:
I am unable to get the output since the numerical integration is not working. I have used custom theano op for numerical integration code which i got from Custom Theano Op to do numerical integration . The integration works if I run it seperately inputting a particular value of beta, but not within the model.

I made a few changes to your code, this still does not work, but I hope it is closer to a solution. Please check this thread, as someone is trying so solve essentially the same problem.
class integrateOut(theano.Op):
def __init__(self, f, t, t0, tf,*args, **kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self, *inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f, self.fvars)
except:
self.gradF = None
return theano.Apply(self, self.fvars, [tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t] + self.fvars,self.f)
output_storage[0][0] = quad(f, self.t0, self.tf, args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g, self.t, self.t0, self.tf)(*inputs)*grads[0] \
for g in self.gradF]
gamma = 3.77
G = 4.302E-6
rmin = 3.0
R = 95.7
vr = data[:,1]
r = data[:,0]
h = np.pi
interval = 1E10
vnew = []
rnew = []
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
rnew.append(r[j]+(0.05*r[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
vn = np.array(vnew)
rn = np.array(rnew)
def integ(gamma, beta, theta):
z = tt.cos(theta)**(2*((gamma/(beta - 2)) - 3/2) + 3)
return integrateOut(z, theta, -(np.pi)/2, (np.pi)/2)(beta)
with pm.Model() as basic_model:
M = pm.Uniform('M', lower=0.5*10**12, upper=3.50*10**12)
beta = pm.Uniform('beta', lower=2.001, upper=2.999)
theta = pm.Normal('theta', 0, 10**2)
def logp_func(rn,vn):
q = (gamma/(beta - 2)) - 3/2
B = (G*M*1E12) / ((beta -2 )*(R**(3 - beta)))
K = (gamma - 3) / ((rmin**(3 - gamma)) * integ(gamma, beta, theta) * (2*B)**0.5)
logp = - np.log(K*((1 -((1/(2*B))*((vn**2)*rn**(beta -
2))))**(q+1))*(rn**(1-gamma +(beta/2))))
return logp.sum()
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP()
#basicmodeltrace = pm.sample()
print(start)

A weird error with updates in theano

I designed a variable net, but it occurred some problems with theano. The general idea is that different input will get different net with same parameters, something like a recursive neural network with auto-encoder.
There are two cases in my code, one case is run combine_feat_gt1_1() if c > 1, the other case is run combine_feat_gt1_0().
It is weird that the code can run without bugs if I comment updates=updates, which is not my expected (train_test theano function in code). However, if I uncomment updates=updates, an error occurred (train_test_bug theano function in code). The later one is that I'd like to implement.
I have been already spend some days on this bug. Who can help me? I will appreciate that.
import os
import sys
import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams
from theano.ifelse import ifelse
class Test(object):
def __init__(
self,
numpy_rng,
input=None,
output=None,
n_output=6,
n_input=3,
n_group=2,
W_r=None,
b_r=None
):
self.n_output = n_output
self.n_input = n_input
self.n_group = n_group
if not W_r:
initial_W_r = numpy.asarray(
numpy_rng.uniform(
low=-4 * numpy.sqrt(6. / (n_input + n_input)),
high=4 * numpy.sqrt(6. / (n_input + n_input)),
size=(n_input, n_input)
),
dtype=theano.config.floatX
)
W_r = theano.shared(value=initial_W_r, name='W_r', borrow=True)
if not b_r:
b_r = theano.shared(
value=numpy.zeros(
n_input,
dtype=theano.config.floatX
),
borrow=True
)
self.W_r = W_r
self.b_r = b_r
if input is None:
self.x = T.tensor4(name='input', dtype=theano.config.floatX)
else:
self.x = input
if output is None:
self.y = T.matrix(name='output', dtype=theano.config.floatX)
else:
self.y = output
self.params = [self.W_r, self.b_r]
def get_output_values(self, input):
a, b, c, d = input.shape
def recusive(x_t, h_tm1, wr, hr):
h_t = T.dot(h_tm1, wr) + T.dot(x_t, wr) + hr
return h_t
def combine_recusive(data):
hidden, _ = theano.scan(fn=recusive,
sequences=data[1:],
outputs_info=data[0],
non_sequences=[self.W_r, self.b_r],
n_steps=data[1:].shape[0],
strict=True)
return hidden[-1]
def combine_feat_gt1_1(input):
feats, _ = theano.scan(fn=combine_recusive,
sequences=input[0],
outputs_info=None,
n_steps=input[0].shape[0])
recusive_flag = T.ones(1)
return T.reshape(feats, (1,-1)) # concatenation
def combine_feat_gt1_0(input):
feats = input[0]
recusive_flag = T.zeros(1)
return T.reshape(feats, (1,-1)) # concatenation
feat = ifelse(T.gt(c, 1), combine_feat_gt1_1(input), combine_feat_gt1_0(input))
# debug code snippet
self.debug_ifelse = theano.function([input], T.gt(c, 1))
self.debug_1_0 = theano.function([input], ifelse(T.gt(c, 1), 1, 0))
return feat
def get_cost_updates(self):
learning_rate = 0.1
self.y_given_x = self.get_output_values(self.x)
cost = T.sum(( self.y_given_x - self.y) ** 2)
gparams = T.grad(cost, self.params)
updates = [
(param, param - learning_rate * gparam)
for param, gparam in zip(self.params, gparams)
]
return (cost, updates)
if __name__ == "__main__":
toy_data = numpy.array([[[[1,1,1],[2,2,2]], [[3, 4,5],[4,5,6]]]],dtype=theano.config.floatX)
lable = numpy.array([[1,2,3,4,5,6]],dtype=theano.config.floatX)
toy_data2 = numpy.array([[[[1,1,1]], [[3,4,5]]]],dtype=theano.config.floatX)
lable2 = numpy.array([[6,5,4,3,2,1]],dtype=theano.config.floatX)
x = T.tensor4('x', dtype=theano.config.floatX)
y = T.matrix('y', dtype=theano.config.floatX)
newX = T.tensor4(dtype=x.dtype)
newY = T.matrix(dtype=y.dtype)
rng = numpy.random.RandomState(123)
test = Test(
numpy_rng=rng,
input=x,
output=y,
n_group=2,
n_input=3,
n_output=6
)
cost, updates= test.get_cost_updates()
train_test = theano.function(
[newX, newY],
cost,
# updates=updates,
givens={
x : newX,
y : newY
}
)
train_test_bug = theano.function(
[newX, newY],
cost,
updates=updates,
givens={
x : newX,
y : newY
}
)
print train_test(toy_data, lable)
print train_test(toy_data2, lable2)
# code with bug
# print train_test_bug(toy_data, lable)
# print train_test_bug(toy_data2, lable2)
EDIT (by #danielrenshaw)
I've cut the code down to a simpler demonstration of the problem.
The cause is in the gradient computation of a double-nested scan expression. The problem disappears when a modified inner-most recursive expression is used (see comments in first function below).
import numpy
import theano
import theano.tensor as tt
import theano.ifelse
def inner_scan_step(x_t_t, h_tm1, w):
# Fails when using this recursive expression
h_t = tt.dot(h_tm1, w) + x_t_t
# No failure when using this recursive expression
# h_t = h_tm1 + tt.dot(x_t_t, w)
return h_t
def outer_scan_step(x_t, w):
h, _ = theano.scan(inner_scan_step,
sequences=[x_t[1:]],
outputs_info=[x_t[0]],
non_sequences=[w],
strict=True)
return h[-1]
def get_outputs(x, w):
features, _ = theano.scan(outer_scan_step,
sequences=[x],
non_sequences=[w],
strict=True)
return tt.grad(features.sum(), w)
def main():
theano.config.compute_test_value = 'raise'
x_value = numpy.arange(12, dtype=theano.config.floatX).reshape((2, 2, 3))
x = tt.tensor3()
x.tag.test_value = x_value
w = theano.shared(value=numpy.ones((3, 3), dtype=theano.config.floatX), borrow=True)
f = theano.function(inputs=[x], outputs=get_outputs(x, w))
print f(x_value)
if __name__ == "__main__":
main()

I solved this problem edited by danielrenshaw. When I add h0 as outputs_info, it work. Before that I used first element of sequence as outputs_info, I think it caused the error. But I still cannot solve my original problem.
import numpy
import theano
import theano.tensor as tt
import theano.ifelse
def inner_scan_step(x_t_t, h_tm1, w):
# Fails when using this recursive expression
h_t = tt.dot(h_tm1, w) + x_t_t
# No failure when using this recursive expression
# h_t = h_tm1 + tt.dot(x_t_t, w)
return h_t
def outer_scan_step(x_t, w, h0):
h, _ = theano.scan(inner_scan_step,
sequences=[x_t],
outputs_info=[h0],
non_sequences=[w],
strict=True)
return h[-1]
def get_outputs(x, w, h0):
features, _ = theano.scan(outer_scan_step,
sequences=[x],
non_sequences=[w, h0],
strict=True)
return tt.grad(features.sum(), w)
def main():
theano.config.compute_test_value = 'raise'
x_value = numpy.arange(12, dtype=theano.config.floatX).reshape((2, 2, 3))
x = tt.tensor3()
x.tag.test_value = x_value
w = theano.shared(value=numpy.ones((3, 3), dtype=theano.config.floatX), borrow=True)
h0 = theano.shared(value=numpy.zeros(3, dtype=theano.config.floatX), borrow=True)
f = theano.function(inputs=[x], outputs=get_outputs(x, w, h0))
print f(x_value)
if __name__ == "__main__":
main()

I've encountered the same issue and I fixed it by letting optimizer=fast_compile in theano_flags. Guess that is a bug of theano.

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

A gradient clipping in Chainer - pytorch

Related

How to remove inplace operation error in Pytorch?

convert Tensorflow1 to Tensorflow 2

How to resolve value error in Scipy function fmintnc?

PyMC3- Custom theano Op to do numerical integration

A weird error with updates in theano

Categories

Resources