there is a code written with tensorflow1 on this link.
https://github.com/carlthome/tensorflow-convlstm-cell/blob/master/cell.py
I want to use this class as a layer in TensorFlow.Keras. So it should be written with TensorFlow version 2.
How can do it?
this is this code:
import tensorflow as tf
class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
"""A LSTM cell with convolutions instead of multiplications.
Reference:
Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
"""
def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None):
super(ConvLSTMCell, self).__init__(_reuse=reuse)
self._kernel = kernel
self._filters = filters
self._forget_bias = forget_bias
self._activation = activation
self._normalize = normalize
self._peephole = peephole
if data_format == 'channels_last':
self._size = tf.TensorShape(shape + [self._filters])
self._feature_axis = self._size.ndims
self._data_format = None
elif data_format == 'channels_first':
self._size = tf.TensorShape([self._filters] + shape)
self._feature_axis = 0
self._data_format = 'NC'
else:
raise ValueError('Unknown data_format')
#property
def state_size(self):
return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)
#property
def output_size(self):
return self._size
def call(self, x, state):
c, h = state
x = tf.concat([x, h], axis=self._feature_axis)
n = x.shape[-1].value
m = 4 * self._filters if self._filters > 1 else 4
W = tf.get_variable('kernel', self._kernel + [n, m])
y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)
if not self._normalize:
y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
j, i, f, o = tf.split(y, 4, axis=self._feature_axis)
if self._peephole:
i += tf.get_variable('W_ci', c.shape[1:]) * c
f += tf.get_variable('W_cf', c.shape[1:]) * c
if self._normalize:
j = tf.contrib.layers.layer_norm(j)
i = tf.contrib.layers.layer_norm(i)
f = tf.contrib.layers.layer_norm(f)
f = tf.sigmoid(f + self._forget_bias)
i = tf.sigmoid(i)
c = c * f + i * self._activation(j)
if self._peephole:
o += tf.get_variable('W_co', c.shape[1:]) * c
if self._normalize:
o = tf.contrib.layers.layer_norm(o)
c = tf.contrib.layers.layer_norm(c)
o = tf.sigmoid(o)
h = o * self._activation(c)
state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
return h, state
Related
I get this error from the following Pytorch code:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [3]] is at version 10; expected version 9 instead.
As it is seen the code does not have inplace operations.
import torch
device = torch.device('cpu')
class MesNet(torch.nn.Module):
def __init__(self):
super(MesNet, self).__init__()
self.cov_lin = torch.nn.Sequential(torch.nn.Linear(6, 5)).double()
def forward(self, u):
z_cov = self.cov_lin(u.transpose(0, 2).squeeze(-1))
return z_cov
class UpdateModel(torch.nn.Module):
def __init__(self):
torch.nn.Module.__init__(self)
self.P_dim = 18
self.Id3 = torch.eye(3).double()
def run_KF(self):
N = 10
u = torch.randn(N, 6).double()
v = torch.zeros(N, 3).double()
model = MesNet()
measurements_covs_l = model(u.t().unsqueeze(0))
# remember to remove this afterwards
torch.autograd.set_detect_anomaly(True)
for i in range(1, N):
v[i] = self.update_pos(v[i].detach(), measurements_covs_l[i-1])
criterion = torch.nn.MSELoss(reduction="sum")
targ = torch.rand(10, 3).double()
loss = criterion(v, targ)
loss = torch.mean(loss)
loss.backward()
return v, p
def update_pos(self, v, measurement_cov):
Omega = torch.eye(3).double()
H = torch.ones((5, self.P_dim)).double()
R = torch.diag(measurement_cov)
Kt = H.t().mm(torch.inverse(R))
# it is indicating inplace error even with this:
# Kt = H.t().mm(R)
dx = Kt.mv(torch.ones(5).double())
dR = self.trans(dx[:9].clone())
v_up = dR.mv(v)
return v_up
def trans(self, xi):
phi = xi[:3].clone()
angle = torch.norm(phi.clone())
if angle.abs().lt(1e-10):
skew_phi = torch.eye(3).double()
J = self.Id3 + 0.5 * skew_phi
Rot = self.Id3 + skew_phi
else:
axis = phi / angle
skew_axis = torch.eye(3).double()
s = torch.sin(angle)
c = torch.cos(angle)
Rot = c * self.Id3
return Rot
net = UpdateModel()
net.run_KF()
I think the issue is that you are overwriting v[i] elements.
You could instead construct an auxiliary list v_ from the loop, then convert it tensor:
v_ = [v[0]]
for i in range(1, N):
v_.append(self.update_pos(v[i].detach(), measurements_covs_l[i-1]))
v = torch.stack(v_)
I coded a general convolution function in Python for CNNs.
As it turned out the time taken for this function was almost 5x more than the Keras Conv2D takes.
So I was curious if anyone knows why is there a speed difference ?
(It took almost 10-15min for 1 epoch of MNIST Dataset for my convolution function. Whereas Keras does it in almost 3-4min)
Heres my Conv class :
class Convolutional2D(Layer):
def __init__(self, kernel_size, feature_maps):
self.kernel_size = kernel_size
self.feature_maps = feature_maps
self.b = np.zeros((feature_maps))#np.random.rand(feature_maps)
def connect(self, to_layer):
if len(to_layer.layer_shape) == 2:
kernel_shape = [self.feature_maps, self.kernel_size, self.kernel_size]
self.layer_shape = [self.feature_maps] + list(np.array(to_layer.layer_shape)-self.kernel_size+1)
else:
kernel_shape = [self.feature_maps, to_layer.layer_shape[0], self.kernel_size, self.kernel_size]
self.layer_shape = [self.feature_maps] + list(np.array(to_layer.layer_shape[1:])-self.kernel_size+1)
self.kernel = np.random.random(kernel_shape)
super().init_adam_params(self.kernel, self.b)
def convolve(self, x, k, mode='forward'):
if mode == 'forward':
ksize = k.shape[-1]
if len(x.shape) == 3:
out = np.zeros((x.shape[0], k.shape[0], x.shape[1]-k.shape[1]+1, x.shape[2]-k.shape[2]+1))
else:
out = np.zeros((x.shape[0], k.shape[0], x.shape[2]-k.shape[2]+1, x.shape[3]-k.shape[3]+1))
for i in range(out.shape[2]):
for j in range(out.shape[3]):
if len(x.shape) == 3:
window = x[:,i:i+ksize,j:j+ksize]
m = np.reshape(window, (window.shape[0], 1, window.shape[1], window.shape[2]))*k
m = np.sum(m, axis=(2,3))
else:
window = x[:,:,i:i+ksize,j:j+ksize]
m = np.reshape(window, (window.shape[0], 1, window.shape[1], window.shape[2], window.shape[3]))*k
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
elif mode == 'backward_i':
if len(k.shape) == 3:
out = np.zeros((x.shape[0], x.shape[2]+k.shape[1]-1, x.shape[3]+k.shape[2]-1))
x = np.pad(x, ((0, 0), (0, 0), (k.shape[1]-1, k.shape[1]-1), (k.shape[2]-1, k.shape[2]-1)))
else:
out = np.zeros((x.shape[0], k.shape[1], x.shape[2]+k.shape[2]-1, x.shape[3]+k.shape[3]-1))
x = np.pad(x, ((0, 0), (0, 0), (k.shape[2]-1, k.shape[2]-1), (k.shape[3]-1, k.shape[3]-1)))
fk = np.transpose(k, axes=(1,0,2,3))
x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2], x.shape[3]))
ksize = k.shape[-1]
for i in range(out.shape[-2]):
for j in range(out.shape[-1]):
if len(k.shape) == 3:
window = x[:,:,i:i+ksize,j:j+ksize]
m = window*k
m = np.sum(m, axis=(1,2,3))
out[:,i,j] = m
else:
window = x[:,:,:,i:i+ksize,j:j+ksize]
m = window*fk
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
elif mode == 'backward_k':
if len(x.shape) == 3:
out = np.zeros((k.shape[1], x.shape[1]-k.shape[2]+1, x.shape[2]-k.shape[3]+1))
else:
out = np.zeros((k.shape[1], x.shape[1], x.shape[2]-k.shape[2]+1, x.shape[3]-k.shape[3]+1))
x = np.transpose(x, axes=(1,0,2,3))
x = np.reshape(x, (x.shape[0], x.shape[1], x.shape[2], x.shape[3]))
ksize = k.shape[-1]
k = np.transpose(k, axes=(1,0,2,3))
if len(x.shape) != 3:
fk = np.reshape(k, (k.shape[0], 1, k.shape[1], k.shape[2], k.shape[3]))
for i in range(out.shape[-2]):
for j in range(out.shape[-1]):
if len(x.shape) == 3:
window = x[:,i:i+ksize,j:j+ksize]
m = window*k
m = np.sum(m, axis=(1,2,3))
out[:,i,j] = m
else:
window = x[:,:,i:i+ksize,j:j+ksize]
m = window*fk
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
def forward(self, x):
return self.convolve(x, self.kernel)
def backward(self, x, loss_grad, params):
if len(self.kernel.shape) == 3:
flipped_kernel = np.flip(self.kernel, axis=(1,2))
flipped_loss_grad = np.flip(loss_grad, axis=(1,2))
else:
flipped_kernel = np.flip(self.kernel, axis=(2,3))
flipped_loss_grad = np.flip(loss_grad, axis=(2,3))
i_grad = self.convolve(loss_grad, flipped_kernel, mode='backward_i')
k_grad = self.convolve(x, flipped_loss_grad, mode='backward_k')
self.vw = params['beta1']*self.vw + (1-params['beta1'])*k_grad
self.sw = params['beta2']*self.sw + (1-params['beta2'])*(k_grad**2)
self.kernel += params['lr']*self.vw/np.sqrt(self.sw+params['eps'])
return i_grad
def get_save_data(self):
return {'type':'Convolutional2D', 'shape':np.array(self.layer_shape).tolist(), 'data':[self.kernel_size, self.feature_maps, self.kernel.tolist()]}
def load_saved_data(data):
obj = Convolutional2D(data['data'][0], data['data'][1])
obj.layer_shape = data['shape']
obj.kernel = np.array(data['data'][2])
obj.init_adam_params(obj.kernel, obj.b)
return obj
Keras and Pytorch are much more efficient because they take advantage of vectorization and the fact that matrix multiplication is very well optimized. They basically convert the convolution into a matrix multiplication by flattening the filter and creating a new matrix whose column values are the values of each block. They also take advantage of how the data is stored in memory. You can find more information in this article: https://towardsdatascience.com/how-are-convolutions-actually-performed-under-the-hood-226523ce7fbf
When I run code below, I get:
TypeError: Cannot cast array data from dtype('O') to dtype('int64') according to the rule 'safe'
But I don't know where is dtype('O') and dtype('int64'). Does anyone know where is to parse?
import collections
import numpy as np
import math
import pandas as pd
def pre_prob(y):
y_dict = collections.Counter(y)
pre_probab = np.ones(2)
for i in range(0, 2):
pre_probab[i] = y_dict[i]/y.shape[0]
return pre_probab
def mean_var(X, y):
n_features = X.shape[1]
m = np.ones((2, n_features))
v = np.ones((2, n_features))
n_0 = np.bincount(y)[np.nonzero(np.bincount(y))[0]][0]
x0 = np.ones((n_0, n_features))
x1 = np.ones((X.shape[0] - n_0, n_features))
k = 0
for i in range(0, X.shape[0]):
if y[i] == 0:
x0[k] = X[i]
k = k + 1
k = 0
for i in range(0, X.shape[0]):
if y[i] == 1:
x1[k] = X[i]
k = k + 1
for j in range(0, n_features):
m[0][j] = np.mean(x0.T[j])
v[0][j] = np.var(x0.T[j])*(n_0/(n_0 - 1))
m[1][j] = np.mean(x1.T[j])
v[1][j] = np.var(x1.T[j])*((X.shape[0]-n_0)/((X.shape[0]- n_0) - 1))
return m, v # mean and variance
def prob_feature_class(m, v, x):
n_features = m.shape[1]
pfc = np.ones(2)
for i in range(0, 2):
product = 1
for j in range(0, n_features):
product = product * (1/math.sqrt(2*3.14*v[i][j])) * math.exp(-0.5* pow((x[j] - m[i][j]),2)/v[i][j])
pfc[i] = product
return pfc
def GNB(X, y, x):
m, v = mean_var(X, y)
pfc = prob_feature_class(m, v, x)
pre_probab = pre_prob(y)
pcf = np.ones(2)
total_prob = 0
for i in range(0, 2):
total_prob = total_prob + (pfc[i] * pre_probab[i])
for i in range(0, 2):
pcf[i] = (pfc[i] * pre_probab[i])/total_prob
prediction = int(pcf.argmax())
return m, v, pre_probab, pfc, pcf, prediction
I am using PyMC3 for parameter estimation using a particular likelihood function which has to be defined. I googled it and found out that I should use the densitydist method for implementing the user defined likelihood functions but it is not working. How to incorporate a user defined likelihood function in PyMC3 and to find out the maximum a posteriori (MAP) estimate for my model? My code is given below. Here L is the analytic form of my Likelihood function. I have some observational data for the radial velocity(vr) and postion (r) for some objects, which is imported from excel file.
data_ = np.array(pandas.read_excel('aaa.xlsx',header=None))
gamma=3.77;
G = 4.302*10**-6;
rmin = 3.0;
R = 95.7;
vr=data_[:,1];
r= data_[:,0];
h= np.pi;
class integrateOut(theano.Op):
def __init__(self,f,t,t0,tf,*args,**kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self,*inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f,self.fvars)
except:
self.gradF = None
return theano.Apply(self,self.fvars,[tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t]+self.fvars,self.f)
output_storage[0][0] = quad(f,self.t0,self.tf,args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g,self.t,self.t0,self.tf)(*inputs)*grads[0] \
for g in self.gradF]
basic_model = pm.Model()
with basic_model:
M=[]
beta=[]
interval=0.01*10**12
M=pm.Uniform('M',
lower=0.5*10**12,upper=3.50*10**12,transform='interval')
beta=pm.Uniform('beta',lower=2.001,upper=2.999,transform='interval')
gamma=3.77
logp=[]
arr=[]
vnew=[]
rnew=[]
theta = tt.scalar('theta')
beta = tt.scalar('beta')
z = tt.cos(theta)**(2*( (gamma/(beta - 2)) - 3/2) + 3)
intZ = integrateOut(z,theta,-(np.pi)/2,(np.pi)/2)(beta)
gradIntZ = tt.grad(intZ,[beta])
funcIntZ = theano.function([beta],intZ)
funcGradIntZ = theano.function([beta],gradIntZ)
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
rnew.append(r[j]+(0.05*r[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
vn=np.array(vnew)
rn=np.array(rnew)
for beta in np.arange (2.01,2.99,0.01):
for M in np.arange (0.5,2.50,0.01):
i=np.arange(0,59,1)
q =( gamma/(beta - 2)) - 3/2
B = (G*M*10**12)/((beta -2 )*( R**(3 - beta)))
K = (gamma - 3)/((rmin**(3 - gamma))*funcIntZ(beta)*m.sqrt(2*B))
logp= -np.log(K*((1 -(( 1/(2*B) )*((vn[i]**2)*rn[i]**(beta -
2))))**(q+1))*(rn[i]**(1-gamma +(beta/2))))
arr.append(logp.sum())
def logp_func(rn,vn):
return min(np.array(arr))
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP(model=basic_model)
step = pm.Metropolis()
basicmodeltrace = pm.sample(10000, step=step,
start=start,random_seed=1,progressbar=True)
print(pm.summary(basicmodeltrace))
map_estimate = pm.find_MAP(model=basic_model)
print(map_estimate)
I am getting the following error message:
ValueError: Cannot compute test value: input 0 (theta) of Op
Elemwise{cos,no_inplace}(theta) missing default value.
Backtrace when that variable is created:
I am unable to get the output since the numerical integration is not working. I have used custom theano op for numerical integration code which i got from Custom Theano Op to do numerical integration . The integration works if I run it seperately inputting a particular value of beta, but not within the model.
I made a few changes to your code, this still does not work, but I hope it is closer to a solution. Please check this thread, as someone is trying so solve essentially the same problem.
class integrateOut(theano.Op):
def __init__(self, f, t, t0, tf,*args, **kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self, *inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f, self.fvars)
except:
self.gradF = None
return theano.Apply(self, self.fvars, [tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t] + self.fvars,self.f)
output_storage[0][0] = quad(f, self.t0, self.tf, args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g, self.t, self.t0, self.tf)(*inputs)*grads[0] \
for g in self.gradF]
gamma = 3.77
G = 4.302E-6
rmin = 3.0
R = 95.7
vr = data[:,1]
r = data[:,0]
h = np.pi
interval = 1E10
vnew = []
rnew = []
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
rnew.append(r[j]+(0.05*r[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
vn = np.array(vnew)
rn = np.array(rnew)
def integ(gamma, beta, theta):
z = tt.cos(theta)**(2*((gamma/(beta - 2)) - 3/2) + 3)
return integrateOut(z, theta, -(np.pi)/2, (np.pi)/2)(beta)
with pm.Model() as basic_model:
M = pm.Uniform('M', lower=0.5*10**12, upper=3.50*10**12)
beta = pm.Uniform('beta', lower=2.001, upper=2.999)
theta = pm.Normal('theta', 0, 10**2)
def logp_func(rn,vn):
q = (gamma/(beta - 2)) - 3/2
B = (G*M*1E12) / ((beta -2 )*(R**(3 - beta)))
K = (gamma - 3) / ((rmin**(3 - gamma)) * integ(gamma, beta, theta) * (2*B)**0.5)
logp = - np.log(K*((1 -((1/(2*B))*((vn**2)*rn**(beta -
2))))**(q+1))*(rn**(1-gamma +(beta/2))))
return logp.sum()
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP()
#basicmodeltrace = pm.sample()
print(start)
I have some data and I want to classification like svm with cvxopt function.
In documentation of cvxopt.solvers.qp, there were some matrixes with vectorized and transposed.
How can I find correct params (P, q, G, h, A, b) when I know n_samples and n_features?
solution = cvxopt.solvers.qp(P, q, G, h, A, b)
Solved it. I got a hint how to set parameters of cvx.opt from here. Code is as bellows.
I made a matrix (n_samples, n_samples) and get other parameters with cvxopt.matrix.
class SVM(object):
def __init__(self, kernel=linear_kernel, C=None):
self.kernel = kernel
self.C = C
if self.C is not None: self.C = float(self.C)
def fit(self, X, y):
n_samples, n_features = X.shape
# Gram matrix
K = np.zeros((n_samples, n_samples))
for i in range(n_samples):
for j in range(n_samples):
K[i,j] = self.kernel(X[i], X[j])
P = cvxopt.matrix(np.outer(y,y) * K)
q = cvxopt.matrix(np.ones(n_samples) * -1)
A = cvxopt.matrix(y, (1,n_samples))
b = cvxopt.matrix(0.0)
if self.C is None:
G = cvxopt.matrix(np.diag(np.ones(n_samples) * -1))
h = cvxopt.matrix(np.zeros(n_samples))
else:
tmp1 = np.diag(np.ones(n_samples) * -1)
tmp2 = np.identity(n_samples)
G = cvxopt.matrix(np.vstack((tmp1, tmp2)))
tmp1 = np.zeros(n_samples)
tmp2 = np.ones(n_samples) * self.C
h = cvxopt.matrix(np.hstack((tmp1, tmp2)))