Related
there is a code written with tensorflow1 on this link.
https://github.com/carlthome/tensorflow-convlstm-cell/blob/master/cell.py
I want to use this class as a layer in TensorFlow.Keras. So it should be written with TensorFlow version 2.
How can do it?
this is this code:
import tensorflow as tf
class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
"""A LSTM cell with convolutions instead of multiplications.
Reference:
Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
"""
def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh, normalize=True, peephole=True, data_format='channels_last', reuse=None):
super(ConvLSTMCell, self).__init__(_reuse=reuse)
self._kernel = kernel
self._filters = filters
self._forget_bias = forget_bias
self._activation = activation
self._normalize = normalize
self._peephole = peephole
if data_format == 'channels_last':
self._size = tf.TensorShape(shape + [self._filters])
self._feature_axis = self._size.ndims
self._data_format = None
elif data_format == 'channels_first':
self._size = tf.TensorShape([self._filters] + shape)
self._feature_axis = 0
self._data_format = 'NC'
else:
raise ValueError('Unknown data_format')
#property
def state_size(self):
return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)
#property
def output_size(self):
return self._size
def call(self, x, state):
c, h = state
x = tf.concat([x, h], axis=self._feature_axis)
n = x.shape[-1].value
m = 4 * self._filters if self._filters > 1 else 4
W = tf.get_variable('kernel', self._kernel + [n, m])
y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)
if not self._normalize:
y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
j, i, f, o = tf.split(y, 4, axis=self._feature_axis)
if self._peephole:
i += tf.get_variable('W_ci', c.shape[1:]) * c
f += tf.get_variable('W_cf', c.shape[1:]) * c
if self._normalize:
j = tf.contrib.layers.layer_norm(j)
i = tf.contrib.layers.layer_norm(i)
f = tf.contrib.layers.layer_norm(f)
f = tf.sigmoid(f + self._forget_bias)
i = tf.sigmoid(i)
c = c * f + i * self._activation(j)
if self._peephole:
o += tf.get_variable('W_co', c.shape[1:]) * c
if self._normalize:
o = tf.contrib.layers.layer_norm(o)
c = tf.contrib.layers.layer_norm(c)
o = tf.sigmoid(o)
h = o * self._activation(c)
state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
return h, state
I coded a general convolution function in Python for CNNs.
As it turned out the time taken for this function was almost 5x more than the Keras Conv2D takes.
So I was curious if anyone knows why is there a speed difference ?
(It took almost 10-15min for 1 epoch of MNIST Dataset for my convolution function. Whereas Keras does it in almost 3-4min)
Heres my Conv class :
class Convolutional2D(Layer):
def __init__(self, kernel_size, feature_maps):
self.kernel_size = kernel_size
self.feature_maps = feature_maps
self.b = np.zeros((feature_maps))#np.random.rand(feature_maps)
def connect(self, to_layer):
if len(to_layer.layer_shape) == 2:
kernel_shape = [self.feature_maps, self.kernel_size, self.kernel_size]
self.layer_shape = [self.feature_maps] + list(np.array(to_layer.layer_shape)-self.kernel_size+1)
else:
kernel_shape = [self.feature_maps, to_layer.layer_shape[0], self.kernel_size, self.kernel_size]
self.layer_shape = [self.feature_maps] + list(np.array(to_layer.layer_shape[1:])-self.kernel_size+1)
self.kernel = np.random.random(kernel_shape)
super().init_adam_params(self.kernel, self.b)
def convolve(self, x, k, mode='forward'):
if mode == 'forward':
ksize = k.shape[-1]
if len(x.shape) == 3:
out = np.zeros((x.shape[0], k.shape[0], x.shape[1]-k.shape[1]+1, x.shape[2]-k.shape[2]+1))
else:
out = np.zeros((x.shape[0], k.shape[0], x.shape[2]-k.shape[2]+1, x.shape[3]-k.shape[3]+1))
for i in range(out.shape[2]):
for j in range(out.shape[3]):
if len(x.shape) == 3:
window = x[:,i:i+ksize,j:j+ksize]
m = np.reshape(window, (window.shape[0], 1, window.shape[1], window.shape[2]))*k
m = np.sum(m, axis=(2,3))
else:
window = x[:,:,i:i+ksize,j:j+ksize]
m = np.reshape(window, (window.shape[0], 1, window.shape[1], window.shape[2], window.shape[3]))*k
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
elif mode == 'backward_i':
if len(k.shape) == 3:
out = np.zeros((x.shape[0], x.shape[2]+k.shape[1]-1, x.shape[3]+k.shape[2]-1))
x = np.pad(x, ((0, 0), (0, 0), (k.shape[1]-1, k.shape[1]-1), (k.shape[2]-1, k.shape[2]-1)))
else:
out = np.zeros((x.shape[0], k.shape[1], x.shape[2]+k.shape[2]-1, x.shape[3]+k.shape[3]-1))
x = np.pad(x, ((0, 0), (0, 0), (k.shape[2]-1, k.shape[2]-1), (k.shape[3]-1, k.shape[3]-1)))
fk = np.transpose(k, axes=(1,0,2,3))
x = np.reshape(x, (x.shape[0], 1, x.shape[1], x.shape[2], x.shape[3]))
ksize = k.shape[-1]
for i in range(out.shape[-2]):
for j in range(out.shape[-1]):
if len(k.shape) == 3:
window = x[:,:,i:i+ksize,j:j+ksize]
m = window*k
m = np.sum(m, axis=(1,2,3))
out[:,i,j] = m
else:
window = x[:,:,:,i:i+ksize,j:j+ksize]
m = window*fk
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
elif mode == 'backward_k':
if len(x.shape) == 3:
out = np.zeros((k.shape[1], x.shape[1]-k.shape[2]+1, x.shape[2]-k.shape[3]+1))
else:
out = np.zeros((k.shape[1], x.shape[1], x.shape[2]-k.shape[2]+1, x.shape[3]-k.shape[3]+1))
x = np.transpose(x, axes=(1,0,2,3))
x = np.reshape(x, (x.shape[0], x.shape[1], x.shape[2], x.shape[3]))
ksize = k.shape[-1]
k = np.transpose(k, axes=(1,0,2,3))
if len(x.shape) != 3:
fk = np.reshape(k, (k.shape[0], 1, k.shape[1], k.shape[2], k.shape[3]))
for i in range(out.shape[-2]):
for j in range(out.shape[-1]):
if len(x.shape) == 3:
window = x[:,i:i+ksize,j:j+ksize]
m = window*k
m = np.sum(m, axis=(1,2,3))
out[:,i,j] = m
else:
window = x[:,:,i:i+ksize,j:j+ksize]
m = window*fk
m = np.sum(m, axis=(2,3,4))
out[:,:,i,j] = m
return out
def forward(self, x):
return self.convolve(x, self.kernel)
def backward(self, x, loss_grad, params):
if len(self.kernel.shape) == 3:
flipped_kernel = np.flip(self.kernel, axis=(1,2))
flipped_loss_grad = np.flip(loss_grad, axis=(1,2))
else:
flipped_kernel = np.flip(self.kernel, axis=(2,3))
flipped_loss_grad = np.flip(loss_grad, axis=(2,3))
i_grad = self.convolve(loss_grad, flipped_kernel, mode='backward_i')
k_grad = self.convolve(x, flipped_loss_grad, mode='backward_k')
self.vw = params['beta1']*self.vw + (1-params['beta1'])*k_grad
self.sw = params['beta2']*self.sw + (1-params['beta2'])*(k_grad**2)
self.kernel += params['lr']*self.vw/np.sqrt(self.sw+params['eps'])
return i_grad
def get_save_data(self):
return {'type':'Convolutional2D', 'shape':np.array(self.layer_shape).tolist(), 'data':[self.kernel_size, self.feature_maps, self.kernel.tolist()]}
def load_saved_data(data):
obj = Convolutional2D(data['data'][0], data['data'][1])
obj.layer_shape = data['shape']
obj.kernel = np.array(data['data'][2])
obj.init_adam_params(obj.kernel, obj.b)
return obj
Keras and Pytorch are much more efficient because they take advantage of vectorization and the fact that matrix multiplication is very well optimized. They basically convert the convolution into a matrix multiplication by flattening the filter and creating a new matrix whose column values are the values of each block. They also take advantage of how the data is stored in memory. You can find more information in this article: https://towardsdatascience.com/how-are-convolutions-actually-performed-under-the-hood-226523ce7fbf
class loss(Function):
#staticmethod
def forward(ctx,x,INPUT):
batch_size = x.shape[0]
X = x.detach().numpy()
input = INPUT.detach().numpy()
Loss = 0
for i in range(batch_size):
t_R_r = input[i,0:4]
R_r = t_R_r[np.newaxis,:]
t_R_i = input[i,4:8]
R_i = t_R_i[np.newaxis,:]
t_H_r = input[i,8:12]
H_r = t_H_r[np.newaxis,:]
t_H_i = input[i,12:16]
H_i = t_H_i[np.newaxis,:]
t_T_r = input[i, 16:32]
T_r = t_T_r.reshape(4,4)
t_T_i = input[i, 32:48]
T_i = t_T_i.reshape(4,4)
R = np.concatenate((R_r, R_i), axis=1)
H = np.concatenate((H_r, H_i), axis=1)
temp_t1 = np.concatenate((T_r,T_i),axis=1)
temp_t2 = np.concatenate((-T_i,T_r),axis=1)
T = np.concatenate((temp_t1,temp_t2),axis=0)
phi_r = np.zeros((4,4))
row, col = np.diag_indices(4)
phi_r[row,col] = X[i,0:4]
phi_i = np.zeros((4, 4))
row, col = np.diag_indices(4)
phi_i[row, col] = 1 - np.power(X[i, 0:4],2)
temp_phi1 = np.concatenate((phi_r,phi_i),axis=1)
temp_phi2 = np.concatenate((-phi_i, phi_r), axis=1)
phi = np.concatenate((temp_phi1,temp_phi2),axis=0)
temp1 = np.matmul(R,phi)
temp2 = np.matmul(temp1,T) # error
H_hat = H + temp2
t_Q_r = np.zeros((4,4))
t_Q_r[np.triu_indices(4,1)] = X[i,4:10]
Q_r = t_Q_r + t_Q_r.T
row,col = np.diag_indices(4)
Q_r[row,col] = X[i,10:14]
Q_i = np.zeros((4,4))
Q_i[np.triu_indices(4,1)] = X[i,14:20]
Q_i = Q_i - Q_i.T
temp_Q1 = np.concatenate((Q_r,Q_i),axis=1)
temp_Q2 = np.concatenate((-Q_i,Q_r),axis=1)
Q = np.concatenate((temp_Q1,temp_Q2),axis=0)
t_H_hat_r = H_hat[0,0:4]
H_hat_r = t_H_hat_r[np.newaxis,:]
t_H_hat_i= H_hat[0,4:8]
H_hat_i = t_H_hat_i[np.newaxis,:]
temp_H1 = np.concatenate((-H_hat_i.T,H_hat_r.T),axis=0)
H_hat_H = np.concatenate((H_hat.T,temp_H1),axis=1)
temp_result1 = np.matmul(H_hat,Q)
temp_result2 = np.matmul(temp_result1,H_hat_H)
Loss += np.log10(1+temp_result2[0][0])
Loss = t.from_numpy(np.array(Loss / batch_size))
return Loss
#staticmethod
def backward(ctx,grad_output):
print('gradient')
return grad_output
def criterion(output,input):
return loss.apply(output,input)
This is my loss function. But it present the error:
Traceback (most recent call last):
File "/Users/mrfang/channel_capacity/training.py", line 24, in
loss.backward() File "/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/tensor.py",
line 150, in backward
torch.autograd.backward(self, gradient, retain_graph, create_graph) File
"/Users/mrfang/anaconda3/lib/python3.6/site-packages/torch/autograd/init.py",
line 99, in backward
allow_unreachable=True) # allow_unreachable flag RuntimeError: function lossBackward returned an incorrect number of gradients
(expected 2, got 1)
How could I fix it. Thanks very much
Your forward(ctx,x,INPUT) takes two inputs, x and INPUT, thus backward should output two gradients as well, grad_x and grad_INPUT.
In addition, in your snippet, you're not really computing a custom gradient, so you could compute that with Pytorch's autograd, without having to define a special Function.
If this is working code and you're going to define the custom loss, here's a quick boilerplate of what backward should comprise:
#staticmethod
def forward(ctx, x, INPUT):
# this is required so they're available during the backwards call
ctx.save_for_backward(x, INPUT)
# custom forward
#staticmethod
def backward(ctx, grad_output):
x, INPUT = ctx.saved_tensors
grad_x = grad_INPUT = None
# compute grad here
return grad_x, grad_INPUT
You don't need to return gradients for inputs that don't require it, thus you can return None for them.
More info here and here.
When I run code below, I get:
TypeError: Cannot cast array data from dtype('O') to dtype('int64') according to the rule 'safe'
But I don't know where is dtype('O') and dtype('int64'). Does anyone know where is to parse?
import collections
import numpy as np
import math
import pandas as pd
def pre_prob(y):
y_dict = collections.Counter(y)
pre_probab = np.ones(2)
for i in range(0, 2):
pre_probab[i] = y_dict[i]/y.shape[0]
return pre_probab
def mean_var(X, y):
n_features = X.shape[1]
m = np.ones((2, n_features))
v = np.ones((2, n_features))
n_0 = np.bincount(y)[np.nonzero(np.bincount(y))[0]][0]
x0 = np.ones((n_0, n_features))
x1 = np.ones((X.shape[0] - n_0, n_features))
k = 0
for i in range(0, X.shape[0]):
if y[i] == 0:
x0[k] = X[i]
k = k + 1
k = 0
for i in range(0, X.shape[0]):
if y[i] == 1:
x1[k] = X[i]
k = k + 1
for j in range(0, n_features):
m[0][j] = np.mean(x0.T[j])
v[0][j] = np.var(x0.T[j])*(n_0/(n_0 - 1))
m[1][j] = np.mean(x1.T[j])
v[1][j] = np.var(x1.T[j])*((X.shape[0]-n_0)/((X.shape[0]- n_0) - 1))
return m, v # mean and variance
def prob_feature_class(m, v, x):
n_features = m.shape[1]
pfc = np.ones(2)
for i in range(0, 2):
product = 1
for j in range(0, n_features):
product = product * (1/math.sqrt(2*3.14*v[i][j])) * math.exp(-0.5* pow((x[j] - m[i][j]),2)/v[i][j])
pfc[i] = product
return pfc
def GNB(X, y, x):
m, v = mean_var(X, y)
pfc = prob_feature_class(m, v, x)
pre_probab = pre_prob(y)
pcf = np.ones(2)
total_prob = 0
for i in range(0, 2):
total_prob = total_prob + (pfc[i] * pre_probab[i])
for i in range(0, 2):
pcf[i] = (pfc[i] * pre_probab[i])/total_prob
prediction = int(pcf.argmax())
return m, v, pre_probab, pfc, pcf, prediction
I am using PyMC3 for parameter estimation using a particular likelihood function which has to be defined. I googled it and found out that I should use the densitydist method for implementing the user defined likelihood functions but it is not working. How to incorporate a user defined likelihood function in PyMC3 and to find out the maximum a posteriori (MAP) estimate for my model? My code is given below. Here L is the analytic form of my Likelihood function. I have some observational data for the radial velocity(vr) and postion (r) for some objects, which is imported from excel file.
data_ = np.array(pandas.read_excel('aaa.xlsx',header=None))
gamma=3.77;
G = 4.302*10**-6;
rmin = 3.0;
R = 95.7;
vr=data_[:,1];
r= data_[:,0];
h= np.pi;
class integrateOut(theano.Op):
def __init__(self,f,t,t0,tf,*args,**kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self,*inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f,self.fvars)
except:
self.gradF = None
return theano.Apply(self,self.fvars,[tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t]+self.fvars,self.f)
output_storage[0][0] = quad(f,self.t0,self.tf,args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g,self.t,self.t0,self.tf)(*inputs)*grads[0] \
for g in self.gradF]
basic_model = pm.Model()
with basic_model:
M=[]
beta=[]
interval=0.01*10**12
M=pm.Uniform('M',
lower=0.5*10**12,upper=3.50*10**12,transform='interval')
beta=pm.Uniform('beta',lower=2.001,upper=2.999,transform='interval')
gamma=3.77
logp=[]
arr=[]
vnew=[]
rnew=[]
theta = tt.scalar('theta')
beta = tt.scalar('beta')
z = tt.cos(theta)**(2*( (gamma/(beta - 2)) - 3/2) + 3)
intZ = integrateOut(z,theta,-(np.pi)/2,(np.pi)/2)(beta)
gradIntZ = tt.grad(intZ,[beta])
funcIntZ = theano.function([beta],intZ)
funcGradIntZ = theano.function([beta],gradIntZ)
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
rnew.append(r[j]+(0.05*r[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
vn=np.array(vnew)
rn=np.array(rnew)
for beta in np.arange (2.01,2.99,0.01):
for M in np.arange (0.5,2.50,0.01):
i=np.arange(0,59,1)
q =( gamma/(beta - 2)) - 3/2
B = (G*M*10**12)/((beta -2 )*( R**(3 - beta)))
K = (gamma - 3)/((rmin**(3 - gamma))*funcIntZ(beta)*m.sqrt(2*B))
logp= -np.log(K*((1 -(( 1/(2*B) )*((vn[i]**2)*rn[i]**(beta -
2))))**(q+1))*(rn[i]**(1-gamma +(beta/2))))
arr.append(logp.sum())
def logp_func(rn,vn):
return min(np.array(arr))
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP(model=basic_model)
step = pm.Metropolis()
basicmodeltrace = pm.sample(10000, step=step,
start=start,random_seed=1,progressbar=True)
print(pm.summary(basicmodeltrace))
map_estimate = pm.find_MAP(model=basic_model)
print(map_estimate)
I am getting the following error message:
ValueError: Cannot compute test value: input 0 (theta) of Op
Elemwise{cos,no_inplace}(theta) missing default value.
Backtrace when that variable is created:
I am unable to get the output since the numerical integration is not working. I have used custom theano op for numerical integration code which i got from Custom Theano Op to do numerical integration . The integration works if I run it seperately inputting a particular value of beta, but not within the model.
I made a few changes to your code, this still does not work, but I hope it is closer to a solution. Please check this thread, as someone is trying so solve essentially the same problem.
class integrateOut(theano.Op):
def __init__(self, f, t, t0, tf,*args, **kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self, *inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f, self.fvars)
except:
self.gradF = None
return theano.Apply(self, self.fvars, [tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t] + self.fvars,self.f)
output_storage[0][0] = quad(f, self.t0, self.tf, args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g, self.t, self.t0, self.tf)(*inputs)*grads[0] \
for g in self.gradF]
gamma = 3.77
G = 4.302E-6
rmin = 3.0
R = 95.7
vr = data[:,1]
r = data[:,0]
h = np.pi
interval = 1E10
vnew = []
rnew = []
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
rnew.append(r[j]+(0.05*r[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
vn = np.array(vnew)
rn = np.array(rnew)
def integ(gamma, beta, theta):
z = tt.cos(theta)**(2*((gamma/(beta - 2)) - 3/2) + 3)
return integrateOut(z, theta, -(np.pi)/2, (np.pi)/2)(beta)
with pm.Model() as basic_model:
M = pm.Uniform('M', lower=0.5*10**12, upper=3.50*10**12)
beta = pm.Uniform('beta', lower=2.001, upper=2.999)
theta = pm.Normal('theta', 0, 10**2)
def logp_func(rn,vn):
q = (gamma/(beta - 2)) - 3/2
B = (G*M*1E12) / ((beta -2 )*(R**(3 - beta)))
K = (gamma - 3) / ((rmin**(3 - gamma)) * integ(gamma, beta, theta) * (2*B)**0.5)
logp = - np.log(K*((1 -((1/(2*B))*((vn**2)*rn**(beta -
2))))**(q+1))*(rn**(1-gamma +(beta/2))))
return logp.sum()
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP()
#basicmodeltrace = pm.sample()
print(start)