Backpropagation (matrixis not aligned) - python-3.x

Well, the problem is with delta1, I've checked over math couple times, it seems good to me, everything should be correct with delta2, but it doesn't match with W2 transposed, here is backpropagation:
def backward(self, X, Y):
X = np.array(X)
Y = np.array(Y)
delta2 = -(Y - self.yHat) * self.deriv_sigmoid(self.a2)
dJdW2 = np.dot(self.a2.T, delta2)
delta1 = np.dot(delta2, self.W2.T)*self.deriv_sigmoid(self.a1)
dJdW1 = np.dot(X.T, delta1)
return dJdW1, dJdW2
here is forward propagation:
def forward(self, X):
self.X = X
self.a1 = np.dot(self.W1, X)
self.Z1 = self.sigmoid(self.a1)
self.a2 = np.dot(self.W2, self.Z1)
self.yHat = self.sigmoid(self.a2)
return self.yHat
And here is file from witch I call it:
NN = nn.Neural_Network(2, 3, 1)
X = [[1],[1],]
Y = [[1],]
yHat = NN.forward(X)
dJdW1, dJdW2 = NN.backward(X, Y)
I've tried checking placings in np.dot(), but it seems to be correct, and here is full code: https://hastebin.com/ikijahecaz.py

Related

Neural Network initialized with random weights always returns the same output with random inputs

I have a problem with pytorch in Spyder. A randomly initialized Neural Network returns always the same output also for random input tensor. I am currently using local GPU with Spyder. I made sure that the initialization of the weights is random and not all zeros.
Example:
x = torch.rand(1, 3, 360, 640)
x = self.stage_1(x)
x = self.stage_2(x)
x = self.stage_3(x)
x = self.stage_4(x)
x = self.stage_5(x)
x = self.stage_6(x)
x = torch.flatten(x, start_dim=1)
y = torch.rand(1, 3, 360, 640)
y = self.stage_1(y)
y = self.stage_2(y)
y = self.stage_3(y)
y = self.stage_4(y)
y = self.stage_5(y)
y = self.stage_6(y)
y = torch.flatten(y, start_dim=1)
This code returns always y == x
This is the stage class:
class VggStage(nn.Module):
def __init__(self,
input_channels: int,
output_channels: int) -> None:
"""
Parameters
----------
input_channels : int
DESCRIPTION.
output_channels : int
DESCRIPTION.
Returns
-------
None
DESCRIPTION.
"""
super().__init__()
self.conv1 = nn.Conv2d(in_channels=input_channels,
out_channels=output_channels,
kernel_size=(3, 3))
self.conv2 = nn.Conv2d(in_channels=output_channels,
out_channels=output_channels,
kernel_size=(3, 3))
self.max_pool = nn.MaxPool2d(kernel_size=(2, 2),
stride=(2, 2))
def forward(self,
x: torch.Tensor) -> torch.Tensor:
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = self.max_pool(x)
return x

ViVIT PyTorch: RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15

I am trying to run Video Vision Transformer (ViViT) code with my dataset but getting an error using CrossEntropyLoss from Pytorch as the Loss function.
There are 6 classes I have:
['Run', 'Sit', 'Walk', 'Wave', 'Sit', 'Stand']
Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=1e-9, momentum=0.9)
Class Weights
tensor([0.0045, 0.0042, 0.0048, 0.0038, 0.0070, 0.0065])
Loss Function
loss_func = nn.CrossEntropyLoss(weight=class_weights.to(device))
Code Throwning Error
train_epoch(model, optimizer, train_loader, train_loss_history, loss_func)
Error
RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
Code Calling the transformer
model = ViViT(224, 16, 100, 16).cuda()
Getting Video Frames
def get_frames(filename, n_frames=1):
frames = []
v_cap = cv2.VideoCapture(filename)
v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = np.linspace(0, v_len - 1, n_frames + 1, dtype=np.int16)
frame_dims = np.array([224, 224, 3])
for fn in range(v_len):
success, frame = v_cap.read()
if success is False:
continue
if (fn in frame_list):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (frame_dims[0], frame_dims[1]))
frames.append(frame)
v_cap.release()
return frames, v_len
Dataset Preprocessing
class DatasetProcessing(data.Dataset):
def __init__(self, df, root_dir):
super(DatasetProcessing, self).__init__()
# List of all videos path
video_list = df["Video"].apply(lambda x: root_dir + '/' + x)
self.video_list = np.asarray(video_list)
self.df = df
def __getitem__(self, index):
# Ensure that the raw videos are in respective folders and folder name matches the output class label
video_label = self.video_list[index].split('/')[-2]
video_name = self.video_list[index].split('/')[-1]
video_frames, len_ = get_frames(self.video_list[index], n_frames = 15)
video_frames = np.asarray(video_frames)
video_frames = video_frames/255
class_list = ['Run', 'Walk', 'Wave', 'Sit', 'Turn', 'Stand']
class_id_loc = np.where(class_list == video_label)
label = class_id_loc
d = torch.as_tensor(np.array(video_frames).astype('float'))
l = torch.as_tensor(np.array(label).astype('float'))
return (d, l)
def __len__(self):
return self.video_list.shape[0]
Training Epochs
def train_epoch(model, optimizer, data_loader, loss_history, loss_func):
total_samples = len(data_loader.dataset)
model.train()
for i, (data, target) in enumerate(data_loader):
optimizer.zero_grad()
x = data.cuda()
data = rearrange(x, 'b p h w c -> b p c h w').cuda()
target = target.type(torch.LongTensor).cuda()
pred = model(data.float())
output = F.log_softmax(pred, dim=1)
loss = loss_func(output, target.squeeze(1))
loss.backward()
optimizer.step()
if i % 100 == 0:
print('[' + '{:5}'.format(i * len(data)) + '/' + '{:5}'.format(total_samples) +
' (' + '{:3.0f}'.format(100 * i / len(data_loader)) + '%)] Loss: ' +
'{:6.4f}'.format(loss.item()))
loss_history.append(loss.item())
Evaluate Model
def evaluate(model, data_loader, loss_history, loss_func):
model.eval()
total_samples = len(data_loader.dataset)
correct_samples = 0
total_loss = 0
with torch.no_grad():
for data, target in data_loader:
x = data.cuda()
data = rearrange(x, 'b p h w c -> b p c h w').cuda()
target = target.type(torch.LongTensor).cuda()
output = F.log_softmax(model(data.float()), dim=1)
loss = loss_func(output, target)
_, pred = torch.max(output, dim=1)
total_loss += loss.item()
correct_samples += pred.eq(target).sum()
avg_loss = total_loss / total_samples
loss_history.append(avg_loss)
print('\nAverage test loss: ' + '{:.4f}'.format(avg_loss) +
' Accuracy:' + '{:5}'.format(correct_samples) + '/' +
'{:5}'.format(total_samples) + ' (' +
'{:4.2f}'.format(100.0 * correct_samples / total_samples) + '%)\n')
Transformer
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
super().__init__()
self.layers = nn.ModuleList([])
self.norm = nn.LayerNorm(dim)
for _ in range(depth):
self.layers.append(nn.ModuleList([
PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)),
PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
]))
def forward(self, x):
for attn, ff in self.layers:
x = attn(x) + x
x = ff(x) + x
return self.norm(x)
ViViT Code
class ViViT(nn.Module):
def __init__(self, image_size, patch_size, num_classes, num_frames, dim = 192, depth = 4, heads = 3, pool = 'cls', in_channels = 3, dim_head = 64, dropout = 0.,
emb_dropout = 0., scale_dim = 4, ):
super().__init__()
assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'
assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
num_patches = (image_size // patch_size) ** 2
patch_dim = in_channels * patch_size ** 2
self.to_patch_embedding = nn.Sequential(
Rearrange('b t c (h p1) (w p2) -> b t (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
nn.Linear(patch_dim, dim),
)
self.pos_embedding = nn.Parameter(torch.randn(1, num_frames, num_patches + 1, dim))
self.space_token = nn.Parameter(torch.randn(1, 1, dim))
self.space_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
self.temporal_token = nn.Parameter(torch.randn(1, 1, dim))
self.temporal_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
self.dropout = nn.Dropout(emb_dropout)
self.pool = pool
self.mlp_head = nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, num_classes)
)
def forward(self, x):
x = self.to_patch_embedding(x)
b, t, n, _ = x.shape
cls_space_tokens = repeat(self.space_token, '() n d -> b t n d', b = b, t=t)
x = torch.cat((cls_space_tokens, x), dim=2)
x += self.pos_embedding[:, :, :(n + 1)]
x = self.dropout(x)
x = rearrange(x, 'b t n d -> (b t) n d')
x = self.space_transformer(x)
x = rearrange(x[:, 0], '(b t) ... -> b t ...', b=b)
cls_temporal_tokens = repeat(self.temporal_token, '() n d -> b n d', b=b)
x = torch.cat((cls_temporal_tokens, x), dim=1)
x = self.temporal_transformer(x)
x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]
return self.mlp_head(x)
Multi target appears to be a feature supported since version 1.10.0.
https://discuss.pytorch.org/t/crossentropyloss-vs-per-class-probabilities-target/138331
Please check your pytorch version.
Please refer to the example of using the UTF101 top5 dataset, which is available on my Colab. The version of pytorch is 1.12.0+cu113, and the code you listed was able to run the training almost exactly as it was written.

How to remove inplace operation error in Pytorch?

I get this error from the following Pytorch code:
RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.DoubleTensor [3]] is at version 10; expected version 9 instead.
As it is seen the code does not have inplace operations.
import torch
device = torch.device('cpu')
class MesNet(torch.nn.Module):
def __init__(self):
super(MesNet, self).__init__()
self.cov_lin = torch.nn.Sequential(torch.nn.Linear(6, 5)).double()
def forward(self, u):
z_cov = self.cov_lin(u.transpose(0, 2).squeeze(-1))
return z_cov
class UpdateModel(torch.nn.Module):
def __init__(self):
torch.nn.Module.__init__(self)
self.P_dim = 18
self.Id3 = torch.eye(3).double()
def run_KF(self):
N = 10
u = torch.randn(N, 6).double()
v = torch.zeros(N, 3).double()
model = MesNet()
measurements_covs_l = model(u.t().unsqueeze(0))
# remember to remove this afterwards
torch.autograd.set_detect_anomaly(True)
for i in range(1, N):
v[i] = self.update_pos(v[i].detach(), measurements_covs_l[i-1])
criterion = torch.nn.MSELoss(reduction="sum")
targ = torch.rand(10, 3).double()
loss = criterion(v, targ)
loss = torch.mean(loss)
loss.backward()
return v, p
def update_pos(self, v, measurement_cov):
Omega = torch.eye(3).double()
H = torch.ones((5, self.P_dim)).double()
R = torch.diag(measurement_cov)
Kt = H.t().mm(torch.inverse(R))
# it is indicating inplace error even with this:
# Kt = H.t().mm(R)
dx = Kt.mv(torch.ones(5).double())
dR = self.trans(dx[:9].clone())
v_up = dR.mv(v)
return v_up
def trans(self, xi):
phi = xi[:3].clone()
angle = torch.norm(phi.clone())
if angle.abs().lt(1e-10):
skew_phi = torch.eye(3).double()
J = self.Id3 + 0.5 * skew_phi
Rot = self.Id3 + skew_phi
else:
axis = phi / angle
skew_axis = torch.eye(3).double()
s = torch.sin(angle)
c = torch.cos(angle)
Rot = c * self.Id3
return Rot
net = UpdateModel()
net.run_KF()
I think the issue is that you are overwriting v[i] elements.
You could instead construct an auxiliary list v_ from the loop, then convert it tensor:
v_ = [v[0]]
for i in range(1, N):
v_.append(self.update_pos(v[i].detach(), measurements_covs_l[i-1]))
v = torch.stack(v_)

Resnet implementation: forward() takes 1 positional argument but 2 were given

I wrote this code and when I run it I get the following error: forward() takes 1 positional argument but 2 were given. As far as I know, I am passing only one argument to forward().
ResNet is a basic residual block
class ResNet(nn.Module):
def __init__(self, in_channels, mid_channels, mid2_channels ,out_channels):
super().__init__()
self.conv1 = nn.Conv2d(in_channels,mid_channels,kernel_size = 3, stride = 1, padding = 1)
self.conv1_bn = nn.BatchNorm2d(mid_channels)
self.conv2 = nn.Conv2d(mid_channels,mid2_channels,kernel_size = 3, stride = 1, padding = 1)
self.conv2_bn = nn.BatchNorm2d(mid2_channels)
self.conv3 = nn.Conv2d(mid2_channels,out_channels,kernel_size = 3, stride = 1, padding = 1)
self.conv3_bn = nn.BatchNorm2d(out_channels)
if (in_channels != out_channels):
self.conv_shortcut = nn.Conv2d(in_channels, out_channels, kernel_size = 1, stride = 1, padding = 0 )
def forward(self, X):
X_shortcut = X
X = F.relu(self.conv1(X))
X = self.conv1_bn(X)
X = F.relu(self.conv2(X))
X = self.conv2_bn(X)
X = F.relu(self.conv2(X))
X = self.conv2_bn(X)
if (in_channels == out_channels):
X = self.conv3(X) + X_shortcut
else:
X = self.conv3(X) + self.conv_shortcut(X_shortcut)
X = self.conv3_bn(F.relu(x))
return X
This the method for generating a model using the given layers.
class TotalNet(nn.Module):
def __init__(self, Layers):
super().__init__()
self.hidden = nn.ModuleList()
self.hidden.append(nn.BatchNorm2d(1))
for i in range(0,len(Layers)-1,3):
in_channels, mid_channels, mid2_channels, out_channels = Layers[i:(i+4)]
self.hidden.append(ResNet(in_channels, mid_channels, mid2_channels, out_channels))
self.hidden.append(nn.Flatten())
def forward(self, X):
X = self.hidden(X)
return X
the following is how I am calling the function:
test = TotalNet([9,2,9,9,9,9,9,9,9,9])
a = torch.rand((1,9,9), dtype = torch.float32)
test(a)
I realized that I was passing the X to the nn.ModuleList. This is incorrect that the right way would be to apply X to the elements of nn.ModuleList and updating the values of X.
In other words, the forward function of TotalNet should be the following:
for operation in self.hidden:
X = operation(X)
return X

PyMC3- Custom theano Op to do numerical integration

I am using PyMC3 for parameter estimation using a particular likelihood function which has to be defined. I googled it and found out that I should use the densitydist method for implementing the user defined likelihood functions but it is not working. How to incorporate a user defined likelihood function in PyMC3 and to find out the maximum a posteriori (MAP) estimate for my model? My code is given below. Here L is the analytic form of my Likelihood function. I have some observational data for the radial velocity(vr) and postion (r) for some objects, which is imported from excel file.
data_ = np.array(pandas.read_excel('aaa.xlsx',header=None))
gamma=3.77;
G = 4.302*10**-6;
rmin = 3.0;
R = 95.7;
vr=data_[:,1];
r= data_[:,0];
h= np.pi;
class integrateOut(theano.Op):
def __init__(self,f,t,t0,tf,*args,**kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self,*inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f,self.fvars)
except:
self.gradF = None
return theano.Apply(self,self.fvars,[tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t]+self.fvars,self.f)
output_storage[0][0] = quad(f,self.t0,self.tf,args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g,self.t,self.t0,self.tf)(*inputs)*grads[0] \
for g in self.gradF]
basic_model = pm.Model()
with basic_model:
M=[]
beta=[]
interval=0.01*10**12
M=pm.Uniform('M',
lower=0.5*10**12,upper=3.50*10**12,transform='interval')
beta=pm.Uniform('beta',lower=2.001,upper=2.999,transform='interval')
gamma=3.77
logp=[]
arr=[]
vnew=[]
rnew=[]
theta = tt.scalar('theta')
beta = tt.scalar('beta')
z = tt.cos(theta)**(2*( (gamma/(beta - 2)) - 3/2) + 3)
intZ = integrateOut(z,theta,-(np.pi)/2,(np.pi)/2)(beta)
gradIntZ = tt.grad(intZ,[beta])
funcIntZ = theano.function([beta],intZ)
funcGradIntZ = theano.function([beta],gradIntZ)
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
rnew.append(r[j]+(0.05*r[j]*float(dm.Decimal(rm.randrange(1,
20))/10)));
vn=np.array(vnew)
rn=np.array(rnew)
for beta in np.arange (2.01,2.99,0.01):
for M in np.arange (0.5,2.50,0.01):
i=np.arange(0,59,1)
q =( gamma/(beta - 2)) - 3/2
B = (G*M*10**12)/((beta -2 )*( R**(3 - beta)))
K = (gamma - 3)/((rmin**(3 - gamma))*funcIntZ(beta)*m.sqrt(2*B))
logp= -np.log(K*((1 -(( 1/(2*B) )*((vn[i]**2)*rn[i]**(beta -
2))))**(q+1))*(rn[i]**(1-gamma +(beta/2))))
arr.append(logp.sum())
def logp_func(rn,vn):
return min(np.array(arr))
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP(model=basic_model)
step = pm.Metropolis()
basicmodeltrace = pm.sample(10000, step=step,
start=start,random_seed=1,progressbar=True)
print(pm.summary(basicmodeltrace))
map_estimate = pm.find_MAP(model=basic_model)
print(map_estimate)
I am getting the following error message:
ValueError: Cannot compute test value: input 0 (theta) of Op
Elemwise{cos,no_inplace}(theta) missing default value.
Backtrace when that variable is created:
I am unable to get the output since the numerical integration is not working. I have used custom theano op for numerical integration code which i got from Custom Theano Op to do numerical integration . The integration works if I run it seperately inputting a particular value of beta, but not within the model.
I made a few changes to your code, this still does not work, but I hope it is closer to a solution. Please check this thread, as someone is trying so solve essentially the same problem.
class integrateOut(theano.Op):
def __init__(self, f, t, t0, tf,*args, **kwargs):
super(integrateOut,self).__init__()
self.f = f
self.t = t
self.t0 = t0
self.tf = tf
def make_node(self, *inputs):
self.fvars=list(inputs)
try:
self.gradF = tt.grad(self.f, self.fvars)
except:
self.gradF = None
return theano.Apply(self, self.fvars, [tt.dscalar().type()])
def perform(self,node, inputs, output_storage):
args = tuple(inputs)
f = theano.function([self.t] + self.fvars,self.f)
output_storage[0][0] = quad(f, self.t0, self.tf, args=args)[0]
def grad(self,inputs,grads):
return [integrateOut(g, self.t, self.t0, self.tf)(*inputs)*grads[0] \
for g in self.gradF]
gamma = 3.77
G = 4.302E-6
rmin = 3.0
R = 95.7
vr = data[:,1]
r = data[:,0]
h = np.pi
interval = 1E10
vnew = []
rnew = []
for j in np.arange(0,59,1):
vnew.append(vr[j]+(0.05*vr[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
rnew.append(r[j]+(0.05*r[j] * float(dm.Decimal(rm.randrange(1, 20))/10)))
vn = np.array(vnew)
rn = np.array(rnew)
def integ(gamma, beta, theta):
z = tt.cos(theta)**(2*((gamma/(beta - 2)) - 3/2) + 3)
return integrateOut(z, theta, -(np.pi)/2, (np.pi)/2)(beta)
with pm.Model() as basic_model:
M = pm.Uniform('M', lower=0.5*10**12, upper=3.50*10**12)
beta = pm.Uniform('beta', lower=2.001, upper=2.999)
theta = pm.Normal('theta', 0, 10**2)
def logp_func(rn,vn):
q = (gamma/(beta - 2)) - 3/2
B = (G*M*1E12) / ((beta -2 )*(R**(3 - beta)))
K = (gamma - 3) / ((rmin**(3 - gamma)) * integ(gamma, beta, theta) * (2*B)**0.5)
logp = - np.log(K*((1 -((1/(2*B))*((vn**2)*rn**(beta -
2))))**(q+1))*(rn**(1-gamma +(beta/2))))
return logp.sum()
logpvar = pm.DensityDist("logpvar", logp_func, observed={"rn": rn,"vn":vn})
start = pm.find_MAP()
#basicmodeltrace = pm.sample()
print(start)

Resources