NotImplementedError: You must implement the backward function for custom autograd.Function - pytorch

class Layer(nn.Module):
    ...
    def forward(self, pxo):
        p, x, o = pxo  # (n, 3), (n, in_planes), (b)
        n, c = x.shape
        npoints = self.npoints
        v = self.linear_v(x)  # (n, c)
        sampling_offsets = self.sampling_offsets(x)  # (n, npoints * 3)
        sampling_offsets = sampling_offsets.reshape(n * npoints, 3)  # (n * npoints, 3)
        sampling_offsets /= self.resolution  # (n * npoints, 3)
        sampling_offsets = sampling_offsets.reshape(n, npoints, 3)  # (n, npoints, 3)
        sampling_positions = (sampling_offsets + p.unsqueeze(dim=1)).reshape(-1, 3)  # (n * npoints, 3)
        attention_weights = self.attention_weights(x)  # (n, npoints)
        attention_weights = self.softmax(attention_weights)  # (n, npoints)
        new_o = o * npoints
        v = pointops.interpolation(p, sampling_positions, v, o, new_o).reshape(n, npoints, c)
        v = v.transpose(1, 2).contiguous()  # (n, c, npoints)
        h_v = torch.matmul(v, attention_weights.unsqueeze(-1)).reshape(n, c)  # (n, c)
        x = self.linear_output(h_v)
        return x
As soon as I use this layer, the error appears. If I switch to another network, it does not. How does this layer cause the error?
def interpolation(xyz, new_xyz, feat, offset, new_offset, k=3):
    """
    input: xyz: (m, 3), new_xyz: (n, 3), feat: (m, c), offset: (b), new_offset: (b)
    output: (n, c)
    """
    assert xyz.is_contiguous() and new_xyz.is_contiguous() and feat.is_contiguous()
    idx, dist = knnquery(k, xyz, new_xyz, offset, new_offset)  # (n, 3), (n, 3)
    dist_recip = 1.0 / (dist + 1e-8)  # (n, 3)
    norm = torch.sum(dist_recip, dim=1, keepdim=True)
    weight = dist_recip / norm  # (n, 3)
    new_feat = torch.cuda.FloatTensor(new_xyz.shape[0], feat.shape[1]).zero_()
    for i in range(k):
        new_feat += feat[idx[:, i].long(), :] * weight[:, i].unsqueeze(-1)
    return new_feat
The loss I use is nn.CrossEntropyLoss(). I can compute the loss, but loss.backward() raises the error.
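For context, this NotImplementedError is raised when an op that subclasses torch.autograd.Function (here quite possibly the CUDA-backed knnquery, assuming it is implemented that way) defines forward but no backward. A minimal sketch of the structure PyTorch expects, using a hypothetical doubling op only to illustrate:

import torch

class Double(torch.autograd.Function):
    # Hypothetical op: doubles the input and supports backprop.
    @staticmethod
    def forward(ctx, x):
        return 2 * x

    @staticmethod
    def backward(ctx, grad_output):
        # Omitting this method is exactly what triggers
        # "NotImplementedError: You must implement the backward function".
        return 2 * grad_output

y = Double.apply(torch.ones(3, requires_grad=True))
y.sum().backward()  # works because backward is defined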

Related

DFS vs. Kruskal runtime (maze generation)

I have written two algorithms for creating unique mazes, one using depth-first search (DFS) and the other using Kruskal's algorithm. The DFS algorithm performs as expected; however, Kruskal's runs marginally slower than DFS and I do not know why. I wrote Kruskal's algorithm in Python.
I suspect the random.choice() function is the underlying problem. The difference in runtime becomes noticeable when (r, c) > 30.
Here is the code for Kruskal's algorithm:
# Create a list of all possible edges
def create_edges(r, c):
    edges = []
    for y in range(r):
        for x in range(c):
            i = (y, x)
            for d in ((0, 1), (0, -1), (1, 0), (-1, 0)):
                p = tuple(map(sum, zip(d, i)))
                py = p[0]
                px = p[1]
                if px in range(c) and py in range(r):
                    edges.append([i, p])
    return edges
def kruskal(r, c, sz):
    path = []
    # Create a list of parent root nodes
    roots = {(y, x): [(y, x)] for y in range(r) for x in range(c)}
    edges = create_edges(r, c)
    while edges:
        # Choose a random edge
        edge = random.choice(edges)
        parent = edge[0]
        child = edge[1]
        parent_set = get_set(roots, parent)
        child_set = get_set(roots, child)
        # Check if the parent / child are already in the same set
        if parent_set == child_set:
            # list.reverse() reverses in place and returns None,
            # so build the reversed edge as a new list instead
            rev_edge = edge[::-1]
            if rev_edge in edges:
                edges.remove(rev_edge)
            edges.remove(edge)
            continue
        roots[parent_set] += roots[child_set]
        roots.pop(child_set)
        path.extend((parent, child))
        rev_edge = edge[::-1]
        if rev_edge in edges:
            edges.remove(rev_edge)
        edges.remove(edge)
    return path
def get_set(roots, member):
    s = None
    for parent, children in roots.items():
        if member in children:
            s = parent
    return s
def create_maze(t, r, c, sz):
    maze = [['|_' for _ in range(c)] for _ in range(r)]
    for cell in maze: cell.append('| ')
    wd = {'DOWN' : ( 1, 0),
          'UP'   : (-1, 0),
          'LEFT' : ( 0, -1),
          'RIGHT': ( 0, 1)}
    for n in range(len(t) - 1):
        a = n
        b = n + 1
        p1 = t[a]
        p2 = t[b]
        ay, ax = p1[0], p1[1]
        by, bx = p2[0], p2[1]
        w = tuple(numpy.array(p2) - numpy.array(p1))
        if w in wd.values():
            k = list(wd.keys())[list(wd.values()).index(w)]
            if k == 'DOWN': maze[ay][ax] = maze[ay][ax].replace('_', ' ')
            if k == 'UP': maze[by][bx] = maze[by][bx].replace('_', ' ')
            if k == 'LEFT': maze[ay][ax] = maze[ay][ax].replace('|', ' ')
            if k == 'RIGHT': maze[by][bx] = maze[by][bx].replace('|', ' ')
    return maze
def print_maze(maze, r, c, delay = 0):
    s, l = min((r, c)), max((r, c))
    a = 1 / (4 * r * c)
    e = (1 / (s * l)) ** 2
    delay = (a * 2.718 ** (-1 * e)) ** 0.5
    time.sleep(delay)
    print(' _' * c)
    for iy in range(r):
        for ix in range(c + 1):
            print(maze[iy][ix], end = '')
        print('')
    print('')
def main():
    r = 30
    c = 30
    sz = r * c
    path = kruskal(r, c, sz)
    maze = create_maze(path, r, c, sz)
    print_maze(maze, r, c)

if __name__ == "__main__":
    main()
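A side note on the runtime: get_set walks every set in roots on each lookup, and edges.remove is a linear scan, so each processed edge costs O(r·c); that likely dominates well before random.choice does. For comparison, a minimal union-find (disjoint-set) sketch with path compression, the structure Kruskal's algorithm is usually paired with (names are illustrative, not from the code above):

def find(parents, v):
    # Walk up to the root, halving the path as we go.
    while parents[v] != v:
        parents[v] = parents[parents[v]]
        v = parents[v]
    return v

def union(parents, a, b):
    ra, rb = find(parents, a), find(parents, b)
    if ra == rb:
        return False  # a and b are already connected
    parents[rb] = ra
    return True

# Usage sketch: parents = {(y, x): (y, x) for y in range(r) for x in range(c)},
# then union(parents, edge[0], edge[1]) replaces the get_set / merge logic.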

ViVIT PyTorch: RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15

I am trying to run Video Vision Transformer (ViViT) code with my dataset, but I am getting an error with CrossEntropyLoss from PyTorch as the loss function.
I have 6 classes:
['Run', 'Sit', 'Walk', 'Wave', 'Sit', 'Stand']
Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=1e-9, momentum=0.9)
Class Weights
tensor([0.0045, 0.0042, 0.0048, 0.0038, 0.0070, 0.0065])
Loss Function
loss_func = nn.CrossEntropyLoss(weight=class_weights.to(device))
Code Throwing the Error
train_epoch(model, optimizer, train_loader, train_loss_history, loss_func)
Error
RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
Code Calling the transformer
model = ViViT(224, 16, 100, 16).cuda()
Getting Video Frames
def get_frames(filename, n_frames=1):
    frames = []
    v_cap = cv2.VideoCapture(filename)
    v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_list = np.linspace(0, v_len - 1, n_frames + 1, dtype=np.int16)
    frame_dims = np.array([224, 224, 3])
    for fn in range(v_len):
        success, frame = v_cap.read()
        if success is False:
            continue
        if (fn in frame_list):
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame = cv2.resize(frame, (frame_dims[0], frame_dims[1]))
            frames.append(frame)
    v_cap.release()
    return frames, v_len
Dataset Preprocessing
class DatasetProcessing(data.Dataset):
    def __init__(self, df, root_dir):
        super(DatasetProcessing, self).__init__()
        # List of all video paths
        video_list = df["Video"].apply(lambda x: root_dir + '/' + x)
        self.video_list = np.asarray(video_list)
        self.df = df

    def __getitem__(self, index):
        # Ensure that the raw videos are in respective folders and the folder name matches the output class label
        video_label = self.video_list[index].split('/')[-2]
        video_name = self.video_list[index].split('/')[-1]
        video_frames, len_ = get_frames(self.video_list[index], n_frames = 15)
        video_frames = np.asarray(video_frames)
        video_frames = video_frames / 255
        class_list = ['Run', 'Walk', 'Wave', 'Sit', 'Turn', 'Stand']
        class_id_loc = np.where(class_list == video_label)
        label = class_id_loc
        d = torch.as_tensor(np.array(video_frames).astype('float'))
        l = torch.as_tensor(np.array(label).astype('float'))
        return (d, l)

    def __len__(self):
        return self.video_list.shape[0]
Training Epochs
def train_epoch(model, optimizer, data_loader, loss_history, loss_func):
    total_samples = len(data_loader.dataset)
    model.train()
    for i, (data, target) in enumerate(data_loader):
        optimizer.zero_grad()
        x = data.cuda()
        data = rearrange(x, 'b p h w c -> b p c h w').cuda()
        target = target.type(torch.LongTensor).cuda()
        pred = model(data.float())
        output = F.log_softmax(pred, dim=1)
        loss = loss_func(output, target.squeeze(1))
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print('[' + '{:5}'.format(i * len(data)) + '/' + '{:5}'.format(total_samples) +
                  ' (' + '{:3.0f}'.format(100 * i / len(data_loader)) + '%)] Loss: ' +
                  '{:6.4f}'.format(loss.item()))
            loss_history.append(loss.item())
Evaluate Model
def evaluate(model, data_loader, loss_history, loss_func):
    model.eval()
    total_samples = len(data_loader.dataset)
    correct_samples = 0
    total_loss = 0
    with torch.no_grad():
        for data, target in data_loader:
            x = data.cuda()
            data = rearrange(x, 'b p h w c -> b p c h w').cuda()
            target = target.type(torch.LongTensor).cuda()
            output = F.log_softmax(model(data.float()), dim=1)
            loss = loss_func(output, target)
            _, pred = torch.max(output, dim=1)
            total_loss += loss.item()
            correct_samples += pred.eq(target).sum()
    avg_loss = total_loss / total_samples
    loss_history.append(avg_loss)
    print('\nAverage test loss: ' + '{:.4f}'.format(avg_loss) +
          ' Accuracy:' + '{:5}'.format(correct_samples) + '/' +
          '{:5}'.format(total_samples) + ' (' +
          '{:4.2f}'.format(100.0 * correct_samples / total_samples) + '%)\n')
Transformer
class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
        super().__init__()
        self.layers = nn.ModuleList([])
        self.norm = nn.LayerNorm(dim)
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
            ]))

    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return self.norm(x)
ViViT Code
class ViViT(nn.Module):
    def __init__(self, image_size, patch_size, num_classes, num_frames, dim = 192, depth = 4, heads = 3,
                 pool = 'cls', in_channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0., scale_dim = 4):
        super().__init__()
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'
        assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
        num_patches = (image_size // patch_size) ** 2
        patch_dim = in_channels * patch_size ** 2
        self.to_patch_embedding = nn.Sequential(
            Rearrange('b t c (h p1) (w p2) -> b t (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
            nn.Linear(patch_dim, dim),
        )
        self.pos_embedding = nn.Parameter(torch.randn(1, num_frames, num_patches + 1, dim))
        self.space_token = nn.Parameter(torch.randn(1, 1, dim))
        self.space_transformer = Transformer(dim, depth, heads, dim_head, dim * scale_dim, dropout)
        self.temporal_token = nn.Parameter(torch.randn(1, 1, dim))
        self.temporal_transformer = Transformer(dim, depth, heads, dim_head, dim * scale_dim, dropout)
        self.dropout = nn.Dropout(emb_dropout)
        self.pool = pool
        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, x):
        x = self.to_patch_embedding(x)
        b, t, n, _ = x.shape
        cls_space_tokens = repeat(self.space_token, '() n d -> b t n d', b = b, t = t)
        x = torch.cat((cls_space_tokens, x), dim=2)
        x += self.pos_embedding[:, :, :(n + 1)]
        x = self.dropout(x)
        x = rearrange(x, 'b t n d -> (b t) n d')
        x = self.space_transformer(x)
        x = rearrange(x[:, 0], '(b t) ... -> b t ...', b=b)
        cls_temporal_tokens = repeat(self.temporal_token, '() n d -> b n d', b=b)
        x = torch.cat((cls_temporal_tokens, x), dim=1)
        x = self.temporal_transformer(x)
        x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]
        return self.mlp_head(x)
Multi-target (per-class probability) targets are a feature supported since version 1.10.0:
https://discuss.pytorch.org/t/crossentropyloss-vs-per-class-probabilities-target/138331
Please check your PyTorch version.
Please refer to the example using the UCF101 top-5 dataset, which is available on my Colab. The PyTorch version there is 1.12.0+cu113, and the code you listed ran the training almost exactly as written.
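To make the version point concrete, here is a small sketch of the target shapes nn.CrossEntropyLoss accepts (shapes and values are made up for illustration):

import torch
import torch.nn as nn

loss_func = nn.CrossEntropyLoss()
logits = torch.randn(4, 6)                 # (batch, num_classes)

# Class-index targets: shape (batch,), dtype long -- supported on all versions.
target_idx = torch.tensor([0, 2, 5, 1])
loss = loss_func(logits, target_idx)

# A (batch, 1) target is what triggers "multi-target not supported"
# on older versions; squeeze the extra dimension first.
target_2d = target_idx.unsqueeze(1)        # (batch, 1)
loss = loss_func(logits, target_2d.squeeze(1))

# Per-class probability targets of shape (batch, num_classes)
# only work from PyTorch 1.10 onward.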

Convert TensorFlow 1 to TensorFlow 2

There is code written with TensorFlow 1 at this link:
https://github.com/carlthome/tensorflow-convlstm-cell/blob/master/cell.py
I want to use this class as a layer in tf.keras, so it should be rewritten for TensorFlow 2.
How can I do it?
This is the code:
import tensorflow as tf

class ConvLSTMCell(tf.nn.rnn_cell.RNNCell):
    """A LSTM cell with convolutions instead of multiplications.

    Reference:
      Xingjian, S. H. I., et al. "Convolutional LSTM network: A machine learning approach
      for precipitation nowcasting." Advances in Neural Information Processing Systems. 2015.
    """

    def __init__(self, shape, filters, kernel, forget_bias=1.0, activation=tf.tanh,
                 normalize=True, peephole=True, data_format='channels_last', reuse=None):
        super(ConvLSTMCell, self).__init__(_reuse=reuse)
        self._kernel = kernel
        self._filters = filters
        self._forget_bias = forget_bias
        self._activation = activation
        self._normalize = normalize
        self._peephole = peephole
        if data_format == 'channels_last':
            self._size = tf.TensorShape(shape + [self._filters])
            self._feature_axis = self._size.ndims
            self._data_format = None
        elif data_format == 'channels_first':
            self._size = tf.TensorShape([self._filters] + shape)
            self._feature_axis = 0
            self._data_format = 'NC'
        else:
            raise ValueError('Unknown data_format')

    @property
    def state_size(self):
        return tf.nn.rnn_cell.LSTMStateTuple(self._size, self._size)

    @property
    def output_size(self):
        return self._size

    def call(self, x, state):
        c, h = state
        x = tf.concat([x, h], axis=self._feature_axis)
        n = x.shape[-1].value
        m = 4 * self._filters if self._filters > 1 else 4
        W = tf.get_variable('kernel', self._kernel + [n, m])
        y = tf.nn.convolution(x, W, 'SAME', data_format=self._data_format)
        if not self._normalize:
            y += tf.get_variable('bias', [m], initializer=tf.zeros_initializer())
        j, i, f, o = tf.split(y, 4, axis=self._feature_axis)
        if self._peephole:
            i += tf.get_variable('W_ci', c.shape[1:]) * c
            f += tf.get_variable('W_cf', c.shape[1:]) * c
        if self._normalize:
            j = tf.contrib.layers.layer_norm(j)
            i = tf.contrib.layers.layer_norm(i)
            f = tf.contrib.layers.layer_norm(f)
        f = tf.sigmoid(f + self._forget_bias)
        i = tf.sigmoid(i)
        c = c * f + i * self._activation(j)
        if self._peephole:
            o += tf.get_variable('W_co', c.shape[1:]) * c
        if self._normalize:
            o = tf.contrib.layers.layer_norm(o)
            c = tf.contrib.layers.layer_norm(c)
        o = tf.sigmoid(o)
        h = o * self._activation(c)
        state = tf.nn.rnn_cell.LSTMStateTuple(c, h)
        return h, state
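If you only need the standard ConvLSTM behaviour (without the peephole and layer-normalization options above), TF2 already ships an equivalent Keras layer, which may be simpler than porting the cell by hand. A minimal sketch (shapes are illustrative):

import tensorflow as tf

# tf.keras.layers.ConvLSTM2D expects inputs of shape
# (batch, time, height, width, channels) with channels_last.
model = tf.keras.Sequential([
    tf.keras.layers.ConvLSTM2D(filters=32, kernel_size=(3, 3),
                               padding='same', return_sequences=True,
                               input_shape=(None, 64, 64, 1)),
    tf.keras.layers.ConvLSTM2D(filters=32, kernel_size=(3, 3),
                               padding='same', return_sequences=False),
])

The peephole and layer-normalization options have no direct Keras equivalent; porting them would mean subclassing tf.keras.layers.Layer and replacing tf.get_variable with self.add_weight.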

Failure to achieve an effective speed increase when Cythonizing Python 3 code

I Cythonized some Python 3 code, but failed to speed it up: the pure Python 3 code takes ~29 seconds to execute, while the Cythonized code takes ~25 seconds (details below). Where did I go wrong in the Cythonized code? I would be glad for any help. Below are the pure Python 3 code, the Cythonized code, and the setup file, respectively.
Python version: 3.7.5
Cython version: 0.29.14
Editor: Pycharm
OS: Windows 10
The code runs 100 times in a for loop. The sizes of the arrays used at each iteration are:
velos = 3300
V = 3300
S = 3300 x 3300
vels = 201
line_centers (in masks) = ~100
If necessary, I can add sample data to this post.
import numpy as np
import numpy.linalg as la

def lsd(velos, V, S, vels, masks, Lambda=0.):
    m, n = len(vels), len(velos)
    Nmask = len(masks)
    V = V - 1
    M = np.zeros((n, m * len(masks)))
    for N, (line_centers, weights) in enumerate(masks):
        for l, lc in enumerate(line_centers):
            vi = velos - lc
            for j in range(m - 1):
                w = np.argwhere((vi < vels[j + 1]) & (vi > vels[j])).T[0]
                if len(w) == 0: continue
                M[w, j + N * m] = weights[l] * (vels[j + 1] - vi[w]) / (vels[j + 1] - vels[j])
                M[w, j + 1 + N * m] = weights[l] * (vi[w] - vels[j]) / (vels[j + 1] - vels[j])
    if np.abs(np.sum(M)) < 1e-8:
        return np.zeros((1, len(vels)))
    if Lambda:
        R = np.zeros((m * Nmask, m * Nmask))
        for i in range(1, m - 1):
            R[i, i] = 2
            R[i - 1, i] = -1
            R[i + 1, i] = -1
        R[0, 0] = 1
        R[1, 0] = -1
        R[-1, -1] = 1
        R[-2, -1] = -1
    X = np.matmul(M.T, (S ** 2))
    XM = np.matmul(X, M)
    if Lambda:
        XM = XM + Lambda * R
    cc = np.matmul(X, V)
    Z, res, rank, s = la.lstsq(XM, cc, rcond=None)
    # ZT = Z.T
    # ccT = cc.T
    # Z_ = []
    # C_ = []
    # for i in range(len(Z)):
    #     Z_.append([])
    #     C_.append([])
    #     for N in range(Nmask):
    #         Z_[-1].append(Z[i][N * m: (N + 1) * m])
    #         C_[-1].append(cc[i][N * m: (N + 1) * m])
    return Z.T
import numpy as np
cimport numpy as np
import cython
# from libcpp.vector cimport vector

DTYPE = np.float
ctypedef np.double_t DTYPE_t

@cython.boundscheck(False)
# @cython.wraparound(False)
@cython.cdivision(False)
@cython.initializedcheck(True)
cpdef lsd(np.ndarray[DTYPE_t, ndim=1] velos, np.ndarray[DTYPE_t, ndim=2] V, np.ndarray[DTYPE_t, ndim=2] S,
          np.ndarray[DTYPE_t, ndim=1] vels, np.ndarray[DTYPE_t, ndim=3] masks, float Lambda=0.):
    cdef int m = vels.shape[0]
    cdef int n = velos.shape[0]
    cdef int Nmask = masks.shape[0]
    cdef int N, l, j, i
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] M = np.zeros((n, m * Nmask), dtype=DTYPE)
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] R = np.zeros((m * Nmask, m * Nmask), dtype=DTYPE)
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] X
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] XM
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] cc
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] Z
    cdef np.ndarray[DTYPE_t, ndim=1, mode='c'] line_centers, weights, vi
    cdef np.ndarray[DTYPE_t, ndim=2, mode='c'] zeros = np.zeros((1, m), dtype=DTYPE)
    cdef np.ndarray w
    # cdef double lc
    V = V - 1
    for N in range(Nmask):
        line_centers = masks[N][0]
        weights = masks[N][1]
        for l in range(len(line_centers)):
            vi = velos - line_centers[l]
            for j in range(m - 1):
                # print(np.argwhere((vi < vels[j + 1]) & (vi > vels[j])).T[0])
                w = np.argwhere((vi < vels[j + 1]) & (vi > vels[j])).T[0]
                if len(w) == 0: continue
                M[w, j + N * m] = weights[l] * (vels[j + 1] - vi[w]) / (vels[j + 1] - vels[j])
                M[w, j + 1 + N * m] = weights[l] * (vi[w] - vels[j]) / (vels[j + 1] - vels[j])
    if np.abs(np.sum(M)) < 1e-8:
        return zeros
    if Lambda:
        for i in range(1, m - 1):
            R[i, i] = 2
            R[i - 1, i] = -1
            R[i + 1, i] = -1
        R[0, 0] = 1
        R[1, 0] = -1
        R[-1, -1] = 1
        R[-2, -1] = -1
    X = np.matmul(M.T, (S ** 2))
    XM = np.matmul(X, M)
    if Lambda:
        XM = XM + Lambda * R
    cc = np.matmul(X, V)
    Z, _, _, _ = np.linalg.lstsq(XM, cc, rcond=None)
    # ZT = Z.T
    # ccT = cc.T
    # Z_ = []
    # C_ = []
    # for i in range(len(Z)):
    #     Z_.append([])
    #     C_.append([])
    #     for N in range(Nmask):
    #         Z_[-1].append(Z[i][N * m: (N + 1) * m])
    #         C_[-1].append(cc[i][N * m: (N + 1) * m])
    return Z.T
from setuptools import setup
from Cython.Build import cythonize
import sys
import numpy

setup(
    ext_modules=cythonize('LSD_Cythonize.pyx',
                          compiler_directives={'language_level': sys.version_info[0]}),
    include_dirs=[numpy.get_include()])
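One plausible reason for the small gain: almost all of the time here is spent inside NumPy calls (np.argwhere, np.matmul, lstsq), which Cython cannot accelerate; typing the arrays only removes the Python overhead around those calls. A bigger win usually comes from removing the per-bin argwhere scan. For instance, if vels is sorted ascending (as a velocity grid usually is, but that is an assumption), np.searchsorted can assign every sample to its bin in one pass. A sketch, with vi and vels as in the code above:

import numpy as np

def bin_indices(vi, vels):
    # For each sample, find bin j such that vels[j] <= vi < vels[j + 1].
    # Assumes vels is sorted ascending.
    j = np.searchsorted(vels, vi) - 1
    valid = (j >= 0) & (j < len(vels) - 1)
    return j[valid], np.nonzero(valid)[0]

With every sample's bin known, the two M assignments can be written once per line center with fancy indexing instead of scanning all m bins.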

Implementing self attention

I am trying to implement self-attention in PyTorch.
I need to calculate the following expressions: a similarity function S (2-dimensional), P (2-dimensional), and C':
S[i][j] = W1 * inp[i] + W2 * inp[j] + W3 * inp[i] * inp[j]
P[i][j] = exp(S[i][j]) / sum_j exp(S[i][j])
(basically, P is a row-wise softmax of S)
C'[i] = sum_j P[i][j] * x1[j]
I tried the following code using for loops:
for i in range(self.dim):
    for j in range(self.dim):
        S[i][j] = self.W1 * x1[i] + self.W2 * x1[j] + self.W3 * x1[i] * x1[j]

for i in range(self.dim):
    for j in range(self.dim):
        P[i][j] = torch.exp(S[i][j]) / torch.sum(torch.exp(S[i]))

# attend
for i in range(self.dim):
    out[i] = 0
    for j in range(self.dim):
        out[i] += P[i][j] * x1[j]
Is there any faster way to implement this in PyTorch?
Here is an example of self-attention that I implemented for dual attention on HSI imagery:
class PAM_Module(Module):
    """ Position attention module
    https://github.com/junfu1115/DANet/blob/master/encoding/nn/attention.py"""
    # Ref from SAGAN
    def __init__(self, in_dim):
        super(PAM_Module, self).__init__()
        self.chanel_in = in_dim
        self.query_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1)
        self.key_conv = Conv2d(in_channels=in_dim, out_channels=in_dim//8, kernel_size=1)
        self.value_conv = Conv2d(in_channels=in_dim, out_channels=in_dim, kernel_size=1)
        self.gamma = Parameter(torch.zeros(1))
        self.softmax = Softmax(dim=-1)

    def forward(self, x):
        """
        inputs :
            x : input feature maps (B X C X H X W)
        returns :
            out : attention value + input feature
            attention : B X (HxW) X (HxW)
        """
        m_batchsize, C, height, width = x.size()
        proj_query = self.query_conv(x).view(m_batchsize, -1, width*height).permute(0, 2, 1)
        proj_key = self.key_conv(x).view(m_batchsize, -1, width*height)
        energy = torch.bmm(proj_query, proj_key)
        attention = self.softmax(energy)
        proj_value = self.value_conv(x).view(m_batchsize, -1, width*height)
        out = torch.bmm(proj_value, attention.permute(0, 2, 1))
        out = out.view(m_batchsize, C, height, width)
        out = self.gamma * out + x
        # out = F.avg_pool2d(out, out.size()[2:4])
        return out
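For the exact expressions in the question, the three loops can also be collapsed with broadcasting. A sketch, treating W1, W2, W3 as scalars and x1 as a 1-D tensor, as in the loop code:

import torch

def self_attention(x1, W1, W2, W3):
    # S[i][j] = W1*x1[i] + W2*x1[j] + W3*x1[i]*x1[j], built via broadcasting
    xi = x1.unsqueeze(1)                   # (dim, 1)
    xj = x1.unsqueeze(0)                   # (1, dim)
    S = W1 * xi + W2 * xj + W3 * xi * xj   # (dim, dim)
    P = torch.softmax(S, dim=1)            # row-wise softmax over j
    return P @ x1                          # out[i] = sum_j P[i][j] * x1[j]

Calling self_attention(x1, self.W1, self.W2, self.W3) should match the triple-loop version up to floating-point error, while running as a handful of vectorized ops.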