element 0 of tensors does not require grad and does not have a grad_fn. How can I avoid this with a custom loss function in pytorch? - pytorch

I want to implement the NCELoss in pytorch, but I can't get to work it. Here is the code so far:
class NCELoss(nn.Module):
def __init__(self, weight=None, size_average=True):
super(NCELoss, self).__init__()
def forward(self, inputs, targets, smooth=1, batch_size=4, t=0.05):
reshaped_base = inputs[0].reshape(inputs[0].shape[0]*inputs[0].shape[2],inputs[0].shape[1])
reshaped_aug_0 = targets[0].reshape(targets[0].shape[0]*targets[0].shape[2],targets[0].shape[1])
reshaped_aug_1 = targets[1].reshape(targets[1].shape[0]*targets[1].shape[2],targets[1].shape[1])
reshaped_aug_2 = targets[2].reshape(targets[2].shape[0]*targets[2].shape[2],targets[2].shape[1])
reshaped_aug_3 = targets[3].reshape(targets[3].shape[0]*targets[3].shape[2],targets[3].shape[1])
reshaped_aug_0 = pad(reshaped_aug_0, (0, 0, 256, 256), 'constant', 0)
reshaped_aug_1 = pad(reshaped_aug_1, (0, 0, 256, 256), 'constant', 0)
reshaped_aug_2 = pad(reshaped_aug_2, (0, 0, 256, 256), 'constant', 0)
reshaped_aug_3 = pad(reshaped_aug_3, (0, 0, 256, 256), 'constant', 0)
sim_0 = cosine_similarity(reshaped_aug_0.to(0),reshaped_base.to(0))
sim_0 = sim_0[sim_0 != 0].mean()
sim_1 = cosine_similarity(reshaped_aug_1.to(0),reshaped_base.to(0))
sim_1 = sim_1[sim_1 != 0].mean()
sim_2 = cosine_similarity(reshaped_aug_2.to(0),reshaped_base.to(0))
sim_2 = sim_2[sim_2 != 0].mean()
sim_3 = cosine_similarity(reshaped_aug_3.to(0),reshaped_base.to(0))
sim_3 = sim_3[sim_3 != 0].mean()
positive = torch.exp(sim_0/t)
negative_1 = torch.exp(sim_1/t)
negative_2 = torch.exp(sim_2/t)
negative_3 = torch.exp(sim_3/t)
loss = - torch.log((positive/(negative_1 + negative_2 + negative_3)))
return loss
Then loss.backward().
So I have two tensors, and first I reshape them, then I pad one of the tensors since they have different sizes, and without it I can't apply the cosine similarity function on them. After that, I do further operations, and return the loss. Howewer, I can't calculate the gradient. I can't understand the exact reason, since I'm only using torch operations.
Any help is highly appreciated. I want to note that I'm still a beginner and there might be mistakes.

Related

How do I test a complex CNN model on a new image?

I am learning CNN and I have found a script online that classifies building rooftops from satellite images. The script works just fine but I am not able to figure out a way to test the script on a new single image. I am showing the code briefly and then I will show what I have tried:
seq = iaa.Sequential([
iaa.imgcorruptlike.Fog(severity=1),
iaa.imgcorruptlike.Spatter(severity =1),
])
batch_size = 16
size = 512
epochs =50
version = 1 # version 2 for MobilV2unet
data_augmentation = True
model_type = 'UNet%d' % (version)
translearn = True
from tensorflow.keras.applications import MobileNetV2
def m_u_net(input_shape):
inputs = Input(shape=input_shape, name="input_image")
encoder = MobileNetV2(input_tensor=inputs, weights="imagenet", include_top=False, alpha=1.3)
#encoder.trainable=False
skip_connection_names = ["input_image", "block_1_expand_relu", "block_3_expand_relu", "block_6_expand_relu"]
encoder_output = encoder.get_layer("block_13_expand_relu").output
f = [16, 32, 48, 64]
x = encoder_output
for i in range(1, len(skip_connection_names)+1, 1):
x_skip = encoder.get_layer(skip_connection_names[-i]).output
x = UpSampling2D((2, 2))(x)
x = Concatenate()([x, x_skip])
x = Conv2D(f[-i], (3, 3), padding="same")(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Conv2D(f[-i], (3, 3), padding="same")(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Conv2D(1, (1, 1), padding="same")(x)
x = Activation("sigmoid")(x)
model = Model(inputs, x)
return model
def load_rasters_simple(path, pathX, pathY ): # Subset from original raster with extent and upperleft coord
"""Load training data pairs (two high resolution images and two low resolution images)"""
pathXabs = os.path.join(path, pathX)
pathYabs = os.path.join(path, pathY)
le = len(os.listdir(pathXabs) )
stackX = []
stackY = []
for i in range(0, le):
fileX = os.path.join(pathXabs, os.listdir(pathXabs)[i])
fileY = os.path.join(pathYabs, os.listdir(pathXabs)[i])
dataX = gdal_array.LoadFile(fileX) #.astype(np.int),ysize=extent[1],xsize=extent[0]
stackX.append(dataX)
dataY = gdal_array.LoadFile(fileY) #.astype(np.int),ysize=extent[1],xsize=extent[0]
stackY.append(dataY)
stackX = np.array(stackX)
stackY = np.array(stackY)
return stackX, stackY
X, Y= load_rasters_simple('/Users/vaibhavsaxena/Desktop/segmentation/Classification/Satellite dataset ó± (global cities)','image','label')
def slice (arr, size, inputsize,stride):
result = []
if stride is None:
stride = size
for i in range(0, (inputsize-size)+1, stride):
for j in range(0, (inputsize-size)+1, stride):
s = arr[i:(i+size),j:(j+size), ]
result.append(s)
result = np.array(result)
return result
def batchslice (arr, size, inputsize, stride, num_img):
result = []
for i in range(0, num_img):
s= slice(arr[i,], size, inputsize, stride )
result.append(s )
result = np.array(result)
result = result.reshape(result.shape[0]*result.shape[1], result.shape[2], result.shape[3], -1)
return result
Y=batchslice(Y, size, Y.shape[1], size, Y.shape[0]).squeeze()
X_cl =batchslice(X_cl, size, X_cl.shape[1], size, X_cl.shape[0])
X_train = X_cl[:int(X_cl.shape[0]*0.8),]
Y_train = Y[:int(Y.shape[0]*0.8),]
X_test = X_cl[int(X_cl.shape[0]*0.8)+1:,]
Y_test = Y[int(Y.shape[0]*0.8)+1:,]
THEN the big unet model architecture. The whole script can be found here.
This model just works fine with the dataset. I am trying to test it with my own out of dataset image and this is what I have tried:
model = load_model('no_aug_unet_model.h5', custom_objects=dependencies)
model.compile(loss='binary_crossentropy', metrics=[iou],
optimizer=Adam(learning_rate=lr_schedule(0)))
from keras.preprocessing import image
test_image= image.load_img('bangkok_noi_2.jpg', target_size = (2000, 2000))
test_image = image.img_to_array(test_image)
test_image1 = test_image.reshape((1,2000,2000,3))
testpre = model.predict(test_image1)
img = Image.fromarray(test_image, 'RGB')
img.show()
The original shape of my test image is (1852, 3312, 3).
I am getting a weirdly predicted image that makes no sense unlike the expectations. I believe, I am doing the wrong preprocessing with my test image. Any help would be extremely appreciated.
The whole script can be found here.

How to create a concrete function for getting coordinates of a masked image and convert it to tensorflow lite model

How to create concrete function in tensorflow to get semantic segmentation image and calculate co-ordinates.
We are having current code for the same but not able to concrete function as we wanted to convert in tflite file for mobile application.
def predict_new_image(img_path, model):
img = load_img(img_path, grayscale=True)
x_img = img_to_array(img)
x_img = resize(x_img, (128, 128, 1), mode='constant', preserve_range=True)
X = np.zeros((1, 128, 128, 1), dtype=np.float32)
y_img = np.zeros((1, 128, 128, 1), dtype=np.float32)
X[0, ..., 0] = x_img.squeeze() / 255
pred = model.predict(X)
preds_img = (pred > 0.5).astype(np.uint8)
img_arr = preds_img[:,:,0]
# get the coordinates where the pixel isn't white (at a threshold)
black_thres = 1
idx = [(i,j) for i,x in enumerate(img_arr) for j,y in enumerate(x) if img_arr[i,j]==black_thres]
return idx

Keras layer asks for different shape than in the summary

I'm writing a U-net CNN in keras, and trying to use fit_generator for training. In order for this to work, I used a generator script, that could feed the images and labels for my network (simple fit function is working but I want to train a big dataset which cannot fit into the memory).
My problem is that in the model summary, it says correctly that, the output layer has a shape: (None, 288, 512, 4)
https://i.imgur.com/69xG8pO.jpg
but when I try actual training I get this error:
https://i.imgur.com/j7H6sHX.jpg
I don't get why keras wants (288, 512, 1) when in the summary it expects (288, 512, 4)
I tried it with my own unet code, and copied a working code from github also, but both of them has the exact same problem which leads me to believe that my generator script is the weak link. Below is the code I used (the image and label array functions used here were already working when I used them with "fit" in a previous CNN):
def generator(img_path, label_path, batch_size, height, width, num_classes):
input_pairs = get_pairs(img_path, label_path) # rewrite if param name changes
random.shuffle(input_pairs)
iterate_pairs = itertools.cycle(input_pairs)
while True:
X = []
Y = []
for _ in range(batch_size):
im, lab = next(iterate_pairs)
appended_im = next(iter(im))
appended_lab = next(iter(lab))
X.append(input_image_array(appended_im, width, height))
Y.append(input_label_array(appended_lab, width, height, num_classes, palette))
yield (np.array(X), np.array(Y))
I tried the generator out and the provided batches has the shapes of (for batch size of 15):
(15, 288, 512, 3)
(15, 288, 512, 4)
So I really do not know what could be the problem here.
EDIT: Here is the model code I used:
def conv_block(input_tensor, n_filter, kernel=(3, 3), padding='same', initializer="he_normal"):
x = Conv2D(n_filter, kernel, padding=padding, kernel_initializer=initializer)(input_tensor)
x = BatchNormalization()(x)
x = Activation("relu")(x)
x = Conv2D(n_filter, kernel, padding=padding, kernel_initializer=initializer)(x)
x = BatchNormalization()(x)
x = Activation("relu")(x)
return x
def deconv_block(input_tensor, residual, n_filter, kernel=(3, 3), strides=(2, 2), padding='same'):
y = Conv2DTranspose(n_filter, kernel, strides, padding)(input_tensor)
y = concatenate([y, residual], axis=3)
y = conv_block(y, n_filter)
return y
# NETWORK - n_classes is the desired number of classes, filters are fixed
def Unet(input_height, input_width, n_classes=4, filters=64):
# Downsampling
input_layer = Input(shape=(input_height, input_width, 3), name='input')
conv_1 = conv_block(input_layer, filters)
conv_1_out = MaxPooling2D(pool_size=(2, 2))(conv_1)
conv_2 = conv_block(conv_1_out, filters*2)
conv_2_out = MaxPooling2D(pool_size=(2, 2))(conv_2)
conv_3 = conv_block(conv_2_out, filters*4)
conv_3_out = MaxPooling2D(pool_size=(2, 2))(conv_3)
conv_4 = conv_block(conv_3_out, filters*8)
conv_4_out = MaxPooling2D(pool_size=(2, 2))(conv_4)
conv_4_drop = Dropout(0.5)(conv_4_out)
conv_5 = conv_block(conv_4_drop, filters*16)
conv_5_drop = Dropout(0.5)(conv_5)
# Upsampling
deconv_1 = deconv_block(conv_5_drop, conv_4, filters*8)
deconv_1_drop = Dropout(0.5)(deconv_1)
deconv_2 = deconv_block(deconv_1_drop, conv_3, filters*4)
deconv_2_drop = Dropout(0.5)(deconv_2)
deconv_3 = deconv_block(deconv_2_drop, conv_2, filters*2)
deconv_3 = deconv_block(deconv_3, conv_1, filters)
# Output - mapping each 64-component feature vector to number of classes
output = Conv2D(n_classes, (1, 1))(deconv_3)
output = BatchNormalization()(output)
output = Activation("softmax")(output)
# embed into functional API
model = Model(inputs=input_layer, outputs=output, name="Unet")
return model
Change your loss to categorical_crossentropy.
When using the sparse_categorical_crossentropy loss, your targets
should be integer targets.

Error: shape '[-1, 270000]' is invalid for input of size 1440000

I got an error:
shape '[-1, 270000]' is invalid for the input of size 1440000
while running my code for a CNN structure input tensor size is 64.
Class MyNet(nn.Module):
def __init__(self):
super(MyNet, self).__init__()
self.conv1 = nn.Conv2d(3, 48, 2)
self.conv2 = nn.Conv2d(48, 108, 2)
self.conv3 = nn.Conv2d(108, 192, 2)
self.conv4 = nn.Conv2d(192, 300, 2)
self.pool = nn.MaxPool2d(2, 2)
self.fc1 = nn.Linear(300* 30* 30, 864)
self.fc2 = nn.Linear(864, 288)
self.fc3 = nn.Linear(288, 2)
def forward(self, x):
x = F.relu(self.conv1(x))
x = F.relu(self.conv2(x))
x = F.relu(self.conv3(x))
x = F.relu(self.conv4(x))
#x = self.pool(F.relu(self.conv4(x)))
x = self.pool(x)
x = x.view(-1, 300 * 30* 30)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.fc3(x)
return F.log_softmax(x)
Any idea why I am getting above error?
Because after your max pooling layer, the shape of feature map is (300, width, height), and 300*width*height != 300*30*30. If you want to reshape the tensor, you must keep the same number of elements.
The view operation which should flatten x is throwing this error, since the size of 300*30*30 is not matching your activation size. Most likely your custom dataset has a different spatial size, such that the view is failing.
Based on the shape given in the error message, it looks like your activation should have the shape [batch_size=3, channels=300, height=40, width=40], which results in 1440000 values. Try to change the input size in your linear layer to 300*40*40 like this:
self.fc1 = nn.Linear(300*40*40, 864)
and the flattening to:
x = x.view(x.size(0), 300*40*40)
Please, notify me if this doesn't work.

How to fix specific weight at embedding layer

I want to implement cnn sentence classification and padding 0 for variation length sentences.
But it is not reasonalble for this task because 0 will be treated as a vocabulary.
To solve this problem, I want to fix weight all zero for 0 map to word-vector in embedding layer,and other vocabular still can be trained.
Reference paper: https://arxiv.org/abs/1408.5882
Following is my current code:
w2v_weight = np.array(list(data['id_vec'].values()))
# add zeros in first dim:
zero = np.zeros((1,w2v_weight.shape[1]))
w2v_weight = np.concatenate((zero,w2v_weight),axis=0)
embedding_layer = Embedding(len(data["word_id"]) + 1, 300, weights= [w2v_weight],
input_length=data['x_test'].shape[1], trainable=True)
embedding_layer2 = Embedding(len(data["word_id"]) + 1, 300, weights= [w2v_weight],
input_length=data['x_test'].shape[1], trainable=False)
model_input = Input(shape=(None,),dtype='int32')
embedded_sequences2 = embedding_layer2(model_input)
embedded_sequences1 = embedding_layer(model_input)
ebd_cct = Concatenate()([embedded_sequences1,embedded_sequences2])
conv1 = Convolution1D(filters=100,kernel_size = 3,padding="same")(ebd_cct)
conv2 = Convolution1D(filters=100,kernel_size = 4,padding="same")(ebd_cct)
conv3 = Convolution1D(filters=100,kernel_size = 5,padding="same")(ebd_cct)
conv_a = Concatenate()([conv1,conv2,conv3])
conv_a = Activation("relu")(conv_a)
conv_add = GlobalMaxPool1D()(conv_a)
z = Dropout(0.5)(conv_add)
model_output = Dense(4, activation="softmax",kernel_constraint = max_norm(3.))(z)
model_1_two = Model(model_input, model_output)
model_1_two.summary()
model_1_two.compile(loss="categorical_crossentropy", optimizer="Adadelta",
metrics=['acc'])
history_1_two = model_1_two.fit(data["x_train"], data["y_train"],shuffle=True,
callbacks = [EarlyStopping(monitor="val_acc",patience=50)],
batch_size=50, epochs=20000,validation_data=(data["x_test"], data["y_test"]))
From your code I assume you're using Keras. Then, you can define the use of 0 as the out-of-vocabulary (OOV) index by setting mask_zero to True:
embedding_layer = Embedding(len(data["word_id"]) + 1, 300, weights= [w2v_weight],
input_length=data['x_test'].shape[1], trainable=True,
mask_zero=True)
For more information, see the documentation for Embedding().

Resources