Keras Softmax issues

I am new to Keras and am a bit confused at the moment:
def get_compiled_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(1000, input_shape=(1000,), activation='relu', kernel_initializer='glorot_uniform'),
        tf.keras.layers.Dense(1000, activation='relu', kernel_initializer='glorot_uniform'),
        tf.keras.layers.Dense(41, activation='softmax', kernel_initializer='glorot_uniform')
    ])
    model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])
    return model
I then call my model as follows:
model = get_compiled_model()
for i in range(10):
    model.fit(train_object, epochs=10)
    test_loss, test_acc = model.evaluate(test_object, verbose=2)
I keep getting 0 accuracy even after a lot of training. I think it is because the model is hardmaxing from the start:
for row in test_object.take(1):
    print(model.predict(row[0])[0])
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0.], dtype=float32)
This happens even at the very start of training, which is confusing, since we would expect values with decimals rather than all 0's and 1's.
Any help would be appreciated. To rephrase the question: I am confused as to why the model is hardmaxing instead of softmaxing.
UPDATE: I experimented with the size of the model; decreasing it gave us:
array([0.02439025, 0.02439031, 0.02439018, 0.02439029, 0.02439014, 0.02438815, 0.02439025, 0.02439022, 0.02439038, 0.02439022, 0.02439025, 0.02439038, 0.0243915 , 0.02439023, 0.02439109, 0.02438496, 0.02439068, 0.02439134, 0.02439025, 0.02439033, 0.02438724, 0.02439025, 0.02439067, 0.02439027, 0.02439025, 0.02439088, 0.02439021, 0.02439019, 0.02439023, 0.02439035, 0.02439059, 0.02439025, 0.02439438, 0.02439116, 0.02439019, 0.02439001, 0.02439013, 0.02439059, 0.02439025, 0.02439023, 0.02439026], dtype=float32)
which is the desired effect. Any idea why the larger net causes it to hardmax?
UPDATE 2:
def get_compiled_model():
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(124, input_shape=(1000,), activation='relu',
                              kernel_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.8),
        tf.keras.layers.Dense(256, activation='relu', kernel_initializer='glorot_uniform'),
        tf.keras.layers.Dropout(0.8),
        tf.keras.layers.Dense(41, activation='relu', kernel_initializer='glorot_uniform'),
        tf.keras.layers.Softmax(-1)
    ])
    model.compile(optimizer='Adam',
                  loss='categorical_crossentropy',
                  metrics=['categorical_accuracy'])
    return model
The current issue is that it now converges to assigning the same probability to every class, all the time.

What is train_object? It's possible you forgot to specify targets, e.g., the y parameter of the fit function:
fit(x=None, y=None, batch_size=None, epochs=1, verbose=1, callbacks=None, validation_split=0.0, validation_data=None, shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None, validation_freq=1, max_queue_size=10, workers=1, use_multiprocessing=False)
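
If train_object is, say, a tf.data.Dataset, it must yield (features, labels) pairs, otherwise fit has no targets to train against. A minimal sketch with hypothetical random data (the shapes match the question: 1000-dim inputs, 41 one-hot classes; all names here are illustrative):

import numpy as np
import tensorflow as tf

# Hypothetical data, only to illustrate the expected shapes
features = np.random.rand(256, 1000).astype('float32')
labels = tf.keras.utils.to_categorical(
    np.random.randint(41, size=256), num_classes=41)

# A dataset passed to fit() must yield (x, y) tuples
train_object = tf.data.Dataset.from_tensor_slices((features, labels)).batch(32)

model = get_compiled_model()
model.fit(train_object, epochs=10)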

Related

torch.mul causes param.grad to be NoneType

import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self, D_u, D_i, D_t, D_m):
        super(Net, self).__init__()
        self.lin_u = nn.Linear(D_u, 1)
        self.lin_i = nn.Linear(D_i, 1)
        self.lin_t = nn.Linear(D_t, 1)
        self.lin_m = nn.Linear(D_m, 1)
        self.output = nn.Linear(4, 1)

    def forward(self, args):
        (u, i, t, m) = args
        u = F.relu(self.lin_u(u))
        i = F.relu(self.lin_i(i))
        t = F.relu(self.lin_t(t))
        m = F.relu(self.lin_m(m))
        out = torch.mul(u, i)
        out = torch.mul(out, t)
        out = torch.mul(out, m)
        return out
I have this simple model class with four inputs, each with its own linear layer. I want the output to be the product of the four nodes, but no matter how I multiply them (with torch.mul or *), the grad is always NoneType:
model = Net(N, 3, T, 1)
u_block, i_block, t_block, m_block, y_block = get_data_new(data)
loss_fn = torch.nn.MSELoss(reduction='sum')
learning_rate = 1e-4
for t in range(5000):
    y_pred = model((u_block, i_block, t_block, m_block))
    loss = loss_fn(y_pred, y_block)
    if t % 100 == 99:
        print(t, loss.item())
    model.zero_grad()
    loss.backward()
    with torch.no_grad():
        for param in model.parameters():
            param -= learning_rate * param.grad
TypeError
---> param -= learning_rate * param.grad
TypeError: unsupported operand type(s) for *: 'float' and 'NoneType'
I've set the inputs to requires_grad=True. I think the issue is that out is not a leaf and therefore has no gradient, but I don't know how to fix this.
Edit:
The data u_block, i_block, t_block, m_block, y_block are shown below. u_block, i_block, and t_block are one-hot vectors.
u_block: tensor([[1., 0., 0., ..., 0., 0., 0.],
[1., 0., 0., ..., 0., 0., 0.],
[1., 0., 0., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 0., 0., 1.],
[0., 0., 0., ..., 0., 0., 1.],
[0., 0., 0., ..., 0., 0., 1.]], requires_grad=True)
i_block: tensor([[1., 0., 0.],
[1., 0., 0.],
[1., 0., 0.],
...,
[0., 1., 0.],
[0., 1., 0.],
[0., 1., 0.]], requires_grad=True)
t_block: tensor([[1., 0., 0., ..., 0., 0., 0.],
[0., 1., 0., ..., 0., 0., 0.],
[0., 0., 1., ..., 0., 0., 0.],
...,
[0., 0., 0., ..., 1., 0., 0.],
[0., 0., 0., ..., 0., 1., 0.],
[0., 0., 0., ..., 0., 0., 1.]], requires_grad=True)
m_block: tensor([[ 0.0335],
[ 0.0000],
[ 0.0000],
...,
[ 0.1515],
[-0.2261],
[-0.0402]], requires_grad=True)
y_block: tensor([[ 0.0000],
[ 0.0000],
[ 0.0000],
...,
[-0.2261],
[-0.0402],
[-0.1318]], requires_grad=True)
Make the following changes. You are not using self.output, so I have commented it out. Its gradient is None precisely because it never takes part in the forward pass, even though its parameters have requires_grad=True by default.
class Net(torch.nn.Module):
    def __init__(self, D_u, D_i, D_t, D_m):
        super(Net, self).__init__()
        self.lin_u = nn.Linear(D_u, 1)
        self.lin_i = nn.Linear(D_i, 1)
        self.lin_t = nn.Linear(D_t, 1)
        self.lin_m = nn.Linear(D_m, 1)
        # self.output = nn.Linear(4, 1)

    def forward(self, args):
        (u, i, t, m) = args
        u = F.relu(self.lin_u(u))
        i = F.relu(self.lin_i(i))
        t = F.relu(self.lin_t(t))
        m = F.relu(self.lin_m(m))
        out = torch.mul(u, i)
        out = torch.mul(out, t)
        out = torch.mul(out, m)
        return out
I hope this will solve your problem.
Also, I have some suggestions:
Change the name args to something else, or, if you want to keep it, use it in its full glory by changing it to *args.
Don't set requires_grad=True on the inputs, as that makes autograd compute d_loss/d_input as well. (Only do this if input gradients are actually your intention.)
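
To illustrate why the unused layer's gradient was None, here is a minimal sketch (independent of the model above) showing that a parameter which never takes part in the forward pass keeps grad=None after backward():

import torch
import torch.nn as nn

lin_used = nn.Linear(3, 1)
lin_unused = nn.Linear(3, 1)   # never used in the forward pass

x = torch.randn(4, 3)
loss = lin_used(x).sum()
loss.backward()

print(lin_used.weight.grad is None)    # False: part of the autograd graph
print(lin_unused.weight.grad is None)  # True: no path from the loss to it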

Why do 'loss.backward()' and 'weight.grad' return a tensor containing all zeros?

When I run loss.backward() and then inspect weight.grad, I get a tensor containing all zeros. Also, weight.grad_fn returns None.
However, everything seems to return the correct result for the second layer, w2.
If I play with simple operations such as x*2 or x**2, backward() and .grad return correct results.
Here's my code:
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import datasets, transforms

# Getting MNIST data
num_workers = 0
batch_size = 64
transform = transforms.ToTensor()
train_data = datasets.MNIST(root='data', train=True, download=True, transform=transform)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers)
dataiter = iter(train_loader)
images, labels = next(dataiter)

#### NN Part
def activation(x):
    return 1 / (1 + torch.exp(-x))

inputs = images  # the DataLoader already yields tensors
# Flatten the inputs from (64, 1, 28, 28) into (64, 784)
inputs = inputs.reshape(images.shape[0], int(images.shape[1] * images.shape[2] * images.shape[3]))

w1 = torch.randn(784, 256, requires_grad=True)  # n_input, n_hidden
b1 = torch.randn(256)                           # n_hidden
w2 = torch.randn(256, 10, requires_grad=True)   # n_hidden, n_output
b2 = torch.randn(10)                            # n_output

h = activation(torch.mm(inputs, w1) + b1)
y = torch.mm(h, w2) + b2

y.sum().backward()
print(w1.grad)
print(w1.grad_fn)
#print(w2.grad)
#print(w2.grad_fn)
By the way, I get the same problem if I run it this way instead:
images = images.reshape(images.shape[0], -1)
model = nn.Sequential(nn.Linear(784, 128),
                      nn.ReLU(),
                      nn.Linear(128, 64),
                      nn.ReLU(),
                      nn.Linear(64, 10),
                      nn.LogSoftmax(dim=1))
logits = model(images)
criterion = nn.NLLLoss()
loss = criterion(logits, labels)
print(loss)
print(loss.grad_fn)
print('Before backward pass: ', model[0].weight.grad)
loss.backward()
print('After: ', model[0].weight.grad)
#print('After: ', model[2].weight.grad)
#print('After: ', model[4].weight.grad)
The gradients of w1 are not all zero; there are simply a lot of zeros, especially around the border, because the MNIST images have many black pixels (zeros). Multiplying by zero produces zero gradients.
Printing w1.grad only shows a very small part of the tensor (the borders), so you just can't see the non-zero values.
w1.grad
# => tensor([[0., 0., 0., ..., 0., 0., 0.],
# [0., 0., 0., ..., 0., 0., 0.],
# [0., 0., 0., ..., 0., 0., 0.],
# ...,
# [0., 0., 0., ..., 0., 0., 0.],
# [0., 0., 0., ..., 0., 0., 0.],
# [0., 0., 0., ..., 0., 0., 0.]])
# Indices of non-zero elements
w1.grad.nonzero()
# => tensor([[ 71, 0],
# [ 71, 1],
# [ 71, 2],
# ...,
# [746, 253],
# [746, 254],
# [746, 255]])
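
To convince yourself, assuming the w1 from the question, you can measure the fraction of non-zero entries or look at an interior slice away from the all-black border pixels (the slice indices here are arbitrary):

# Fraction of non-zero gradient entries
print(w1.grad.nonzero().shape[0] / w1.grad.numel())

# An interior slice, away from the border pixels
print(w1.grad[300:320, :8])

# Or raise the print threshold to see the whole tensor
torch.set_printoptions(profile="full")
print(w1.grad)
torch.set_printoptions(profile="default")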

How to get probability of each class instead of one hot encoded array with one value 1 and others 0?

My Sequential CNN model is trained on 39 classes as a multi-class classifier. For predictions it returns a one-hot encoded array like [0,0,...,1,0,...], whereas I want something like [0.012,0.022,0.067,...,0.997,0.0004,...].
Is there a way to get this? If not, what exactly should I change to get it?
The reason I want this is to verify how close the other classes are: if one class says 0.98 and another says 0.96, then I am doing something wrong, the data isn't enough, etc.
Thank you :)
My model is basically a Keras ResNet50 with the following config:
model = keras.applications.resnet.ResNet50(include_top=False, weights=None, input_tensor=None,
                                           input_shape=(64, 64, 1), pooling='avg', classes=39)
x = model.output
x = Dropout(0.7)(x)
num_classes = 39
predictions = Dense(num_classes, activation='softmax')(x)
model = Model(inputs=model.input, outputs=predictions)
optimizer = keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, amsgrad=False)
model.compile(optimizer, loss='categorical_crossentropy', metrics=['categorical_accuracy'],
              loss_weights=None, sample_weight_mode=None, weighted_metrics=None, target_tensors=None)
Sample input:
import cv2
import numpy as np

img = cv2.imread(IMAGE_PATH, 0)
img = cv2.resize(img, (64, 64))
img = np.reshape(img, (1, 64, 64, 1))
predicted_class_indices = np.argmax(model.predict(img, verbose=1))
Sample output:
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
Desired output (numbers are hypothetical):
array([[0.022, 0.353, 0.0535, 0.52, 0.212, 0.822, 0.532, 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0.]], dtype=float32)
One way to do so is to remove the last activation layer (related issue).
You can do so with model.layers[-1].activation = None.
However, a softmax should output a probability distribution, not a one-hot vector, so you might also want to check how your training is going.
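
If the goal is just to see how close the runner-up classes are, a quick sketch (assuming the model and img from the question) is to sort the softmax output directly:

import numpy as np

probs = model.predict(img, verbose=1)[0]   # softmax output, shape (39,)
top5 = np.argsort(probs)[::-1][:5]         # indices of the 5 most likely classes
for idx in top5:
    print(idx, probs[idx])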

How to do one line flag embed in PyTorch (not nn.Embedding)?

With a generator, I create a random batch like this:
import torch

n = 10
batch_size = 2
x = torch.zeros((batch_size, n), dtype=torch.float)
in_flags = torch.randint(n, (batch_size,), dtype=torch.long)
for idx, row in enumerate(x):
    row[in_flags[idx]] = 1.0
But the disadvantage is that the loop runs in Python.
This is the original meaning of embedding (not to be confused with PyTorch's nn.Embedding). Is it possible to do this with a single PyTorch operation so it executes natively, or on the GPU?
You can do it like this:
import torch

n = 10
batch_size = 2
in_flags = torch.randint(n, (batch_size,), dtype=torch.long)
x = torch.zeros((batch_size, n), dtype=torch.float)
# advanced indexing: one write per row, no Python loop
x[torch.arange(batch_size), in_flags] = 1.0
print(in_flags)
print(x)
Output:
tensor([8, 0])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
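
Two other idiomatic ways to produce the same one-hot rows, both of which run natively and on the GPU (a sketch; F.one_hot needs a reasonably recent PyTorch version):

import torch
import torch.nn.functional as F

n = 10
in_flags = torch.tensor([8, 0])

# Built-in helper; returns int64, so cast to float if needed
x = F.one_hot(in_flags, num_classes=n).float()

# Equivalent with scatter_: write 1.0 at column in_flags[i] of row i
x2 = torch.zeros(len(in_flags), n).scatter_(1, in_flags.unsqueeze(1), 1.0)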

Getting embedding matrix of all zeros after performing word embedding on any input data

I am trying to do word embeddings in Keras, using glove.6B.50d.txt for the purpose. I get correct output up to the preparation of the embedding index from the glove.6B.50d.txt file.
But the embedding matrix I build is always full of zeros whenever I map the words from my input to entries in the embedding index.
Here is the code:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np

# example sentence given as input
line = "The quick brown fox jumped over the lazy dog"
line = line.split(" ")

# this is my embedding file
EMBEDDING_FILE = 'glove.6B.50d.txt'
embed_size = 10       # how big is each word vector
max_features = 10000  # how many unique words to use (i.e. num rows in the embedding matrix)
maxlen = 10           # max number of words in a comment to use

tokenizer = Tokenizer(num_words=max_features, split=" ", char_level=False)
tokenizer.fit_on_texts(list(line))
list_tokenized_train = tokenizer.texts_to_sequences(line)
sequences = tokenizer.texts_to_sequences(line)
word_index = tokenizer.word_index
print('Found %s unique tokens.' % len(word_index))
X_t = pad_sequences(list_tokenized_train, maxlen=maxlen)
print(sequences)
print(word_index)
print('Shape of data tensor:', X_t.shape)

# Correct output here:
# Found 8 unique tokens.
# [[1], [2], [3], [4], [5], [6], [1], [7], [8]]
# {'the': 1, 'quick': 2, 'brown': 3, 'fox': 4, 'jumped': 5, 'over': 6, 'lazy': 7, 'dog': 8}
# Shape of data tensor: (9, 10)
# loading the embedding file to prepare the embedding index
embeddings_index = {}
for i in open(EMBEDDING_FILE, "rb"):
    values = i.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
print('Found %s word vectors.' % len(embeddings_index))
# Found 400000 word vectors.
#Found 400000 word vectors.
# making the embedding matrix
embedding_matrix = np.zeros((len(word_index) + 1, embed_size))
for word, i in word_index.items():
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        # words not found in the embedding index will be all zeros
        embedding_matrix[i] = embedding_vector
When I print the embedding matrix, I get all zeros (i.e. not a single input word is recognized):
array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
Also, if I print embeddings_index.get(word) on each iteration, it is unable to fetch the word and returns None.
Where am I going wrong in the code?
The embed size should be 50, not 10; it is the dimensionality of the word vectors in glove.6B.50d.txt.
The number of features should be >> 50 (make it close to 10,000); restricting it to 50 means a whole lot of the vectors will be missing.
Got the problem solved today.
It turns out embeddings_index.get(word) was unable to fetch the word because of an encoding issue: reading the file in binary mode ("rb") makes the dictionary keys bytes objects (e.g. b'the'), so lookups with str keys like 'the' always return None.
I changed for i in open(EMBEDDING_FILE, "rb"): in the preparation of the embedding index to for i in open(EMBEDDING_FILE, 'r', encoding='utf-8'):
and this solved the problem.
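
For reference, a sketch of the fixed loading loop; in text mode with utf-8 the dictionary keys are str, so lookups from the tokenizer's word_index succeed:

import numpy as np

embeddings_index = {}
with open('glove.6B.50d.txt', 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        # Keys are now str (e.g. 'the'), matching word_index
        embeddings_index[values[0]] = np.asarray(values[1:], dtype='float32')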
