Pytorch: loss is not changing - pytorch

I created a neural network in PyTorch. My loss function is a weighted negative log-likelihood. The weights are determined by the output of my neural network and must be fixed. It means the weights depend on the output of the neural network but must be fixed so the network only calculates the gradient of log part and not the weights. Here is my code:
import torch
import torch.nn as nn
def extended_cumsum(x):
return torch.cat([torch.zeros(x.size()[1:3]).unsqueeze(0), torch.cumsum(x, 0)], 0)
def is_positive(x):
x_sign = torch.sign(x)
return torch.where(x_sign < 0, torch.zeros_like(x_sign), x_sign)
def coupling_transform(x1, x2, nn_output, k):
nn_output_normalized = torch.softmax(nn_output, 0)
bins_weights = torch.ones_like(nn_output_normalized)/k
knots_xs = extended_cumsum(bins_weights)
prev_bins = is_positive(x2.unsqueeze(0) - knots_xs)
current_bins = prev_bins[:-1,:,:] - prev_bins[1:,:,:]
q_sum = torch.sum(prev_bins[1:,:,:]*nn_output_normalized, 0)
q_current = torch.sum(current_bins*nn_output_normalized, 0)
w_sum = torch.sum(prev_bins[1:,:,:]*bins_weights, 0)
c_values = q_sum + k*(x2 - w_sum)*q_current
log_det = torch.log(torch.prod(k*q_current, 1))
return x1, c_values, log_det
def flipping_dims(n, d1, d2):
dims = []
for i in range(n):
if i%2 == 0:
dims.append(d1)
else:
dims.append(d2)
return dims
class Linear(nn.Module):
def __init__(self, d1, d2, k, hidden):
super().__init__()
self.d1, self.d2, self.k = d1, d2, k
self.net = nn.Sequential(nn.Linear(d1, hidden),
nn.ReLU(),
nn.Linear(hidden, hidden),
nn.ReLU(),
nn.Linear(hidden, hidden),
nn.ReLU(),
nn.Linear(hidden, k*d2))
def forward(self, x, log_prob, flip=False):
x1, x2 = x[:, :self.d1], x[:, self.d1:]
if flip:
x1, x2 = x2, x1
z1, z2, log_det = coupling_transform(x1, x2, torch.reshape(self.net(x1), (self.k, -1, self.d2)), self.k)
if flip:
z1, z2 = z2, z1
z = torch.cat([z1, z2], 1)
return z, log_prob - log_det
class stacked_Linear(nn.Module):
def __init__(self, d1, d2, k, hidden, n):
super().__init__()
self.layers = nn.ModuleList([
Linear(_d1, _d2, k, hidden=hidden) for _, _d1, _d2 in zip(range(n), flipping_dims(n, d1, d2), flipping_dims(n, d1, d2)[::-1])
])
self.flips = [True if i%2 else False for i in range(n)]
def forward(self, x, log_prev_prob):
for layer, f in zip(self.layers, self.flips):
x, log_prob = layer(x, log_prev_prob, flip=f)
log_prev_prob = log_prob
return x, log_prob
def f(x):
return torch.prod(torch.exp(x), 1)
def my_loss(weights, log_prob):
loss = -torch.mean(weights*log_prob)
return loss
model = stacked_Linear(3, 3, 32, 16, 4)
optim = torch.optim.Adam(model.parameters(), lr=1e-4)
losses = []
for _ in range(100):
x = torch.rand(1000, 6)
optim.zero_grad()
z, log_prob = model(x, torch.zeros(1000))
f_values = f(z)
weights = f_values/torch.exp(log_prob)
loss = my_loss(weights, log_prob)
losses.append(loss)
loss.backward()
But the loss value doesn't decrease and it doesn't change if I fix x:
losses = []
x = torch.rand(1000, 6)
for _ in range(100):
optim.zero_grad()
z, log_prob = model(x, torch.zeros(1000))
f_values = f(z)
weights = f_values/torch.exp(log_prob)
loss = my_loss(weights, log_prob)
losses.append(loss)
loss.backward()
[tensor(-0.1160, grad_fn=<NegBackward>),
tensor(-0.1160, grad_fn=<NegBackward>),
tensor(-0.1160, grad_fn=<NegBackward>),
tensor(-0.1160, grad_fn=<NegBackward>),
tensor(-0.1160, grad_fn=<NegBackward>), ...]

Related

ViVIT PyTorch: RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15

I am trying to run Video Vision Transformer (ViViT) code with my dataset but getting an error using CrossEntropyLoss from Pytorch as the Loss function.
There are 6 classes I have:
['Run', 'Sit', 'Walk', 'Wave', 'Sit', 'Stand']
Optimizer
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=1e-9, momentum=0.9)
Class Weights
tensor([0.0045, 0.0042, 0.0048, 0.0038, 0.0070, 0.0065])
Loss Function
loss_func = nn.CrossEntropyLoss(weight=class_weights.to(device))
Code Throwning Error
train_epoch(model, optimizer, train_loader, train_loss_history, loss_func)
Error
RuntimeError: multi-target not supported at /pytorch/aten/src/THCUNN/generic/ClassNLLCriterion.cu:15
Code Calling the transformer
model = ViViT(224, 16, 100, 16).cuda()
Getting Video Frames
def get_frames(filename, n_frames=1):
frames = []
v_cap = cv2.VideoCapture(filename)
v_len = int(v_cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_list = np.linspace(0, v_len - 1, n_frames + 1, dtype=np.int16)
frame_dims = np.array([224, 224, 3])
for fn in range(v_len):
success, frame = v_cap.read()
if success is False:
continue
if (fn in frame_list):
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = cv2.resize(frame, (frame_dims[0], frame_dims[1]))
frames.append(frame)
v_cap.release()
return frames, v_len
Dataset Preprocessing
class DatasetProcessing(data.Dataset):
def __init__(self, df, root_dir):
super(DatasetProcessing, self).__init__()
# List of all videos path
video_list = df["Video"].apply(lambda x: root_dir + '/' + x)
self.video_list = np.asarray(video_list)
self.df = df
def __getitem__(self, index):
# Ensure that the raw videos are in respective folders and folder name matches the output class label
video_label = self.video_list[index].split('/')[-2]
video_name = self.video_list[index].split('/')[-1]
video_frames, len_ = get_frames(self.video_list[index], n_frames = 15)
video_frames = np.asarray(video_frames)
video_frames = video_frames/255
class_list = ['Run', 'Walk', 'Wave', 'Sit', 'Turn', 'Stand']
class_id_loc = np.where(class_list == video_label)
label = class_id_loc
d = torch.as_tensor(np.array(video_frames).astype('float'))
l = torch.as_tensor(np.array(label).astype('float'))
return (d, l)
def __len__(self):
return self.video_list.shape[0]
Training Epochs
def train_epoch(model, optimizer, data_loader, loss_history, loss_func):
total_samples = len(data_loader.dataset)
model.train()
for i, (data, target) in enumerate(data_loader):
optimizer.zero_grad()
x = data.cuda()
data = rearrange(x, 'b p h w c -> b p c h w').cuda()
target = target.type(torch.LongTensor).cuda()
pred = model(data.float())
output = F.log_softmax(pred, dim=1)
loss = loss_func(output, target.squeeze(1))
loss.backward()
optimizer.step()
if i % 100 == 0:
print('[' + '{:5}'.format(i * len(data)) + '/' + '{:5}'.format(total_samples) +
' (' + '{:3.0f}'.format(100 * i / len(data_loader)) + '%)] Loss: ' +
'{:6.4f}'.format(loss.item()))
loss_history.append(loss.item())
Evaluate Model
def evaluate(model, data_loader, loss_history, loss_func):
model.eval()
total_samples = len(data_loader.dataset)
correct_samples = 0
total_loss = 0
with torch.no_grad():
for data, target in data_loader:
x = data.cuda()
data = rearrange(x, 'b p h w c -> b p c h w').cuda()
target = target.type(torch.LongTensor).cuda()
output = F.log_softmax(model(data.float()), dim=1)
loss = loss_func(output, target)
_, pred = torch.max(output, dim=1)
total_loss += loss.item()
correct_samples += pred.eq(target).sum()
avg_loss = total_loss / total_samples
loss_history.append(avg_loss)
print('\nAverage test loss: ' + '{:.4f}'.format(avg_loss) +
' Accuracy:' + '{:5}'.format(correct_samples) + '/' +
'{:5}'.format(total_samples) + ' (' +
'{:4.2f}'.format(100.0 * correct_samples / total_samples) + '%)\n')
Transformer
class Transformer(nn.Module):
def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
super().__init__()
self.layers = nn.ModuleList([])
self.norm = nn.LayerNorm(dim)
for _ in range(depth):
self.layers.append(nn.ModuleList([
PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)),
PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
]))
def forward(self, x):
for attn, ff in self.layers:
x = attn(x) + x
x = ff(x) + x
return self.norm(x)
ViViT Code
class ViViT(nn.Module):
def __init__(self, image_size, patch_size, num_classes, num_frames, dim = 192, depth = 4, heads = 3, pool = 'cls', in_channels = 3, dim_head = 64, dropout = 0.,
emb_dropout = 0., scale_dim = 4, ):
super().__init__()
assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'
assert image_size % patch_size == 0, 'Image dimensions must be divisible by the patch size.'
num_patches = (image_size // patch_size) ** 2
patch_dim = in_channels * patch_size ** 2
self.to_patch_embedding = nn.Sequential(
Rearrange('b t c (h p1) (w p2) -> b t (h w) (p1 p2 c)', p1 = patch_size, p2 = patch_size),
nn.Linear(patch_dim, dim),
)
self.pos_embedding = nn.Parameter(torch.randn(1, num_frames, num_patches + 1, dim))
self.space_token = nn.Parameter(torch.randn(1, 1, dim))
self.space_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
self.temporal_token = nn.Parameter(torch.randn(1, 1, dim))
self.temporal_transformer = Transformer(dim, depth, heads, dim_head, dim*scale_dim, dropout)
self.dropout = nn.Dropout(emb_dropout)
self.pool = pool
self.mlp_head = nn.Sequential(
nn.LayerNorm(dim),
nn.Linear(dim, num_classes)
)
def forward(self, x):
x = self.to_patch_embedding(x)
b, t, n, _ = x.shape
cls_space_tokens = repeat(self.space_token, '() n d -> b t n d', b = b, t=t)
x = torch.cat((cls_space_tokens, x), dim=2)
x += self.pos_embedding[:, :, :(n + 1)]
x = self.dropout(x)
x = rearrange(x, 'b t n d -> (b t) n d')
x = self.space_transformer(x)
x = rearrange(x[:, 0], '(b t) ... -> b t ...', b=b)
cls_temporal_tokens = repeat(self.temporal_token, '() n d -> b n d', b=b)
x = torch.cat((cls_temporal_tokens, x), dim=1)
x = self.temporal_transformer(x)
x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]
return self.mlp_head(x)
Multi target appears to be a feature supported since version 1.10.0.
https://discuss.pytorch.org/t/crossentropyloss-vs-per-class-probabilities-target/138331
Please check your pytorch version.
Please refer to the example of using the UTF101 top5 dataset, which is available on my Colab. The version of pytorch is 1.12.0+cu113, and the code you listed was able to run the training almost exactly as it was written.

PyTorch: GRU, one-to-many / many-to-one

I would like to implement a GRU able to encode a sequence of vectors to one vector (many-to-one), and then another GRU able to decode a vector to a sequence of vector (one-to-many). The size of the vectors wouldn't be changed. I would like to have an opinion about what I implemented.
Here is the code:
class AEGRU(nn.Module):
def __init__(self, opt):
super(AEGRU, self).__init__()
self.length = 256
self.latent_space = 256
self.num_layers = 1
self.GRU_enc = nn.GRU(input_size=3, hidden_size=self.latent_space, num_layers=self.num_layers, batch_first=True)
self.fc_enc = nn.Linear(self.latent_space, self.latent_space)
self.GRU_dec = nn.GRU(input_size=self.latent_space, hidden_size=3, num_layers=self.num_layers, batch_first=True)
self.fc_dec = nn.Linear(3, 3)
def enc(self, x):
# x has shape: Batch_size x self.length x 3
h0 = torch.zeros(self.num_layers, x.shape[0], self.latent_space).cuda()
out, _ = self.GRU_enc(x, h0)
out = out[:, -1, :]
out = self.fc_enc(out)
return out
def dec(self, x):
# x has shape: Batch_size x self.latent_space
x = x[:, None, :]
h = torch.zeros(self.num_layers, x.shape[0], 3).cuda()
# method 1 ??
'''outputs = torch.zeros(x.shape[0], self.length, 3).cuda()
for i in range(self.length):
out, h = self.GRU_dec(x, h)
outputs[:, i, :] = out[:, 0, :]'''
# method 2 ??
x = x.repeat(1, self.length, 1)
outputs, _ = self.GRU_dec(x, h)
# linear layer
outputs = self.fc_dec(outputs)
return outputs
def forward(self, x):
self.indices = []
latent = self.enc(x)
output = self.dec(latent)
return output
I am not sure whether this is the good way to do a one-to-many GRU. Could I have some opinions about this?
Thanks for reading!

Numpy implementation for regression using NN

I am implementing my own Neural Network model for regression using only NumPy, and I'm getting really weird results when I'm testing my model on m > 1 samples (for m=1 it works fine).. It seems like the model collapses and predicts only specific values for the whole batch:
Input:
X [[ 7.62316802 -6.12433912]
[ 1.11048966 4.97509421]]
Expected Output:
Y [[16.47952332 12.50288412]]
Model Output
y_hat [[10.42446234 10.42446234]]
Any idea what might cause this issue?
My code:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# np.seterr(all=None, divide=None, over=None, under=None, invalid=None)
data_x = np.random.uniform(0, 10, size=(2, 1))
data_y = (2 * data_x).sum(axis=0, keepdims=True)
# data_y = data_x[0, :] ** 2 + data_x[1, :] ** 2
# data_y = data_y.reshape((1, -1))
# # fig = plt.figure()
# # ax = fig.add_subplot(111, projection='3d')
# # ax.scatter(data_x[0, :], data_x[1, :], data_y)
# # plt.show()
memory = dict()
nn_architecture = [
{"input_dim": 2, "output_dim": 6, "activation": "sigmoid", "bias": True},
{"input_dim": 6, "output_dim": 4, "activation": "sigmoid", "bias": True},
{"input_dim": 4, "output_dim": 1, "activation": "relu", "bias": True}
]
def init_network_parameters(nn_architecture):
parameters = []
for idx, layer in enumerate(nn_architecture):
layer_params = {}
input_dim, output_dim, activation, bias = layer.values()
W = np.random.uniform(0, 1, (output_dim, input_dim))
B = np.zeros((output_dim, 1))
if bias:
B = np.ones((output_dim, 1))
activation_func = identity
backward_activation_func = identity_backward
if activation is 'sigmoid':
activation_func = sigmoid
backward_activation_func = sigmoid_backward
elif activation is 'relu':
activation_func = relu
backward_activation_func = relu_backward
else:
print(f"Activation function set to identity for layer {idx}")
layer_params[f"W"] = W
layer_params[f"B"] = B
layer_params[f"activation"] = activation_func
layer_params[f"backward_activation"] = backward_activation_func
layer_params[f"bias"] = bias
parameters.append(layer_params)
return parameters
def identity(z):
return z
def sigmoid(z):
return np.clip(1 / (1 + np.exp(-z)), -100, 100)
def relu(z):
output = np.array(z, copy=True)
output[z <= 0] = 0
return output
def identity_backward(z, dA):
return dA
def sigmoid_backward(z, dA):
return np.clip(z * (1-z) * dA, -100, 100)
def relu_backward(z, dA):
output = np.ones(z.shape)
output[z <= 0] = 0
return output * dA
def forward_single_layer(prev_A, parameters, idx):
W = parameters[f"W"]
B = parameters[f"B"]
activation = parameters[f"activation"]
if parameters["bias"]:
curr_Z = W.dot(prev_A) + B
else:
curr_Z = W.dot(prev_A)
curr_A = activation(curr_Z)
memory[f"Z{idx+1}"] = curr_Z
memory[f"A{idx+1}"] = curr_A
return curr_Z, curr_A
def forward(X, parameters):
prev_A = X
memory["A0"] = prev_A
for idx, layer_params in enumerate(parameters):
curr_Z, prev_A = forward_single_layer(prev_A=prev_A, parameters=layer_params, idx=idx)
return prev_A
def criteria(y_hat, y):
assert y_hat.shape == y.shape
n = y_hat.shape[0]
m = y_hat.shape[1]
loss = np.sum(y_hat - y, axis=1) / m
dA = (y_hat - y) / m
return loss, dA
def backward_single_layer(prev_A, dA, curr_W, curr_Z, backward_activation, idx):
m = prev_A.shape[1]
dZ = backward_activation(z=curr_Z, dA=dA)
dW = np.dot(dZ, prev_A.T) / m
dB = np.sum(dZ, axis=1, keepdims=True) / m
dA = np.dot(curr_W.T, dZ)
return dA, dW, dB
def backpropagation(parameters, dA):
grads = {}
for idx in reversed(range(len(parameters))):
layer = parameters[idx]
prev_A = memory[f"A{idx}"]
curr_Z = memory[f"Z{idx+1}"]
curr_W = layer["W"]
backward_activation = layer["backward_activation"]
dA, dW, dB = backward_single_layer(prev_A, dA, curr_W, curr_Z, backward_activation, idx)
grads[f"W{idx}"] = dW
grads[f"B{idx}"] = dB
return grads
def update_params(parameters, grads, lr=0.001):
new_params = []
for idx, layer in enumerate(parameters):
layer["W"] -= lr*grads[f"W{idx}"]
layer["B"] -= lr*grads[f"B{idx}"]
new_params.append(layer)
return new_params
X = np.random.uniform(-10, 10, (2, 2))
Y = 2*X[0, :] + X[1, :] ** 2
Y = Y.reshape((1, X.shape[1]))
parameters = init_network_parameters(nn_architecture)
n_epochs = 1000
lr = 0.01
loss_history = []
for i in range(n_epochs):
y_hat = forward(X, parameters)
loss, dA = criteria(y_hat, Y)
loss_history.append(loss)
grads = backpropagation(parameters, dA)
parameters = update_params(parameters, grads, lr)
if not i % 10:
print(f"Epoch {i}/{n_epochs} loss={loss}")
print("X", X)
print("Y", Y)
print("y_hat", y_hat)
There wasn't a problem with my implementation, just overfitting.
More information can be found here.

Backpropogation with self defined loss function and network

I want to create a CNN network for object localization (It is given that there is only one object). For this I am using some general layers and in the end I want to get the nearest and farthest corner to origin. I am also using self defined loss function which is (100 - intersaction over union in %). Loss is not converging. What may be the problem? Wheather backpropogation will work with this network or some other problem? Below is the code:
For illustraion purpose see .
Network:
class convnet(nn.Module):
def __init__(self):
super(convnet, self).__init__()
self.conv1 = nn.Conv2d(1, 4, kernel_size=5)
self.pool1 = nn.MaxPool2d(kernel_size=3,stride=3)
self.conv2 = nn.Conv2d(4, 8, kernel_size=5)
self.pool2 = nn.MaxPool2d(kernel_size=3,stride=3)
self.conv3 = nn.Conv2d(8, 16, kernel_size=5)
self.pool3 = nn.MaxPool2d(kernel_size=3,stride=3)
self.fc1 = nn.Linear(5040, 1000)
self.fc2 = nn.Linear(1000, 84)
self.fc3 = nn.Linear(84, 4)
def forward(self, x):
x = F.relu(self.conv1(x))
x = self.pool1(x)
x = F.relu(self.conv2(x))
x = self.pool2(x)
x = F.relu(self.conv3(x))
x = self.pool3(x)
x = x.view(-1, 5040)
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
a = self.sigmoid(self.fc3(x))
c = torch.zeros(a.shape[0], 2)
for idx, x in enumerate(a):
d1 = x[0] ** 2 + x[1] ** 2
d2 = x[2] ** 2 + x[3] ** 2
d3 = x[0] ** 2 + x[3] ** 2
d4 = x[2] ** 2 + x[1] ** 2
dmin = min(d1, d2, d3, d4)
if d1 == dmin:
c[idx] = torch.tensor([x[0], x[1]])
elif d2 == dmin:
c[idx] = torch.tensor([x[2], x[3]])
elif d3 == dmin:
c[idx] = torch.tensor([x[0], x[3]])
elif d4 == dmin:
c[idx] = torch.tensor([x[2], x[1]])
m = torch.tensor([[640, 480, 640, 480]]).type(torch.DoubleTensor).cuda()
return c*m
def sigmoid(self, z):
return 1/(1+torch.exp(-z))
Loss function:
def iou(box_a, box_b):
A = box_a.size(0)
B = box_b.size(0)
max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
box_b[:, :2].unsqueeze(0).expand(A, B, 2))
inter = torch.clamp((max_xy - min_xy), min=0)
inter =inter[:, :, 0] * inter[:, :, 1]
area_a = ((box_a[:, 2]-box_a[:, 0]) *
(box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter)
area_b = ((box_b[:, 2]-box_b[:, 0]) *
(box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter)
union = area_a + area_b - inter
return ((inter / union)*100/float(A*A)).sum()
def criterion(output, labels):
return (100-iou(output, labels))
You can check full code here: link

TensorFlow, losses after training the model are different than losses printed during the last Epoch of Stochastic Gradient Descent.

I'm trying to do binary classification on two spirals. For testing, I am feeding my neural network the exact spiral data with no noise, and the model seems to work as the losses near 0 during SGD. However, after using my model to infer the exact same data points after SGD has completed, I get completely different losses than what was printed during the last epoch of SGD.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
np.set_printoptions(threshold=np.nan)
# get the spiral points
t_p = np.linspace(0, 4, 1000)
x1_p = t_p * np.cos(t_p*2*np.pi)
y1_p = t_p * np.sin(t_p*2*np.pi)
x2_p = t_p * np.cos(t_p*2*np.pi + np.pi)
y2_p = t_p * np.sin(t_p*2*np.pi + np.pi)
plt.plot(x1_p, y1_p, x2_p, y2_p)
# generate data points
x1_dat = x1_p
y1_dat = y1_p
x2_dat = x2_p
y2_dat = y2_p
def model_variable(shape, name, initializer):
variable = tf.get_variable(name=name,
dtype=tf.float32,
shape=shape,
initializer=initializer
)
tf.add_to_collection('model_variables', variable)
return variable
class Model():
#layer specifications includes bias nodes
def __init__(self, sess, data, nEpochs, learning_rate, layer_specifications):
self.sess = sess
self.data = data
self.nEpochs = nEpochs
self.learning_rate = learning_rate
if layer_specifications[0] != 2 or layer_specifications[-1] != 1:
raise ValueError('First layer only two nodes, last layer only 1 node')
else:
self.layer_specifications = layer_specifications
self.build_model()
def build_model(self):
# x is the two nodes that will be layer one, will input an x, y coordinate
# and need to classify which spiral is it on, the non phase shifted or the phase
# shifted one.
# y is the output of the model
self.x = tf.placeholder(tf.float32, shape=[2, 1])
self.y = tf.placeholder(tf.float32, shape=[])
self.thetas = []
self.biases = []
for i in range(1, len(self.layer_specifications)):
self.thetas.append(model_variable([self.layer_specifications[i], self.layer_specifications[i-1]], 'theta'+str(i), tf.random_normal_initializer(stddev=0.1)))
self.biases.append(model_variable([self.layer_specifications[i], 1], 'bias'+str(i), tf.constant_initializer()))
#forward propagation
intermediate = self.x
for i in range(0, len(self.layer_specifications)-1):
if i != (len(self.layer_specifications) - 2):
intermediate = tf.nn.elu(tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i]))
else:
intermediate = tf.add(tf.matmul(self.thetas[i], intermediate), self.biases[i])
self.yhat = tf.squeeze(intermediate)
self.loss = tf.nn.sigmoid_cross_entropy_with_logits(self.yhat, self.y);
def train_init(self):
model_variables = tf.get_collection('model_variables')
self.optim = (
tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate)
.minimize(self.loss, var_list=model_variables)
)
self.check = tf.add_check_numerics_ops()
self.sess.run(tf.initialize_all_variables())
# here is where x and y combine to get just x in tf with shape [2, 1] and where label becomes y in tf
def train_iter(self, x, y):
loss, _, _ = sess.run([self.loss, self.optim, self.check],
feed_dict = {self.x: x, self.y: y})
print('loss: {0} on:{1}'.format(loss, x))
# here x and y are still x and y coordinates, label is separate
def train(self):
for _ in range(self.nEpochs):
for x, y, label in self.data():
print(label)
self.train_iter([[x], [y]], label)
print("NEW ONE:\n")
# here x and y are still x and y coordinates, label is separate
def infer(self, x, y, label):
return self.sess.run((tf.sigmoid(self.yhat), self.loss), feed_dict={self.x : [[x], [y]], self.y : label})
def data():
#so first spiral is label 0, second is label 1
for _ in range(len(x1_dat)-1, -1, -1):
for dat in range(2):
if dat == 0:
yield x1_dat[_], y1_dat[_], 0
else:
yield x2_dat[_], y2_dat[_], 1
layer_specifications = [2, 100, 100, 100, 1]
sess = tf.Session()
model = Model(sess, data, nEpochs=10, learning_rate=1.1e-2, layer_specifications=layer_specifications)
model.train_init()
model.train()
inferrences_1 = []
inferrences_2 = []
losses = 0
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x1_p[i], y1_p[i], 0)
if infer >= 0.5:
print('loss: {0} on point {1}, {2}'.format(loss, x1_p[i], y1_p[i]))
losses = losses + 1
inferrences_1.append('r')
else:
inferrences_1.append('g')
for i in range(len(t_p)-1, -1, -1):
infer, loss = model.infer(x2_p[i], y2_p[i], 1)
if infer >= 0.5:
inferrences_2.append('r')
else:
print('loss: {0} on point {1}, {2}'.format(loss, x2_p[i], y2_p[i]))
losses = losses + 1
inferrences_2.append('g')
print('total losses: {}'.format(losses))
plt.scatter(x1_p, y1_p, c=inferrences_1)
plt.scatter(x2_p, y2_p, c=inferrences_2)
plt.show()

Resources