I have a PublicTest function that runs every epoch for validation, and a transform_test variable that transforms the validation data as follows:
transform_test = transforms.Compose([
transforms.TenCrop(cut_size),
transforms.Lambda(lambda crops: torch.stack([transforms.ToTensor()(crop) for crop in crops])),
])
def PublicTest(epoch):
global PublicTest_acc
global best_PublicTest_acc
global best_PublicTest_acc_epoch
net.eval()
PublicTest_loss = 0
correct = 0
total = 0
for batch_idx, (inputs, targets) in enumerate(PublicTestloader):
bs, ncrops, c, h, w = np.shape(inputs)
inputs = inputs.view(-1, c, h, w)
if use_cuda:
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = Variable(inputs, volatile=True), Variable(targets)
outputs = net(inputs)
outputs_avg = outputs.view(bs, ncrops, -1).mean(1) # avg over crops
loss = criterion(outputs_avg, targets)
PublicTest_loss += loss.item()
_, predicted = torch.max(outputs_avg.data, 1)
total += targets.size(0)
correct += predicted.eq(targets.data).cpu().sum()
utils.progress_bar(batch_idx, len(PublicTestloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (PublicTest_loss / (batch_idx + 1), 100. * correct / total, correct, total))
I'd like to modify the code so that, instead of TenCrop, each image is validated once. I changed transform_test like this:
transform_test = transforms.Compose([transforms.ToTensor()])
I ran the code, but PublicTest still unpacks ncrops from the input shape, so I received this error:
File "mainpro_FER.py", line 147, in PublicTest
bs, ncrops, c, h, w = np.shape(inputs)
ValueError: not enough values to unpack (expected 5, got 4)
I removed ncrops from the unpacking in the function, tried again, and got this error:
IndexError: Dimension out of range (expected to be in range of [-1, 0], but got 1)
So I need a way to make this function work with a single image per sample in the batch, instead of TenCrop.
Thanks.
Never mind, I figured it out. The working version looks like this:
def PublicTest(epoch):
global PublicTest_acc
global best_PublicTest_acc
global best_PublicTest_acc_epoch
net.eval()
PublicTest_loss = 0
correct = 0
total = 0
for batch_idx, (inputs, targets) in enumerate(PublicTestloader):
bs, c, h, w = np.shape(inputs)
inputs = inputs.view(-1, c, h, w)
if use_cuda:
inputs, targets = inputs.cuda(), targets.cuda()
inputs, targets = Variable(inputs, volatile=True), Variable(targets)
outputs = net(inputs)
# outputs_avg = outputs.view(bs, ncrops, -1).mean(1) # avg over crops
loss = criterion(outputs, targets)
PublicTest_loss += loss.item()
_, predicted = torch.max(outputs.data, 1)
total += targets.size(0)
correct += predicted.eq(targets.data).cpu().sum()
utils.progress_bar(batch_idx, len(PublicTestloader), 'Loss: %.3f | Acc: %.3f%% (%d/%d)'
% (PublicTest_loss / (batch_idx + 1), 100. * correct / total, correct, total))
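A side note: on newer PyTorch releases volatile=True is ignored, and the equivalent way to turn off gradient tracking during validation is torch.no_grad(). A minimal sketch of the loop body under that assumption:
with torch.no_grad():                 # replaces Variable(..., volatile=True)
    outputs = net(inputs)
    loss = criterion(outputs, targets)
The rest of the accuracy bookkeeping stays the same.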
I would like to create a custom loss that does not directly use the output of my network. I need a loss that returns the difference between the result of a function f(x) (where x is the output of my network) and max(f(x)). Unfortunately, my code doesn't work and I don't know how to proceed. Here is my code:
def forward(self, x, y, hidden):
c_0 = Variable(torch.zeros(self.num_layers, x.size(0), self.hidden_size))
y = torch.reshape(y, (y.shape[0], 1, 1))
tmp = torch.cat((x, y), 2)
output, (hn, cn) = self.lstm(tmp, (hidden, c_0))
out = self.fc(output)
return out, hn
def _train(self):
num_epochs = 10
num_iteration = 10
save_loss_global = []
save_loss_epoch = []
for epoch in range(num_epochs):
print("NOUVELLE EPOCH")
X_train, Y_train = donneesAleatoires()
self.maxRes = 0
self.hidden = Variable(torch.zeros(self.num_layers, 1, self.hidden_size))
tabY = torch.Tensor()
tabY = torch.cat((tabY, Y_train), 1)
for iteration in range(num_iteration):
x_i = X_train[0]
x_i = torch.reshape(x_i, (x_i.shape[0], 1, x_i.shape[1]))
y_i = Y_train[0]
outputs, self.hidden = self(x_i, y_i, self.hidden)
YiPlus1 = self.function(outputs.detach().numpy().reshape(1, -1))
self.optimizer.zero_grad()
Yadd = Variable(torch.Tensor(YiPlus1))
tabY = torch.cat((tabY, Yadd), 1)
loss = self.my_loss(tabY, iteration)
if YiPlus1 > self.maxRes:
self.maxRes = YiPlus1
if y_i.detach().numpy() > self.maxRes:
self.maxRes = y_i.detach().numpy()
#loss = Variable(loss, requires_grad=True)
loss.backward(retain_graph=True)
X_train = outputs
Y_train = YiPlus1
Y_train = Variable(torch.Tensor(Y_train))
self.optimizer.step()
save_loss_global.append(loss.item())
if iteration == num_iteration -1:
save_loss_epoch.append(loss.item())
print(X_train)
def my_loss(self, target, epoch):
if isinstance(target, np.ndarray):
target = Variable(torch.Tensor(target))
tmp = self.maxRes
loss = target[0][0] - tmp
if epoch > 0:
for i in range(1, epoch + 1):
loss = loss + (target[0][i] - tmp)
loss = -loss
return loss / (epoch+1)
To calculate gradients from the loss, PyTorch needs a computation graph. That graph is built implicitly during the forward pass, but for this to work all computations must use PyTorch tensors (no .numpy() calls!) with gradients preserved (no .detach() calls!). Try to rewrite your code accordingly; don't worry about doing computations outside forward, that is normal.
You can check that your tensors are tracked the right way by printing them; the output should look like this:
print( myTensor )
tensor([[-2.9016, -2.8739, ... ,-2.8929, -2.9033]], grad_fn=<AliasBackward0>)
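As a self-contained sketch of the idea (the linear layer and f below are hypothetical stand-ins, not your actual model), keeping every step in torch tensors lets autograd reach the weights:
import torch
import torch.nn as nn

net = nn.Linear(4, 3)                  # hypothetical stand-in for your LSTM model
x = torch.randn(2, 4)

def f(out):                            # hypothetical f, written with torch ops only
    return (out ** 2).sum(dim=1)

y = f(net(x))                          # no .detach(), no .numpy(): gradients are preserved
loss = (y - y.max()).mean()            # difference between f(x) and max(f(x))
loss.backward()                        # gradients flow back to net's parameters
print(loss)                            # a tensor with a grad_fn, as above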
I implemented a model with batch normalization:
class FFNet(torch.nn.Module):
def __init__(self, D_in, H_1, H_2, D_out):
super(FFNet, self).__init__()
self.linear1 = torch.nn.Linear(D_in, H_1)
self.linear2 = torch.nn.Linear(H_1, H_2)
self.bn2 = torch.nn.BatchNorm1d(H_2)
self.linear4 = torch.nn.Linear(H_2, D_out)
def forward(self, x):
h_relu_1=F.relu(self.linear1(x))
h_relu_2=F.relu(self.bn2(self.linear2(h_relu_1)))
y_pred=self.linear4(h_relu_2)
return y_pred
Also, I wrote the training loop:
for epoch in range(epoches):
running_loss = 0.0
cnt = 0
for i, data in enumerate(train_data, 0):
local_X, local_y = data
y_pred = model.forward(local_X)
loss = criterion(y_pred, local_y)
optimizer.zero_grad()
#loss = criterion(y_pred, Y_local_output)
loss.backward() # back props
optimizer.step()
running_loss = running_loss + loss.item()
cnt+=1
Validation_loss = 0.0
cnt2 = 0
# Validation
for i, data in enumerate(validation_data, 0):
Val_X, Val_Y = data
y_pred = model.forward(Val_X)
loss=criterion(y_pred, Val_Y)
Validation_loss = Validation_loss + loss.item()
cnt2+=1
I have two questions:
1. Is there no need to use model.train() in this code?
2. How do I evaluate this model using eval()? I have one data sample whose size is (1 x D_in), and the batch size is more than 1. When I use the code below, there is an error:
test_single = torch.tensor([aa, ab, ac, ad, ae, af, ag])
test_single = test_single.unsqueeze(0)
model.eval()
[bb,cc] = model.forward(test_single)
The error is 'not enough values to unpack (expected 2, got 1)'
If you have batch normalization, then you do need to use model.train() and model.eval() while training and evaluating respectively.
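For reference, a rough sketch of how the two modes fit around your loops (the layer sizes and dummy sample below are placeholders, not taken from your setup):
import torch

model = FFNet(7, 16, 8, 2)                # the class you defined above; sizes are arbitrary
model.train()                             # BatchNorm uses per-batch stats and updates running stats
for i, data in enumerate(train_data, 0):  # your existing training loop
    local_X, local_y = data
    # ... forward / loss / backward / step as in your code

model.eval()                              # BatchNorm switches to its running statistics
with torch.no_grad():
    test_single = torch.randn(1, 7)       # a single (1 x D_in) sample
    y_pred = model(test_single)           # works even though it is a batch of one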
The 2nd part (the unsqueezing code) is not wrong. However, your model has only one output (see the return statement of your model's forward function), which is what causes the error: you try to unpack two values where there is only one. So you can't do
[bb,cc] = model.forward(test_single)
You have to do
out = model.forward(test_single)
I tried this and it works.
I am trying to train a simple MLP to approximate y=f(a,b,c).
My code is as below.
import torch
import torch.nn as nn
from torch.autograd import Variable
import math  # needed for math.sqrt in the gradient-norm computation below
# hyper parameters
input_size = 3
output_size = 1
num_epochs = 50
learning_rate = 0.001
# Network definition
class FeedForwardNet(nn.Module):
def __init__(self, l1_size, l2_size):
super(FeedForwardNet, self).__init__()
self.fc1 = nn.Linear(input_size, l1_size)
self.relu1 = nn.ReLU()
self.fc2 = nn.Linear(l1_size, l2_size)
self.relu2 = nn.ReLU()
self.fc3 = nn.Linear(l2_size, output_size)
def forward(self, x):
out = self.fc1(x)
out = self.relu1(out)
out = self.fc2(out)
out = self.relu2(out)
out = self.fc3(out)
return out
model = FeedForwardNet(5 , 3)
# sgd optimizer
optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=0.9)
for epoch in range(11):
print ('Epoch ', epoch)
for i in range(trainX_light.shape[0]):
X = Variable( torch.from_numpy(trainX_light[i]).view(-1, 3) )
Y = Variable( torch.from_numpy(trainY_light[i]).view(-1, 1) )
# forward
optimizer.zero_grad()
output = model(X)
loss = (Y - output).pow(2).sum()
print (output.data[0,0])
loss.backward()
optimizer.step()
totalnorm = 0
for p in model.parameters():
modulenorm = p.grad.data.norm()
totalnorm += modulenorm ** 2
totalnorm = math.sqrt(totalnorm)
print (totalnorm)
# validation code
if (epoch + 1) % 5 == 0:
print (' test points',testX_light.shape[0])
total_loss = 0
for t in range(testX_light.shape[0]):
X = Variable( torch.from_numpy(testX_light[t]).view(-1, 3) )
Y = Variable( torch.from_numpy(testY_light[t]).view(-1, 1) )
output = model(X)
loss = (Y - output).pow(2).sum()
print (output.data[0,0])
total_loss += loss
print ('epoch ', epoch, 'avg_loss ', total_loss.data[0] / testX_light.shape[0])
print ('Done')
The problem I have now is that the validation code
output = model(X)
is always producing exactly the same output value (I guess this value is some sort of garbage). I am not sure what mistake I am making in this part. Could someone help me figure out the mistake in my code?
The reason the network produced random values (and inf later) was the exploding gradient problem. Clipping the gradients with torch.nn.utils.clip_grad_norm(model.parameters(), 0.1) helped.
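For context, this is where the call sits in the training loop (newer PyTorch names it clip_grad_norm_ with a trailing underscore):
loss.backward()
torch.nn.utils.clip_grad_norm(model.parameters(), 0.1)   # clip gradients before the update
optimizer.step()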
I took the dynamic RNN example from aymericdamien: https://github.com/aymericdamien/TensorFlow-Examples/blob/master/examples/3_NeuralNetworks/dynamic_rnn.py
and modified it a little to fit my data. The data is a list of 7500 data sets of 60 entries.
There are 5 labels as output data.
The code runs perfectly and I get an accuracy of 75%.
Now I want to feed the model with a data set and get a predicted label back, but I get the following error:
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'Placeholder_2' with dtype int32
The code is listed below; the last two lines are where I want to get the prediction back.
What am I doing wrong?
# ==========
# MODEL
# ==========
# Parameters
learning_rate = 0.01
training_iters = 1000000
batch_size = 128
display_step = 10
# Network Parameters
seq_max_len = 60 # Sequence max length
n_hidden = 64 # hidden layer num of features
n_classes = 5 # large rise, small rise, almost equal, small drop, large drop
trainset = ToySequenceData(n_samples=7500, max_seq_len=seq_max_len)
testset = copy.copy(trainset)
# take 20% of the total data to use for training
trainpart = int(0.2 * trainset.data.__len__())
pred_data = testset.data[testset.data.__len__() - 2:testset.labels.__len__() - 1][:]
pred_label = testset.labels[testset.labels.__len__() - 1:][:]
trainset.data = trainset.data[:trainpart][:]
testset.data = testset.data[trainpart:testset.data.__len__() - 2][:]
trainset.labels = trainset.labels[:trainpart][:]
testset.labels = testset.labels[trainpart:testset.labels.__len__() - 2][:]
trainset.seqlen = trainset.seqlen[:trainpart][:]
testset.seqlen = testset.seqlen[trainpart:testset.seqlen.__len__() - 2]
# tf Graph input
x = tf.placeholder("float", [None, seq_max_len, 1])
y = tf.placeholder("float", [None, n_classes])
# A placeholder for indicating each sequence length
seqlen = tf.placeholder(tf.int32, [None])
# Define weights
weights = {
'out': tf.Variable(tf.random_normal([n_hidden, n_classes]))
}
biases = {
'out': tf.Variable(tf.random_normal([n_classes]))
}
def dynamic_rnn(x, seqlen, weights, biases):
# Prepare data shape to match `rnn` function requirements
# Current data input shape: (batch_size, n_steps, n_input)
# Required shape: 'n_steps' tensors list of shape (batch_size, n_input)
# Permuting batch_size and n_steps
x = tf.transpose(x, [1, 0, 2])
# Reshaping to (n_steps*batch_size, n_input)
x = tf.reshape(x, [-1, 1])
# Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
x = tf.split(0, seq_max_len, x)
# Define a lstm cell with tensorflow
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(n_hidden)
# Get lstm cell output, providing 'sequence_length' will perform dynamic
# calculation.
outputs, states = tf.nn.rnn(lstm_cell, x, dtype=tf.float32,
sequence_length=seqlen)
# When performing dynamic calculation, we must retrieve the last
# dynamically computed output, i.e., if a sequence length is 10, we need
# to retrieve the 10th output.
# However TensorFlow doesn't support advanced indexing yet, so we build
# a custom op that for each sample in batch size, get its length and
# get the corresponding relevant output.
# 'outputs' is a list of output at every timestep, we pack them in a Tensor
# and change back dimension to [batch_size, n_step, n_input]
outputs = tf.pack(outputs)
outputs = tf.transpose(outputs, [1, 0, 2])
# Hack to build the indexing and retrieve the right output.
batch_size = tf.shape(outputs)[0]
# Start indices for each sample
index = tf.range(0, batch_size) * seq_max_len + (seqlen - 1)
# Indexing
outputs = tf.gather(tf.reshape(outputs, [-1, n_hidden]), index)
# Linear activation, using outputs computed above
return tf.matmul(outputs, weights['out']) + biases['out']
pred = dynamic_rnn(x, seqlen, weights, biases)
# Define loss and optimizer
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y))
optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost)
# Evaluate model
correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
# Initializing the variables
init = tf.initialize_all_variables()
# Launch the graph
with tf.Session() as sess:
sess.run(init)
step = 1
# Keep training until reach max iterations
while step * batch_size < training_iters:
batch_x, batch_y, batch_seqlen = trainset.next(batch_size)
# Run optimization op (backprop)
sess.run(optimizer, feed_dict={x: batch_x, y: batch_y,
seqlen: batch_seqlen})
if step % display_step == 0:
# Calculate batch accuracy
acc = sess.run(accuracy, feed_dict={x: batch_x, y: batch_y,
seqlen: batch_seqlen})
# Calculate batch loss
loss = sess.run(cost, feed_dict={x: batch_x, y: batch_y,
seqlen: batch_seqlen})
print("Iter " + str(step*batch_size) + ", Minibatch Loss= " +
"{:.6f}".format(loss) + ", Training Accuracy= " +
"{:.5f}".format(acc))
step += 1
print("Optimization Finished!")
# Calculate accuracy
test_data = testset.data
test_label = testset.labels
test_seqlen = testset.seqlen
print("Testing Accuracy:",
sess.run(accuracy, feed_dict={x: test_data, y: test_label,
seqlen: test_seqlen}))
print(pred.eval(feed_dict={x: pred_data}))
print(pred_label)
In TensorFlow, when you do not provide a name to tf.placeholder, it assumes the default name "Placeholder". The next placeholder created is named "Placeholder_1" and the third one is called "Placeholder_2".
This is done to uniquely identify each placeholder. Now in your last line, you are trying to get the value of pred.eval(). Looking at your dynamic_rnn code, it seems like you need a value for the seqlen placeholder, which is the third placeholder defined (that's why it is "Placeholder_2"). Simply add the following key-value pair to your feed_dict:
print(pred.eval(feed_dict={x: pred_data, seqlen: pred_seqlen}))
Of course, you will need to define pred_seqlen properly like you defined the other two seq_len variables.
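For instance, a minimal sketch, assuming the sequences you want to predict are full length (otherwise pass their true lengths):
pred_seqlen = [seq_max_len] * len(pred_data)   # one length entry per sequence in pred_data
print(pred.eval(feed_dict={x: pred_data, seqlen: pred_seqlen}))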
I am trying to use a TensorFlow DNN for a Kaggle competition. The data is about 100 columns of categorical data, 29 columns of numerical data, and 1 column for the output. I split it into training and testing sets with X and y using scikit-learn's train_test_split function, where X is a list of each row without the "id" column or the value that needs to be predicted, and y is the value to be predicted. I then built the model, shown below:
import tensorflow as tf
import numpy as np
import time
import pickle
with open('pickle.pickle', 'rb') as f:
trainX, trainy, testX, testy = pickle.load(f)
trainX = np.array(trainX)
trainy = np.array(trainy)
trainy = trainy.reshape(trainy.shape[0], 1)
testX = np.array(testX)
testy = np.array(testy)
print (trainX.shape)
print (trainy.shape)
testX = testX.reshape(testX.shape[0], 130)
testy = testy.reshape(testy.shape[0], 1)
print (testX.shape)
print (testy.shape)
n_nodes_hl1 = 256
n_nodes_hl2 = 256
n_nodes_hl3 = 256
n_classes = 1
batch_size = 100
# Matrix = h X w
X = tf.placeholder('float', [None, len(trainX[0])])
y = tf.placeholder('float')
def model(data):
hidden_1_layer = {'weights':tf.Variable(tf.random_normal([trainX.shape[1], n_nodes_hl1])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl1]))}
hidden_2_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl1, n_nodes_hl2])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl2]))}
hidden_3_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl2, n_nodes_hl3])),
'biases':tf.Variable(tf.random_normal([n_nodes_hl3]))}
output_layer = {'weights':tf.Variable(tf.random_normal([n_nodes_hl3, n_classes])),
'biases':tf.Variable(tf.random_normal([n_classes]))}
# (input_data * weights) + biases
l1 = tf.add(tf.matmul(data, hidden_1_layer['weights']), hidden_1_layer['biases'])
l1 = tf.nn.sigmoid(l1)
l2 = tf.add(tf.matmul(l1, hidden_2_layer['weights']), hidden_2_layer['biases'])
l2 = tf.nn.sigmoid(l2)
l3 = tf.add(tf.matmul(l2, hidden_3_layer['weights']), hidden_3_layer['biases'])
l3 = tf.nn.sigmoid(l3)
output = tf.matmul(l3, output_layer['weights']) + output_layer['biases']
return output
def train(x):
pred = model(x)
#loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(pred, y))
loss = tf.reduce_mean(tf.square(pred - y))
optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
epochs = 1
with tf.Session() as sess:
sess.run(tf.initialize_all_variables())
print ('Beginning Training \n')
for e in range(epochs):
timeS = time.time()
epoch_loss = 0
i = 0
while i < len(trainX):
start = i
end = i + batch_size
batch_x = np.array(trainX[start:end])
batch_y = np.array(trainy[start:end])
_, c = sess.run([optimizer, loss], feed_dict = {x: batch_x, y: batch_y})
epoch_loss += c
i += batch_size
done = time.time() - timeS
print ('Epoch', e + 1, 'completed out of', epochs, 'loss:', epoch_loss, "\nTime:", done, 'seconds\n')
correct = tf.equal(tf.arg_max(pred, 1), tf.arg_max(y, 1))
acc = tf.reduce_mean(tf.cast(correct, 'float'))
print("Accuracy:", acc.eval({x:testX, y:testy}))
train(X)
Output for 1 epoch:
Epoch 1 completed out of 1 loss: 1498498282.5
Time: 1.3765859603881836 seconds
Accuracy: 1.0
I do realize that the loss is very high, and I am using 1 epoch just for testing purposes, and yes, I know my code is quite messy. But all I want to do is print out a prediction. How would I do that? I know that I need to feed a list of features for X, but I just don't understand how to do it. I also don't quite understand why my accuracy is at 1.0, so if you have any suggestions for that, or any ways to change my code, I would be more than happy to listen to any ideas.
Thanks in advance
To get a prediction you just have to evaluate pred, which is the operation that defines the output of the model.
How do you do it? With pred.eval(). But you need an input to evaluate its prediction, so you have to provide a feed_dict dictionary to eval() with the sample (or samples) you want to process.
The resulting code looks like:
predictions = pred.eval(feed_dict = {x:testX})
Notice how this is very similar to acc.eval({x:testX, y:testy}), because the idea is the same. You have an operation (acc in this case) which needs some input to be evaluated, and you can evaluate it either by calling acc.eval() or sess.run(acc) with the corresponding feed_dict with the necessary inputs.
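For example, inside the same tf.Session (using pred and testX from the code above) these two forms give the same result:
predictions = pred.eval(feed_dict={x: testX})      # same as the line below
predictions = sess.run(pred, feed_dict={x: testX})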
The simplest way would be to use the existing session while training (between iterations):
print(sess.run(pred, {x: X_example}))
where X_example is some numpy example array.
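As an illustration (the shape comes from the reshape in the question, i.e. 130 feature columns), a hypothetical X_example could be built like this:
X_example = np.array(testX[0:1], dtype=np.float32)   # one row of the test set, shape (1, 130)
print(sess.run(pred, {x: X_example}))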
The line below will give you a score for every class; for example, if you have 3 classes, it will give you an array of shape 1x3. Assuming you want a prediction for a single data point X_test, you can do the following:
output = sess.run(pred, {x:X_test})
The index of the maximum value in output will be your prediction, so we modify the statement above accordingly:
output = sess.run(tf.argmax(pred, 1), {x:X_test})
print("your prediction for X_test is :", output[0])
Another thing you can do is:
output = sess.run(pred, {x:X_test})
output = np.argmax(output)
print("your prediction for X_test is :", output)