Semantic Segmentation runtime error at loss function - pytorch

I am using a custom model for segmentation (SETRModel). The model output shape is (nBatch, 256, 256), and the code below confirms it (note that the channel dimension is squeezed out). The target has the same shape (it's a PILMask).
When I start training, I get a runtime error (see below) related to the loss function. What am I doing wrong?
```
size = 480
half = (256, 256)

splitter = FuncSplitter(lambda o: Path(o).parent.name == 'validation')
dblock = DataBlock(blocks=(ImageBlock, MaskBlock(codes)),
                   get_items=get_relevant_images,
                   splitter=splitter,
                   get_y=get_mask,
                   item_tfms=Resize((size, size)),
                   batch_tfms=[*aug_transforms(size=half), Normalize.from_stats(*imagenet_stats)])
dls = dblock.dataloaders(path/'images', bs=4)

model = SETRModel(patch_size=(32, 32),
                  in_channels=3,
                  out_channels=1,
                  hidden_size=1024,
                  num_hidden_layers=8,
                  num_attention_heads=16,
                  decode_features=[512, 256, 128, 64])

# Create a Learner using a custom model
loss = nn.BCEWithLogitsLoss()
learn = Learner(dls, model, loss_func=loss, lr=1.0e-4, cbs=callbacks, metrics=[Dice()])

# Let's test and make sure the loss function is happy with its inputs
learn.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
t1 = torch.rand(4, 3, 256, 256).to(device)
print("input: " + str(t1.shape))
pred = learn.model(t1).to(device)
print("output: " + str(pred.shape))
# prints this:
# input: torch.Size([4, 3, 256, 256])
# output: torch.Size([4, 256, 256])

target = next(iter(learn.dls.train))[1]
target = target.type(torch.float32).to(device)
target.size(), pred.size()
# prints this:
# (torch.Size([4, 256, 256]), torch.Size([4, 256, 256]))

loss(pred, target)
# prints this:
# TensorMask(0.6844, device='cuda:0', grad_fn=<AliasBackward>)
# so, the loss function is happy with its inputs

learn.fine_tune(50)
# raises this:
# ---------------------------------------------------------------------------
# RuntimeError                              Traceback (most recent call last)
# <ipython-input-114-0e514c73651a> in <module>()
# ----> 1 learn.fine_tune(50)
# 19 frames
# /usr/local/lib/python3.7/dist-packages/torch/nn/functional.py in binary_cross_entropy_with_logits(input, target, weight, size_average, reduce, reduction, pos_weight)
# ...
# RuntimeError: result type Float can't be cast to the desired output type Long
```

This is something that happens when you use a plain PyTorch loss inside fastai (I believe this should be fixed): during fine_tune the TensorMask target keeps its integer dtype, whereas in your manual test you cast it to float32 first, which is why the loss was happy there. Just create a custom loss_func that does the cast itself. For example:
def loss_func(output, target): return CrossEntropyLossFlat()(output, target.long())
and pass it when creating the Learner:
learn = Learner(dls, model, loss_func=loss_func, ...)
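If you want to keep the binary BCE setup from the question (single-channel output), an equivalent fix is to wrap nn.BCEWithLogitsLoss and cast the mask to float inside the loss function. A minimal sketch, assuming the dls and model defined above:
```
import torch.nn as nn
from fastai.vision.all import Learner, Dice

bce = nn.BCEWithLogitsLoss()

def bce_mask_loss(output, target):
    # fastai passes the mask as an integer TensorMask during training;
    # casting it to float is exactly what the manual test above did
    return bce(output, target.float())

learn = Learner(dls, model, loss_func=bce_mask_loss, lr=1e-4, metrics=[Dice()])
```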

Related

TypeError: Inputs to a layer should be tensors. Got: None for U-net

Error:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-41-da7e85621955> in <module>
1 # Call the helper function for defining the layers for the model, given the input image size
----> 2 unet = UNetCompiled(input_size=(128,128,3), n_filters=32, n_classes=3)
3 frames
/usr/local/lib/python3.7/dist-packages/keras/engine/input_spec.py in assert_input_compatibility(input_spec, inputs, layer_name)
195 # have a `shape` attribute.
196 if not hasattr(x, 'shape'):
--> 197 raise TypeError(f'Inputs to a layer should be tensors. Got: {x}')
198
199 if len(inputs) != len(input_spec):
TypeError: Inputs to a layer should be tensors. Got: None
Code:
def UNetCompiled(input_size=(128, 128, 3), n_filters=32, n_classes=3):
    """
    Combine both encoder and decoder blocks according to the U-Net research paper
    Return the model as output
    """
    # Input size represents the size of 1 image (the size used for pre-processing)
    inputs = Input(input_size)

    # Encoder includes multiple convolutional mini blocks with different maxpooling, dropout and filter parameters
    # Observe that the filters increase as we go deeper into the network, which increases the number of channels
    cblock1 = EncoderMiniBlock(inputs, n_filters, dropout_prob=0, max_pooling=True)
    cblock2 = EncoderMiniBlock(cblock1[0], n_filters*2, dropout_prob=0, max_pooling=True)
    cblock3 = EncoderMiniBlock(cblock2[0], n_filters*4, dropout_prob=0, max_pooling=True)
    cblock4 = EncoderMiniBlock(cblock3[0], n_filters*8, dropout_prob=0.3, max_pooling=True)
    cblock5 = EncoderMiniBlock(cblock4[0], n_filters*16, dropout_prob=0.3, max_pooling=False)

    # Decoder includes multiple mini blocks with a decreasing number of filters
    # Observe the skip connections from the encoder are given as input to the decoder
    # Recall the 2nd output of each encoder block was the skip connection, hence cblockn[1] is used
    ublock6 = DecoderMiniBlock(cblock5[0], cblock4[1], n_filters * 8)
    ublock7 = DecoderMiniBlock(ublock6, cblock3[1], n_filters * 4)
    ublock8 = DecoderMiniBlock(ublock7, cblock2[1], n_filters * 2)
    ublock9 = DecoderMiniBlock(ublock8, cblock1[1], n_filters)

    # Complete the model with one 3x3 convolution layer (same as the previous conv layers),
    # followed by a 1x1 conv layer to map to the desired output.
    # Observe the number of channels will be equal to the number of output classes
    conv9 = Conv2D(n_filters,
                   3,
                   activation='relu',
                   padding='same',
                   kernel_initializer='he_normal')(ublock9)
    conv10 = Conv2D(n_classes, 1, padding='same')(conv9)

    # Define the model
    model = tf.keras.Model(inputs=inputs, outputs=conv10)
    return model
Function called:
# Call the helper function for defining the layers for the model, given the input image size
unet = UNetCompiled(input_size=(128,128,3), n_filters=32, n_classes=3)
I am trying to implement an image segmentation model using U-Net. The mask images are 4-channel PNGs and the original images are JPGs.
Define the desired shapes:
target_shape_img = [128, 128, 3]
target_shape_mask = [128, 128, 1]

LSTM Autoencoder set-up for multiple features using Pytorch

I am building an LSTM autoencoder to denoise signals, and it will take more than one feature as its input.
I have set up the Encoder part of the model as follows, and it works for single-feature inputs (i.e. sequences with just one feature):
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, num_layers=1, embedding_dim=64):
        super(Encoder, self).__init__()
        self.seq_len = seq_len
        self.n_features = n_features
        self.num_layers = num_layers
        self.embedding_dim = embedding_dim
        self.hidden_dim = 2 * embedding_dim

        # input: batch_size, seq_len, features
        self.lstm1 = nn.LSTM(
            input_size=self.n_features,
            hidden_size=self.hidden_dim,
            num_layers=self.num_layers,
            batch_first=True
        )  # output: batch_size, seq_len, hidden_dim

        # input: batch_size, seq_len, hidden_dim
        self.lstm2 = nn.LSTM(
            input_size=self.hidden_dim,
            hidden_size=self.embedding_dim,
            num_layers=self.num_layers,
            batch_first=True
        )  # output: batch_size, seq_len, embedding_dim

    def forward(self, x):
        print(x)
        x = x.reshape((1, self.seq_len, self.n_features))
        print(x.shape)
        x, (_, _) = self.lstm1(x)
        print(x.shape)
        x, (hidden_n, _) = self.lstm2(x)
        print(x.shape, hidden_n.shape)
        print(hidden_n)
        return hidden_n.reshape((self.n_features, self.embedding_dim))
When I test this setup as follows:
model = Encoder(1024, 1)
model.forward(torch.randn(1024, 1))
with the 1 representing a single feature, all is well. However, when I do the following (where 2 represents sequences with 2 features):
model = Encoder(1024, 2)
model.forward(torch.randn(1024, 2))
I get the following error:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
Input In [296], in <cell line: 1>()
----> 1 model.forward(torch.randn(1024, 2))
Input In [294], in Encoder.forward(self, x)
36 print(hidden_n)
37 # print(hidden_n.reshape((self.n_features, self.embedding_dim)).shape)
---> 39 return hidden_n.reshape((self.n_features, self.embedding_dim))
RuntimeError: shape '[2, 64]' is invalid for input of size 64
The hidden_n shape comes out as torch.Size([1, 1, 64]). I would like to understand: if we have more than 1 feature, e.g. 2, should that shape become (1, 2, 64) so that the hidden state has weights for both features?
Can someone please explain why reshape does not like the way I'm trying to restructure the output of the Encoder, and how I should do it so that the model can handle any number of features?
What am I missing or perhaps misunderstanding here?
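For reference on the shapes involved (an illustration, not the poster's code): the final hidden state of nn.LSTM has shape (num_layers * num_directions, batch_size, hidden_size), independent of the number of input features, so with one layer and a batch of 1 it holds 1 * 1 * 64 = 64 values and cannot be reshaped to (2, 64), which would require 128:
```
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=2, hidden_size=64, num_layers=1, batch_first=True)
x = torch.randn(1, 1024, 2)          # (batch, seq_len, n_features)
out, (h_n, c_n) = lstm(x)

print(out.shape)  # torch.Size([1, 1024, 64])  (batch, seq_len, hidden_size)
print(h_n.shape)  # torch.Size([1, 1, 64])     (num_layers, batch, hidden_size)
# h_n has 64 elements, so h_n.reshape(2, 64) raises:
# RuntimeError: shape '[2, 64]' is invalid for input of size 64
```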

Found input variables with inconsistent numbers of samples: [2, 8382]

My model has two input branches:
The first branch is composed of an embedding followed by a simple Multi-Layer Perceptron (MLP) designed to handle the product description input.
The second branch is a CNN that operates over the product image data.
These branches are then concatenated together to form the final model.
The problem is that when we try to split the data with train_test_split for cross-validation, it gives this error:
ValueError: Found input variables with inconsistent numbers of samples: [2, 8382]
MLP and CNN
def create_mlp(dim, regress=False):
    # define our MLP network
    model = Sequential()
    model.add(Dense(8, input_dim=dim, activation="relu"))
    model.add(Dense(4, activation="relu"))

    # check to see if the regression node should be added
    if regress:
        model.add(Dense(1, activation="linear"))

    # return our model
    return model

def create_cnn(width, height, depth, filters=(64, 32, 16), regress=False):
    # initialize the input shape and channel dimension, assuming
    # TensorFlow/channels-last ordering
    inputShape = (height, width, depth)
    chanDim = -1

    # define the model input
    inputs = Input(shape=inputShape)

    # loop over the number of filters
    for (i, f) in enumerate(filters):
        # if this is the first CONV layer then set the input
        # appropriately
        if i == 0:
            x = inputs

        # CONV => RELU => BN => POOL
        x = Conv2D(f, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=chanDim)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # flatten the volume, then FC => RELU => BN => DROPOUT
    x = Flatten()(x)
    x = Dense(16)(x)
    x = Activation("relu")(x)
    x = BatchNormalization(axis=chanDim)(x)
    x = Dropout(0.5)(x)

    # apply another FC layer, this one to match the number of nodes
    # coming out of the MLP
    x = Dense(4)(x)
    x = Activation("relu")(x)

    # check to see if the regression node should be added
    if regress:
        x = Dense(1, activation="linear")(x)

    # construct the CNN
    model = Model(inputs, x)

    # return the CNN
    return model

mlp = create_mlp(trainEmbedX.shape[1], regress=False)
cnn = create_cnn(64, 64, 3, regress=False)

combinedInput = concatenate([mlp.output, cnn.output])

x = Dense(4, activation="relu")(combinedInput)
x = Dense(1, activation="sigmoid")(x)

model = Model(inputs=[mlp.input, cnn.input], outputs=x)
model.compile(loss="binary_crossentropy", metrics=['accuracy'], optimizer="adam")  # binary_crossentropy
The error occurs here
n_folds = 3
epochs = 3
batch_size = 128

# save the model history in a list after fitting so that we can plot later
model_history = []

for i in range(n_folds):
    print("Training on Fold: ", i+1)
    t_x, val_x, t_y, val_y = train_test_split([trainEmbedX, trainImagesX], trainY, test_size=0.2,
                                               random_state=np.random.randint(1, 1000, 1)[0])
    model_history.append(fit_and_evaluate(t_x, val_x, t_y, val_y, epochs, batch_size))
    print("======="*12, end="\n\n\n")
Training on Fold: 1
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-84-651638774259> in <module>
7 for i in range(n_folds):
8 print("Training on Fold: ",i+1)
----> 9 t_x, val_x, t_y, val_y = train_test_split([trainEmbedX,trainImagesX], trainY, test_size = 0.2, random_state = np.random.randint(1,1000, 1)[0])
10 model_history.append(fit_and_evaluate(t_x, val_x, t_y, val_y, epochs, batch_size))
11 print("======="*12, end="\n\n\n")
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/model_selection/_split.py in train_test_split(*arrays, **options)
2182 test_size = 0.25
2183
-> 2184 arrays = indexable(*arrays)
2185
2186 if shuffle is False:
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/utils/validation.py in indexable(*iterables)
258 else:
259 result.append(np.array(X))
--> 260 check_consistent_length(*result)
261 return result
262
~/anaconda3/envs/baron/lib/python3.6/site-packages/sklearn/utils/validation.py in check_consistent_length(*arrays)
233 if len(uniques) > 1:
234 raise ValueError("Found input variables with inconsistent numbers of"
--> 235 " samples: %r" % [int(l) for l in lengths])
236
237
ValueError: Found input variables with inconsistent numbers of samples: [2, 8382]
This error happens when the arrays passed to train_test_split do not have matching numbers of samples.
Looking at your snippet, you wrap the two arrays in a plain Python list, [trainEmbedX, trainImagesX]. scikit-learn treats that as a single array-like of length 2 (not 8382 samples), hence the [2, 8382] in the error.
So instead of [trainEmbedX, trainImagesX], I suggest using np.concatenate to merge these two arrays: np.concatenate((trainEmbedX, trainImagesX), axis=1).
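If the two arrays have different dimensionality (the image array for the CNN branch is presumably 4-D), another option is to pass them to train_test_split separately, so they are all split with the same indices. A sketch using the array names from the question:
```
from sklearn.model_selection import train_test_split

# one call splits every array with the same indices
(t_embed, val_embed,
 t_img, val_img,
 t_y, val_y) = train_test_split(trainEmbedX, trainImagesX, trainY,
                                test_size=0.2, random_state=42)

# the two-branch Keras model then takes its inputs as a list
model.fit([t_embed, t_img], t_y,
          validation_data=([val_embed, val_img], val_y),
          epochs=3, batch_size=128)
```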

size mismatch, m1: [3584 x 28], m2: [784 x 128] at /pytorch/aten/src/TH/generic/THTensorMath.cpp:940

I have executed the following code and am getting the error shown at the very bottom. I would like to know how to resolve this. Thanks.
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torchvision import transforms

_tasks = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

from torchvision.datasets import MNIST
mnist = MNIST("data", download=True, train=True, transform=_tasks)

from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler

# create training and validation split
split = int(0.8 * len(mnist))
index_list = list(range(len(mnist)))
train_idx, valid_idx = index_list[:split], index_list[split:]

# create sampler objects using SubsetRandomSampler
tr_sampler = SubsetRandomSampler(train_idx)
val_sampler = SubsetRandomSampler(valid_idx)

# create iterator objects for train and valid datasets
trainloader = DataLoader(mnist, batch_size=256, sampler=tr_sampler)
validloader = DataLoader(mnist, batch_size=256, sampler=val_sampler)

# Creating model for execution
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(784, 128)
        self.output = nn.Linear(128, 10)

    def forward(self, x):
        x = self.hidden(x)
        x = F.sigmoid(x)
        x = self.output(x)
        return x

model = Model()
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, weight_decay=1e-6, momentum=0.9, nesterov=True)

for epoch in range(1, 11):  ## run the model for 10 epochs
    train_loss, valid_loss = [], []

    # training part
    model.train()
    for data, target in trainloader:
        optimizer.zero_grad()
        # 1. forward propagation
        output = model(data)
        # 2. loss calculation
        loss = loss_function(output, target)
        # 3. backward propagation
        loss.backward()
        # 4. weight optimization
        optimizer.step()
        train_loss.append(loss.item())

    # evaluation part
    model.eval()
    for data, target in validloader:
        output = model(data)
        loss = loss_function(output, target)
        valid_loss.append(loss.item())
Executing this, I am getting the following error:
RuntimeError                              Traceback (most recent call last)
in <module>()
----> 1 output = model(data)
      2
      3 ## 2. loss calculation
      4 loss = loss_function(output, target)

/usr/local/lib/python3.6/dist-packages/torch/nn/modules/module.py in __call__(self, *input, **kwargs)
    487     result = self._slow_forward(*input, **kwargs)

/usr/local/lib/python3.6/dist-packages/torch/nn/functional.py in linear(input, weight, bias)
   1352     ret = torch.addmm(torch.jit._unwrap_optional(bias), input, weight.t())
   1353 else:
-> 1354     output = input.matmul(weight.t())
   1355     if bias is not None:
   1356         output += torch.jit._unwrap_optional(bias)

RuntimeError: size mismatch, m1: [3584 x 28], m2: [784 x 128] at
/pytorch/aten/src/TH/generic/THTensorMath.cpp:940
Your input MNIST data has shape [256, 1, 28, 28], corresponding to [B, C, H, W]. You need to flatten the input images into a single 784-long vector before feeding them to the Linear layer Linear(784, 128), so that the input becomes [256, 784], corresponding to [B, N], where N = 1x28x28 is your image size. This can be done as follows:
for data, target in trainloader:
    # Flatten MNIST images into a 784-long vector
    data = data.view(data.shape[0], -1)
    optimizer.zero_grad()
    ...
The same needs to be done in the validation loop.
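Alternatively (a sketch, not part of the original answer), the flattening can be done inside the model's forward method, so that neither the training nor the validation loop needs to change:
```
import torch
import torch.nn as nn

class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.hidden = nn.Linear(784, 128)
        self.output = nn.Linear(128, 10)

    def forward(self, x):
        x = x.view(x.size(0), -1)          # flatten [B, 1, 28, 28] -> [B, 784]
        x = torch.sigmoid(self.hidden(x))
        return self.output(x)
```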

Tensor Flow queue not closing. Problems with tf.train.start_queue_runners(sess)

While running a test CNN, I always get this error when trying to close the session with sess.close(), or when requesting the coordinator to stop and collect the threads. Apparently, the session is trying to close while there are still threads running. I just can't find a way to stop this from happening. Or is there a better/correct way to use queues and threads in TensorFlow?
Thanks in advance!
There is always a bunch of warnings like:
2017-10-24 15:48:02.625448: W C:\tf_jenkins\home\workspace\rel-win\M\windows-gpu\PY\36\tensorflow\core\kernels\queue_base.cc:295] _20_input_p
roducer/input_producer: Skipping cancelled enqueue attempt with queue not closed
Followed by:
ERROR:tensorflow:Exception in QueueRunner: Enqueue operation was cancelled
[[Node: batch/fifo_queue_enqueue = QueueEnqueueV2[Tcomponents=[DT_FLOAT, DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica:0
/task:0/cpu:0"](batch/fifo_queue, Squeeze_1/_13, input_producer_1/Gather_1/_15)]]
Traceback (most recent call last):
File "<stdin>", line 30, in <module>
ERROR:tensorflow:Exception in QueueRunner: Enqueue operation was cancelled
[[Node: batch_1/fifo_queue_enqueue = QueueEnqueueV2[Tcomponents=[DT_FLOAT, DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica
:0/task:0/cpu:0"](batch_1/fifo_queue, Squeeze/_37, input_producer/Gather_1/_39)]]
ERROR:tensorflow:Exception in QueueRunner: Enqueue operation was cancelled
[[Node: batch_1/fifo_queue_enqueue = QueueEnqueueV2[Tcomponents=[DT_FLOAT, DT_FLOAT], timeout_ms=-1, _device="/job:localhost/replica
:0/task:0/cpu:0"](batch_1/fifo_queue, Squeeze/_37, input_producer/Gather_1/_39)]]
Exception in thread Thread-53:
Traceback (most recent call last):
File "C:\Program Files\Anaconda3\lib\threading.py", line 916, in _bootstrap_inner
self.run()
File "C:\Program Files\Anaconda3\lib\threading.py", line 864, in run
self._target(*self._args, **self._kwargs)
File "C:\Program Files\Anaconda3\lib\site-packages\tensorflow\python\training\queue_runner_impl.py", line 238, in _run
enqueue_callable()
File "C:\Program Files\Anaconda3\lib\site-packages\tensorflow\python\client\session.py", line 1235, in _single_operation_run
target_list_as_strings, status, None)
File "C:\Program Files\Anaconda3\lib\contextlib.py", line 89, in __exit__
next(self.gen)
File "C:\Program Files\Anaconda3\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 466, in raise_exception_on_not_ok_stat
us
Below is the code that I wrote based on examples from the TF manual and GitHub:
"""My general framework to construct a TensorFlow data set of images for regression.
The general idea is to create a list of image names (i.e. the path to each image).
The image list must also have labels. In the case of a regression this can be multiple variables.
"""
import csv
import os
import sys
import plotly as py
import plotly.graph_objs as go
import math
import numpy as np
import tensorflow as tf
# First we need to get image paths and their respective labels
chn = 1
im_h = 424
im_w = 511
#resize image
size = 0.1
#size of test set
p = 0.25
def imtensors(im_path, chn, im_h, im_w, size):
    im_h = int(im_h*size)
    im_w = int(im_w*size)
    ima_tensors = tf.read_file(im_path)
    ima_tensors = tf.image.decode_png(ima_tensors, channels=chn)
    ima_tensors = tf.image.resize_images(ima_tensors, [im_h, im_w])
    return ima_tensors
dbname = 'simpRDB.csv'
imagepaths, y = list(), list()
#read the csv as a dictionary
with open(dbname, newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    for row in reader:
        imagepaths.append(row['path'])
        y.append(float(row['w']))
n = len(y)
ntest = int(n*p)
#remember that in py the index starts at 0 and x[a:d] -> a,b,c
impath_test = imagepaths[0:ntest]
y_test = y[0:ntest]
impath_train = imagepaths[ntest+1:n]
y_train = y[ntest+1:n]
#now convert to tensors
impath_test = tf.convert_to_tensor(impath_test, dtype=tf.string)
y_test = tf.convert_to_tensor(y_test, dtype=tf.float32)
im_test, y_test = tf.train.slice_input_producer([impath_test, y_test])
im_test = imtensors(im_test, chn, im_h, im_w, size)
impath_train = tf.convert_to_tensor(impath_train, dtype=tf.string)
y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
im_train, y_train = tf.train.slice_input_producer([impath_train, y_train])
im_train = imtensors(im_train, chn, im_h, im_w, size)
##################################################
# -----------------------------------------------
# This is a classic CNN with some spice.
# The basic change is at the output node: instead of
# a softmax or other multiclass head, we are using a
# fully-connected regressor as the last layer.
# -----------------------------------------------
# Parameters
learning_rate = 0.001
num_steps = 10000
b_size = 8
display_step = 100
# Network Parameters
dropout = 0.3 # rate to drop input
#create batched train set
#Use small batchs because CPU/GPU can run out of memory
X, Y = tf.train.batch([im_train, y_train], batch_size=b_size, capacity=b_size*4, num_threads=4,
allow_smaller_final_batch=True)
X_test, Y_test = tf.train.batch([im_test, y_test], batch_size=b_size, capacity=b_size*4,
num_threads=4, allow_smaller_final_batch=True)
# First let's define the weights and biases in a more systematic fashion.
# The weights are going to be initialized as random values near zero. This is a good
# practice for neural nets in general.
inp_h = int(im_h*size)
inp_w = int(im_w*size)
#placeholders for model cheking
x = tf.placeholder(tf.float32, shape=[None, inp_h, inp_w, 1])
y_ = tf.placeholder(tf.float32, shape=[None, 1])
def weight_variable(shape):
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)

# The bias neurons are normally initialized slightly positive for a ReLU activation function
# in order to prevent "dead neurons"
def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)

# Now the convolution layers: stride one with zero padding ('SAME'),
# so the output is the same size as the input.
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

# And the pooling layers are going to be 2x2 blocks side by side.
def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
#Define weights and biases
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])
W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])
# after two 2x2 max-poolings the 'image' size is reduced by 4; need to use ceil (matching TF's 'SAME' padding)
h_val = math.ceil(math.ceil(inp_h/2)/2)
w_val = math.ceil(math.ceil(inp_w/2)/2)
W_fc1 = weight_variable([h_val * w_val * 64, 1024])
b_fc1 = bias_variable([1024])
W_d1 = weight_variable([1024, 100])
b_d1 = bias_variable([100])
w_out = weight_variable([100, 1])
b_out = bias_variable([1])
# Create model
def conv_net(x, dropout, reuse, is_training):
    # Define a scope for reusing the variables
    with tf.variable_scope('ConvNet', reuse=reuse):
        # no need for dropout if evaluating the model

        # Convolution layer with 32 filters and a kernel size of 5
        conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
        # Max pooling (down-sampling) with strides of 2 and kernel size of 2
        pool1 = max_pool_2x2(conv1)

        # In order to build a deep network, we stack several layers of this type.
        # The second layer will have 64 features for each 5x5 patch.
        conv2 = tf.nn.relu(conv2d(pool1, W_conv2) + b_conv2)
        # Max pooling (down-sampling) with strides of 2 and kernel size of 2
        pool2 = max_pool_2x2(conv2)

        # DENSELY CONNECTED LAYER
        # Now that the image size has been reduced to n x m, we add a fully-connected layer
        # with 1024 neurons to allow processing on the entire image.
        # Flatten the data to a 1-D vector for the fully connected layer
        fc1 = tf.contrib.layers.flatten(pool2)
        # Fully connected layer
        fc1_flat = tf.nn.relu(tf.matmul(fc1, W_fc1) + b_fc1)
        # Apply dropout (if is_training is False, dropout is not applied)
        fc1_drop = tf.layers.dropout(fc1_flat, rate=dropout, training=is_training)
        # Dense layer with size reduction for input to the final prediction layer
        d1 = tf.nn.relu(tf.matmul(fc1_drop, W_d1) + b_d1)
        # Only one output, i.e. regression
        out = tf.matmul(d1, w_out) + b_out
    return out
# Create a graph for training and a graph for prediction sharing the same weights
out_train = conv_net(X, dropout, reuse=False, is_training=True)
#no drop out at evaluation
out_test = conv_net(X_test, dropout, reuse=True, is_training=False)
# Define loss and optimizer
# (mean absolute error)
loss_op = tf.reduce_mean(tf.abs(out_train - Y))
loss_ts = tf.reduce_mean(tf.abs(out_test - Y_test))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)
# Save the training and testing loss so we can plot them later
losses_op = []
losses_ts = []
steps = []
# Initialize the variables (i.e. assign their default value)
init = tf.global_variables_initializer()
# Saver object
saver = tf.train.Saver()
##################
# Start training
with tf.Session() as sess:
    # Run the initializer
    sess.run(init)

    # Start the data queue
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess)

    # Training cycle
    try:
        for step in range(1, num_steps+1):
            if step % display_step == 0:
                # Run optimization and calculate batch loss and accuracy
                _, loss, loss2 = sess.run([train_op, loss_op, loss_ts])
                print("Step " + str(step) + ", Minibatch Loss = " +
                      "{:.4f}".format(loss) + ", Loss Testbatch = " + "{:.4f}".format(loss2))
                steps.append(step)
                losses_op.append(loss)
                losses_ts.append(loss2)
            else:
                # Only run the optimization op (backprop)
                sess.run(train_op)
        print("Optimization Finished!")
    except Exception as e:
        coord.request_stop(e)
    finally:
        # Supposedly it is safer to call request_stop() twice
        coord.request_stop()
        coord.join(threads)

    saver.save(sess, model_path)
# plot of loss over steps
p_loss1 = go.Scatter(
    x=steps,
    y=losses_op,
    mode='lines',
    name='Loss training minibatch'
)
p_loss2 = go.Scatter(
    x=steps,
    y=losses_ts,
    mode='lines',
    name='Loss evaluation minibatch'
)
data = [p_loss1, p_loss2]
# Save your model
#Loss of final model on the test set
#no dropout for the test set
m_name = 'bw_model_1'
save_path = os.path.join(os.getcwd(), 'bw_models')
if not os.path.exists(save_path):
    os.mkdir(save_path)
model_path = os.path.join(save_path, m_name)
#save a graph of the loss over time
py.offline.plot(data, filename=model_path)
#end
Sorry about my dumbness! lol
Found a big error: the problem was that I was creating the coordinator but not passing it to the queue runners. It was like this:
# Start the data queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess)
And it should be this:
# Start the data queue
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess, coord)
Now the code stops the threads and collects them at the end!
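For completeness, a sketch of the generic TF 1.x coordinator/queue-runner pattern (not the exact code above; train_op stands in for the training op defined earlier):
```
import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while not coord.should_stop():
            sess.run(train_op)       # run one training step per iteration
    except tf.errors.OutOfRangeError:
        print("Input queues exhausted")
    finally:
        coord.request_stop()         # ask the queue-runner threads to stop
    coord.join(threads)              # wait for them before the session closes
```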

Resources