How to find loss function values in unsupervised learning with Autoencoder? - python-3.x

To simplify my story: I was trying to test dimensionality reduction on my UNLABELED data with the encoder method using keras/tensorflow.
So I looked at the internet and found a nice code that might be useful for me. Here's the link: https://github.com/IvanBongiorni/TensorFlow2.0_Notebooks/blob/master/TensorFlow2.0__02.01_Autoencoder_for_Dimensionality_Reduction.ipynb
Although, I'm interested just in the encoder part. So here I added part of that code to mine, but I can't figure out how the code calculates loss function values if I didn't give any targets/labels. I'm new using keras/tensorflow and thought loss function values could only be generated if you give true and predicted labels.
data = np.random.randint(1, 100, 500)
df = pd.DataFrame({'f1':data, 'f2':data**2, 'f3':data*0.33, 'f4':data/20})
scaler = StandardScaler()
scaled_df = scaler.fit_transform(df)
scaled_df = pd.DataFrame(scaled_df, columns=['f1','f2','f3','f4'])
n_input_layer = scaled_df.shape[1]
n_encoding_layer = 1
n_output_layer = n_input_layer
# AUTOENCODER
autoencoder = tf.keras.models.Sequential([
# ENCODER
Dense(n_input_layer, input_shape = (n_input_layer,), activation = 'elu'),
# CENTRAL LAYER
Dense(n_encoding_layer, activation = 'elu', name = 'central_layer'),
# DECODER
Dense(n_output_layer, activation = 'elu')])
n_epochs = 5000
loss = tf.keras.losses.MeanSquaredError()
optimizer = tf.optimizers.Adam(learning_rate = 0.001, decay = 0.0001, clipvalue = 0.5)
loss_history = [] # save loss improvement
for epoch in range(n_epochs):
with tf.GradientTape() as tape:
current_loss = loss(autoencoder(scaled_df.values), scaled_df.values)
gradients = tape.gradient(current_loss, autoencoder.trainable_variables)
optimizer.apply_gradients(zip(gradients, autoencoder.trainable_variables))
loss_history.append(current_loss.numpy()) # save current loss in its history
# show loss improvement every 200 epochs
if (epoch+1) % 200 == 0:
print(str(epoch+1) + '.\tLoss: ' + str(current_loss.numpy()))
Could anyone show me what I am missing? Thanks

Related

How to retrieve attention weight alignment for tokens using transformer (BERT) model?

I am working on textclassification with transformer models (PyTorch, Huggingface, running on GPU).
I have already my model and my training loop and it works fine but to better understand wrong predictions of the model, I want to "dig a little deeper" and look at the attention weights the model gave the tokens of the misclassified predictions (for evaluation during training and later for predictions on the test set).
I tried already so many things and I already found a way the model outputs attention weights (will attach an example), BUT a) it outputs too many attention weights for each misclassified example (can it be because it gives the weights for several layers?) and b) the attention weights are not connected to the tokens. I want it in a more interpretable way, like coloring the tokens in a darker color the more weight is on that token.
This is my code so far (irrelevant snippets left out):
model = AutoModelForSequenceClassification.from_pretrained(checkpoint,
num_labels=len(label_dict),
output_attentions=True,
output_hidden_states=True)
def get_wrong_predictions(predictions, true_vals):
wrong_predictions = []
for i in range(len(true_vals)):
if np.argmax(predictions[i]) != true_vals[i]:
wrong_predictions.append((true_vals[i], np.argmax(predictions[i])))
return wrong_predictions
seed_val = 17
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)
torch.cuda.manual_seed_all(seed_val)
def evaluate(dataloader_val):
model.eval()
loss_val_total = 0
predictions, true_vals = [], []
attentions = []
for batch in dataloader_val:
batch = tuple(b.to(device) for b in batch)
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'labels': batch[2],
}
with torch.no_grad():
outputs = model(**inputs)
loss = outputs[0]
logits = outputs[1]
attention_scores = outputs.attentions[-1].detach().cpu().numpy() # extract the attention scores from the last layer
loss_val_total += loss.item()
logits = logits.detach().cpu().numpy()
label_ids = inputs['labels'].cpu().numpy()
predictions.append(logits)
true_vals.append(label_ids)
attentions.append(attention_scores)
loss_val_avg = loss_val_total/len(dataloader_val)
predictions = np.concatenate(predictions, axis=0)
true_vals = np.concatenate(true_vals, axis=0)
attentions = np.concatenate(attentions, axis=0)
preds = np.argmax(predictions, axis=1)
misclassified = np.where(preds != true_vals)[0]
texts = df["description"].tolist()
global misclassified_examples
misclassified_examples = []
#misclassified = []
for idx in misclassified[:min(len(attention_scores), len(misclassified))]:
text = texts[idx]
true_label = true_vals[idx]
pred_label = preds[idx]
attention_weights = attentions[idx] # get the attention weights for the misclassified examples
misclassified_examples.append({
'text': text,
'true_label': true_label,
'pred_label': pred_label,
'attention_weights' : attention_weights
})
return loss_val_avg, predictions, true_vals
train_losses = [] #to plot later
val_losses = []
overall_accuracy = []
for epoch in tqdm(range(1, epochs+1)):
model.train()
loss_train_total = 0
progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
for batch in progress_bar:
model.zero_grad()
batch = tuple(b.to(device) for b in batch)
inputs = {'input_ids': batch[0],
'attention_mask': batch[1],
'labels': batch[2],
}
outputs = model(**inputs)
loss = outputs[0]
loss_train_total += loss.item()
loss.backward()
torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
optimizer.step()
scheduler.step()
progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
loss_train_avg = loss_train_total/len(dataloader_train) #to plot later
train_losses.append(loss_train_avg) # to plot later
tqdm.write(f'\nEpoch {epoch}')
loss_train_avg = loss_train_total/len(dataloader_train)
tqdm.write(f'Training loss: {loss_train_avg}')
val_loss, predictions, true_vals = evaluate(dataloader_validation)
val_f1 = f1_score_func(predictions, true_vals)
tqdm.write(f'Validation loss: {val_loss}')
tqdm.write(f'F1 Score (Weighted): {val_f1}')
val_overallaccuracy = overallaccuracy(true_vals, predictions)
tqdm.write(f'Overall Accuracy: {val_overallaccuracy}')
val_mcc = mcc_score_func(true_vals, predictions)
tqdm.write(f'MCC: {val_mcc}')
val_acc = accuracy_per_class(predictions, true_vals)
tqdm.write(f'Accuracy per class: {val_acc}')
val_losses.append(val_loss)
overall_accuracy.append(val_overallaccuracy)
The image shows exemplarily what kind of output I get, but that is "too much" attention as I expected one number for one token and then of course some way to relate that number of attention weight back to that token.
[enter image description here](https://i.stack.imgur.com/ICmvp.png)
Does anyone know how I could adjust my code to include attention weight alignment for the tokens? I would be VERY thankful!!

TensorFlow 2.0 GradientTape with EarlyStopping

I am using Python 3.7.5 and TensorFlow 2.0's 'GradientTape' API for classification of MNIST dataset using 300 100 dense fully connected architecture. I would like to use TensorFlow's 'EarlyStopping' with GradientTape() so that the training stops according to the variable being watched or monitored and according to patience parameters.
The code I have is below:
# Use tf.data to batch and shuffle the dataset
train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(100).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)
# Choose an optimizer and loss function for training-
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(lr = 0.001)
def create_nn_gradienttape():
"""
Function to create neural network for use
with GradientTape API following MNIST
300 100 architecture
"""
model = Sequential()
model.add(
Dense(
units = 300, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal,
input_shape = (784,)
)
)
model.add(
Dense(
units = 100, activation = 'relu',
kernel_initializer = tf.keras.initializers.GlorotNormal
)
)
model.add(
Dense(
units = 10, activation = 'softmax'
)
)
return model
# Instantiate the model to be trained using GradientTape-
model = create_nn_gradienttape()
# Select metrics to measure the error & accuracy of model.
# These metrics accumulate the values over epochs and then
# print the overall result-
train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')
test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')
# Use tf.GradientTape to train the model-
#tf.function
def train_step(data, labels):
"""
Function to perform one step of Gradient
Descent optimization
"""
with tf.GradientTape() as tape:
# 'training=True' is only needed if there are layers with different
# behavior during training versus inference (e.g. Dropout).
# predictions = model(data, training=True)
predictions = model(data)
loss = loss_fn(labels, predictions)
# 'gradients' is a list variable!
gradients = tape.gradient(loss, model.trainable_variables)
# IMPORTANT:
# Multiply mask with computed gradients-
# List to hold element-wise multiplication between-
# computed gradient and masks-
grad_mask_mul = []
# Perform element-wise multiplication between computed gradients and masks-
for grad_layer, mask in zip(gradients, mask_model_stripped.trainable_weights):
grad_mask_mul.append(tf.math.multiply(grad_layer, mask))
# optimizer.apply_gradients(zip(gradients, model.trainable_variables))
optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))
train_loss(loss)
train_accuracy(labels, predictions)
#tf.function
def test_step(data, labels):
"""
Function to test model performance
on testing dataset
"""
# training=False is only needed if there are layers with different
# behavior during training versus inference (e.g. Dropout).
predictions = model(data)
t_loss = loss_fn(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
EPOCHS = 15
for epoch in range(EPOCHS):
# Reset the metrics at the start of the next epoch
train_loss.reset_states()
train_accuracy.reset_states()
test_loss.reset_states()
test_accuracy.reset_states()
for x, y in train_ds:
train_step(x, y)
for x_t, y_t in test_ds:
test_step(x_t, y_t)
template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:4f}'
print(template.format(epoch + 1,
train_loss.result(), train_accuracy.result()*100,
test_loss.result(), test_accuracy.result()*100))
# Count number of non-zero parameters in each layer and in total-
# print("layer-wise manner model, number of nonzero parameters in each layer are: \n")
model_sum_params = 0
for layer in model.trainable_weights:
# print(tf.math.count_nonzero(layer, axis = None).numpy())
model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()
print("Total number of trainable parameters = {0}\n".format(model_sum_params))
In the code above, How can I use 'tf.keras.callbacks.EarlyStopping' with GradientTape() API ?
Thanks!

LSTM Time-Series produces shifted forecast?

I am doing a time-series forecast with a LSTM NN and Keras. As input features there are two variables (precipitation and temperature) and the one target to be predicted is groundwater-level.
It seems to be working quite all right, though there is a serious offset between the actual data and the output (see image).
Now I've read that this is can be a classic sign of the network not working, as it seems to be mimicing the output and
what the model is actually doing is that when predicting the value at
time “t+1”, it simply uses the value at time “t” as its prediction https://towardsdatascience.com/how-not-to-use-machine-learning-for-time-series-forecasting-avoiding-the-pitfalls-19f9d7adf424
However, this is not actually possible in my case, as the target-values are not used as input variable. I am using a multi variate time-series with two features, independent of the output feature.
Also, the predicted values are not offset in future (t+1) but rather seem to lag behind (t-1).
Does anyone know what could cause this problem?
This is the complete code of my network:
# Split in Input and Output Data
x_1 = data[['MeanT']].values
x_2 = data[['Precip']].values
y = data[['Z_424A_6857']].values
# Scale Data
x = np.hstack([x_1, x_2])
scaler = MinMaxScaler(feature_range=(0, 1))
x = scaler.fit_transform(x)
scaler_out = MinMaxScaler(feature_range=(0, 1))
y = scaler_out.fit_transform(y)
# Reshape Data
x_1, x_2, y = H.create2feature_data(x_1, x_2, y, window)
train_size = int(len(x_1) * .8)
test_size = int(len(x_1)) # * .5
x_1 = np.expand_dims(x_1, 2) # 3D tensor with shape (batch_size, timesteps, input_dim) // (nr. of samples, nr. of timesteps, nr. of features)
x_2 = np.expand_dims(x_2, 2)
y = np.expand_dims(y, 1)
# Split Training Data
x_1_train = x_1[:train_size]
x_2_train = x_2[:train_size]
y_train = y[:train_size]
# Split Test Data
x_1_test = x_1[train_size:test_size]
x_2_test = x_2[train_size:test_size]
y_test = y[train_size:test_size]
# Define Model Input Sets
inputA = Input(shape=(window, 1))
inputB = Input(shape=(window, 1))
# Build Model Branch 1
branch_1 = layers.GRU(16, activation=act, dropout=0, return_sequences=False, stateful=False, batch_input_shape=(batch, 30, 1))(inputA)
branch_1 = layers.Dense(8, activation=act)(branch_1)
#branch_1 = layers.Dropout(0.2)(branch_1)
branch_1 = Model(inputs=inputA, outputs=branch_1)
# Build Model Branch 2
branch_2 = layers.GRU(16, activation=act, dropout=0, return_sequences=False, stateful=False, batch_input_shape=(batch, 30, 1))(inputB)
branch_2 = layers.Dense(8, activation=act)(branch_2)
#branch_2 = layers.Dropout(0.2)(branch_2)
branch_2 = Model(inputs=inputB, outputs=branch_2)
# Combine Model Branches
combined = layers.concatenate([branch_1.output, branch_2.output])
# apply a FC layer and then a regression prediction on the combined outputs
comb = layers.Dense(6, activation=act)(combined)
comb = layers.Dense(1, activation="linear")(comb)
# Accept the inputs of the two branches and then output a single value
model = Model(inputs=[branch_1.input, branch_2.input], outputs=comb)
model.compile(loss='mse', optimizer='adam', metrics=['mse', H.r2_score])
model.summary()
# Training
model.fit([x_1_train, x_2_train], y_train, epochs=epoch, batch_size=batch, validation_split=0.2, callbacks=[tensorboard])
model.reset_states()
# Evaluation
print('Train evaluation')
print(model.evaluate([x_1_train, x_2_train], y_train))
print('Test evaluation')
print(model.evaluate([x_1_test, x_2_test], y_test))
# Predictions
predictions_train = model.predict([x_1_train, x_2_train])
predictions_test = model.predict([x_1_test, x_2_test])
predictions_train = np.reshape(predictions_train, (-1,1))
predictions_test = np.reshape(predictions_test, (-1,1))
# Reverse Scaling
predictions_train = scaler_out.inverse_transform(predictions_train)
predictions_test = scaler_out.inverse_transform(predictions_test)
# Plot results
plt.figure(figsize=(15, 6))
plt.plot(orig_data, color='blue', label='True GWL')
plt.plot(range(train_size), predictions_train, color='red', label='Predicted GWL (Training)')
plt.plot(range(train_size, test_size), predictions_test, color='green', label='Predicted GWL (Test)')
plt.title('GWL Prediction')
plt.xlabel('Day')
plt.ylabel('GWL')
plt.legend()
plt.show()
I am using a batch size of 30 timesteps, a lookback of 90 timesteps, with a total data size of around 7500 time steps.
Any help would be greatly appreciated :-) Thank you!
Probably my answer is not relevant two years later, but I had a similar issue when experimenting with LSTM encoder-decoder model. I solved my problem by scaling the input data in the range -1 .. 1 instead of 0 .. 1 as in your example.

Tensorflow -- Iterating over training and validation sequencially

I have been going throught the Dataset API of tensorflow to feed different dataset with ease to an RNN model.
I got everything working following the not so many blogs together with the docs in the tensorflow website. My working example did the following:
--- Train on X epochs in a training dataset -> validate after all the training has concluded in a validation dataset.
However, I'm unable to develop the following example:
--- Train on X epochs in a training dataset -> validate in each epoch the training model with a validation dataset (a bit like what Keras does)
The problematic issue comes because of the following piece of code:
train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE, drop_remainder=True).repeat()
val_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE_VAL, drop_remainder=True).repeat()
itr = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
train_init_op = itr.make_initializer(train_dataset)
validation_init_op = itr.make_initializer(val_dataset)
When I create the iterator from_structure, I need to specify an output_shape. Obviously, the output shape of the train dataset and the validation dataset is not the same as they have a different batch_size. However, the validation_init_op is throwing the following error, which it seems counterintuitive because validation sets have always different batch_size:
TypeError: Expected output shapes compatible with (TensorShape([Dimension(256), Dimension(11), Dimension(74)]), TensorShape([Dimension(256), Dimension(3)])) but got dataset with output shapes (TensorShape([Dimension(28), Dimension(11), Dimension(74)]), TensorShape([Dimension(28), Dimension(3)])).
I want to do this second approach to evaluate my model and see the common train and validation plots developed at the same time, to see how can I improve it (stopping the learning early and etc). However, with the first simple approach I don't get all this.
So, the question is: ¿Am I doing something wrong? ¿Does my second approach has to be tackled differently? I can think of creating two iterators, but I don't know if that is the right approach. Also, this answer by #MatthewScarpino points out to a feedable iterator because switching between reinitializable ones makes them to start all over again; however, the above error is not related with that part of the code -- ¿Maybe the reinitializable iterator is not intended to set a different batch size for the validation set and to only iterate it once after training whatever the size it is and without setting it in the .batch() method?
Any help is very much appreciated.
Full code for reference:
N_TIMESTEPS_X = xt.shape[0] ## The stack number
BATCH_SIZE = 256
#N_OBSERVATIONS = xt.shape[1]
N_FEATURES = xt.shape[2]
N_OUTPUTS = yt.shape[1]
N_NEURONS_LSTM = 128 ## Number of units in the LSTMCell
N_EPOCHS = 350
LEARNING_RATE = 0.001
### Define the placeholders anda gather the data.
xt = xt.transpose([1,0,2])
xval = xval.transpose([1,0,2])
train_data = (xt, yt)
validation_data = (xval, yval)
N_BATCHES = train_data[0].shape[0] // BATCH_SIZE
print('The number of batches is: {}'.format(N_BATCHES))
BATCH_SIZE_VAL = validation_data[0].shape[0] // N_BATCHES
print('The validation batch size is: {}'.format(BATCH_SIZE_VAL))
## We define the placeholders as a trick so that we do not break into memory problems, associated with feeding the data directly.
'''As an alternative, you can define the Dataset in terms of tf.placeholder() tensors, and feed the NumPy arrays when you initialize an Iterator over the dataset.'''
batch_size = tf.placeholder(tf.int64)
x = tf.placeholder(tf.float32, shape=[None, N_TIMESTEPS_X, N_FEATURES], name='XPlaceholder')
y = tf.placeholder(tf.float32, shape=[None, N_OUTPUTS], name='YPlaceholder')
# Creating the two different dataset objects.
train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE, drop_remainder=True).repeat()
val_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE_VAL, drop_remainder=True).repeat()
# Creating the Iterator type that permits to switch between datasets.
itr = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
train_init_op = itr.make_initializer(train_dataset)
validation_init_op = itr.make_initializer(val_dataset)
next_features, next_labels = itr.get_next()
After investigating the best way to do this, I came across with this final implementation that works well on my end. Surely not be the best. So as to maintain the state, I used a feedable iterator.
AIM: This code is intented to be used when you want to train and validate at the same time, preserving the state of each iterator (i.e. validate with the newest model parameters). Together with that, the code saves the model and other stuff, like some information about the hyperparameters and summaries to visualize the training and validation in Tensorboard.
Also, don't get confused: you don't need to have a different batch size for the training set and for the validation set. This is a misconception that I have. The batch sizes must be the same AND you have to deal with the different number of batches, just passing when no more batches are left. This is a requirement so that you can create the iterator, regarding having both datasets the same data type and shape.
Hope it helps others. Just ignore the code that does not relate to your objectives. Many thanks for #kvish for all the help and time.
Code:
def RNNmodelTF(xt, yt, xval, yval, xtest, ytest):
N_TIMESTEPS_X = xt.shape[0] ## The stack number
BATCH_SIZE = 256
#N_OBSERVATIONS = xt.shape[1]
N_FEATURES = xt.shape[2]
N_OUTPUTS = yt.shape[1]
N_NEURONS_LSTM = 128 ## Number of units in the LSTMCell
N_EPOCHS = 350
LEARNING_RATE = 0.001
### Define the placeholders anda gather the data.
xt = xt.transpose([1,0,2])
xval = xval.transpose([1,0,2])
train_data = (xt, yt)
validation_data = (xval, yval)
N_BATCHES = train_data[0].shape[0] // BATCH_SIZE
## We define the placeholders as a trick so that we do not break into memory problems, associated with feeding the data directly.
'''As an alternative, you can define the Dataset in terms of tf.placeholder() tensors, and feed the NumPy arrays when you initialize an Iterator over the dataset.'''
batch_size = tf.placeholder(tf.int64)
x = tf.placeholder(tf.float32, shape=[None, N_TIMESTEPS_X, N_FEATURES], name='XPlaceholder')
y = tf.placeholder(tf.float32, shape=[None, N_OUTPUTS], name='YPlaceholder')
# Creating the two different dataset objects.
train_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE, drop_remainder=True).repeat()
val_dataset = tf.data.Dataset.from_tensor_slices((x,y)).batch(BATCH_SIZE, drop_remainder=True).repeat()
#################### Creating the Iterator type that permits to switch between datasets.
handle = tf.placeholder(tf.string, shape = [])
iterator = tf.data.Iterator.from_string_handle(handle, train_dataset.output_types, train_dataset.output_shapes)
next_features, next_labels = iterator.get_next()
train_val_iterator = tf.data.Iterator.from_structure(train_dataset.output_types, train_dataset.output_shapes)
train_iterator = train_val_iterator.make_initializer(train_dataset)
val_iterator = train_val_iterator.make_initializer(val_dataset)
###########################
### Create the graph
cellType = tf.nn.rnn_cell.LSTMCell(num_units=N_NEURONS_LSTM, name='LSTMCell')
inputs = tf.unstack(next_features, axis=1)
'''inputs: A length T list of inputs, each a Tensor of shape [batch_size, input_size]'''
RNNOutputs, _ = tf.nn.static_rnn(cell=cellType, inputs=inputs, dtype=tf.float32)
out_weights = tf.get_variable("out_weights", shape=[N_NEURONS_LSTM, N_OUTPUTS], dtype=tf.float32, initializer=tf.contrib.layers.xavier_initializer())
out_bias = tf.get_variable("out_bias", shape=[N_OUTPUTS], dtype=tf.float32, initializer=tf.zeros_initializer())
predictionsLayer = tf.matmul(RNNOutputs[-1], out_weights) + out_bias
### Define the cost function, that will be optimized by the optimizer.
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=predictionsLayer, labels=next_labels, name='Softmax_plus_Cross_Entropy'))
optimizer_type = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE, name='AdamOptimizer')
optimizer = optimizer_type.minimize(cost)
### Model evaluation
correctPrediction = tf.equal(tf.argmax(predictionsLayer,1), tf.argmax(next_labels,1))
accuracy = tf.reduce_mean(tf.cast(correctPrediction,tf.float32))
confusionMatrix1 = tf.confusion_matrix(tf.argmax(next_labels,1), tf.argmax(predictionsLayer,1), num_classes=3, name='ConfMatrix')
## Saving variables so that we can restore them afterwards.
saver = tf.train.Saver()
save_dir = '/media/SecondDiskHDD/8classModels/DLmodels/tfModels/{}_{}'.format(cellType.__class__.__name__, datetime.now().strftime("%Y%m%d%H%M%S"))
#save_dir = '/home/Desktop/tfModels/{}_{}'.format(cellType.__class__.__name__, datetime.now().strftime("%Y%m%d%H%M%S"))
os.mkdir(save_dir)
varDict = {'nTimeSteps': N_TIMESTEPS_X, 'BatchSize': BATCH_SIZE, 'nFeatures': N_FEATURES,
'nNeuronsLSTM': N_NEURONS_LSTM, 'nEpochs': N_EPOCHS,
'learningRate': LEARNING_RATE, 'optimizerType': optimizer_type.__class__.__name__}
varDicSavingTxt = save_dir + '/varDict.txt'
modelFilesDir = save_dir + '/modelFiles'
os.mkdir(modelFilesDir)
logDir = save_dir + '/TBoardLogs'
os.mkdir(logDir)
acc_summary = tf.summary.scalar('Accuracy', accuracy)
loss_summary = tf.summary.scalar('Cost_CrossEntropy', cost)
summary_merged = tf.summary.merge_all()
with open(varDicSavingTxt, 'w') as outfile:
outfile.write(repr(varDict))
with tf.Session() as sess:
tf.set_random_seed(2)
sess.run(tf.global_variables_initializer())
train_writer = tf.summary.FileWriter(logDir + '/train', sess.graph)
validation_writer = tf.summary.FileWriter(logDir + '/validation')
# initialise iterator with data
train_val_string = sess.run(train_val_iterator.string_handle())
cm1Total = None
cm2Total = None
print('¡Training starts!')
for epoch in range(N_EPOCHS):
batchAccList = []
batchAccListVal = []
tot_loss_train = 0
tot_loss_validation = 0
for batch in range(N_BATCHES):
sess.run(train_iterator, feed_dict = {x : train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
optimizer_output, loss_value, summary, accBatch, cm1 = sess.run([optimizer, cost, summary_merged, accuracy, confusionMatrix1], feed_dict = {handle: train_val_string})
npArrayPred = predictionsLayer.eval(feed_dict= {handle: train_val_string})
predLabEnc = np.apply_along_axis(thresholdSet, 1, npArrayPred, value=0.5)
npArrayLab = next_labels.eval(feed_dict= {handle: train_val_string})
labLabEnc = np.argmax(npArrayLab, 1)
cm2 = confusion_matrix(labLabEnc, predLabEnc)
tot_loss_train += loss_value
batchAccList.append(accBatch)
try:
sess.run(val_iterator, feed_dict = {x: validation_data[0], y: validation_data[1], batch_size: BATCH_SIZE})
valLoss, valAcc, summary_val = sess.run([cost, accuracy, summary_merged], feed_dict = {handle: train_val_string})
tot_loss_validation += valLoss
batchAccListVal.append(valAcc)
except tf.errors.OutOfRangeError:
pass
if cm1Total is None and cm2Total is None:
cm1Total = cm1
cm2Total = cm2
else:
cm1Total += cm1
cm2Total += cm2
if batch % 10 == 0:
train_writer.add_summary(summary, batch)
validation_writer.add_summary(summary_val, batch)
epochAcc = tf.reduce_mean(batchAccList)
sess.run(train_iterator, feed_dict = {x : train_data[0], y: train_data[1], batch_size: BATCH_SIZE})
epochAcc_num = sess.run(epochAcc, feed_dict = {handle: train_val_string})
epochAccVal = tf.reduce_mean(batchAccListVal)
sess.run(val_iterator, feed_dict = {x: validation_data[0], y: validation_data[1], batch_size: BATCH_SIZE})
epochAcc_num_Val = sess.run(epochAccVal, feed_dict = {handle: train_val_string})
if epoch%10 == 0:
print("Epoch: {}, Loss: {:.4f}, Accuracy: {:.3f}".format(epoch, tot_loss_train / N_BATCHES, epochAcc_num))
print('Validation Loss: {:.4f}, Validation Accuracy: {:.3f}'.format(tot_loss_validation / N_BATCHES, epochAcc_num_Val))
cmLogFile1 = save_dir + '/cm1File.txt'
with open(cmLogFile1, 'w') as outfile:
outfile.write(repr(cm1Total))
cmLogFile2 = save_dir + '/cm2File.txt'
with open(cmLogFile2, 'w') as outfile:
outfile.write(repr(cm2Total))
saver.save(sess, modelFilesDir + '/model.ckpt')

TF | How to predict from CNN after training is done

Trying to work with the framework provided in the course Stanford cs231n, given the code below.
I can see the accuracy getting better and the net is trained however after the training process and checking the results on the validation set, how would I go to input one image into the model and see its prediction?
I have searched around and couldn't find some built in predict function in tensorflow as there is in keras.
Initializing the net and its parameters
# clear old variables
tf.reset_default_graph()
# setup input (e.g. the data that changes every batch)
# The first dim is None, and gets sets automatically based on batch size fed in
X = tf.placeholder(tf.float32, [None, 30, 30, 1])
y = tf.placeholder(tf.int64, [None])
is_training = tf.placeholder(tf.bool)
def simple_model(X,y):
# define our weights (e.g. init_two_layer_convnet)
# setup variables
Wconv1 = tf.get_variable("Wconv1", shape=[7, 7, 1, 32]) # Filter of size 7x7 with depth of 3. No. of filters is 32
bconv1 = tf.get_variable("bconv1", shape=[32])
W1 = tf.get_variable("W1", shape=[4608, 360]) # 5408 is 13x13x32 where 13x13 is the output of 7x7 filter on 32x32 image with padding of 2.
b1 = tf.get_variable("b1", shape=[360])
# define our graph (e.g. two_layer_convnet)
a1 = tf.nn.conv2d(X, Wconv1, strides=[1,2,2,1], padding='VALID') + bconv1
h1 = tf.nn.relu(a1)
h1_flat = tf.reshape(h1,[-1,4608])
y_out = tf.matmul(h1_flat,W1) + b1
return y_out
y_out = simple_model(X,y)
# define our loss
total_loss = tf.losses.hinge_loss(tf.one_hot(y,360),logits=y_out)
mean_loss = tf.reduce_mean(total_loss)
# define our optimizer
optimizer = tf.train.AdamOptimizer(5e-4) # select optimizer and set learning rate
train_step = optimizer.minimize(mean_loss)
Function for evaluating the model whether for training or validation and plots the results:
def run_model(session, predict, loss_val, Xd, yd,
epochs=1, batch_size=64, print_every=100,
training=None, plot_losses=False):
# Have tensorflow compute accuracy
correct_prediction = tf.equal(tf.argmax(predict,1), y)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
# shuffle indicies
train_indicies = np.arange(Xd.shape[0])
np.random.shuffle(train_indicies)
training_now = training is not None
# setting up variables we want to compute and optimize
# if we have a training function, add that to things we compute
variables = [mean_loss,correct_prediction,accuracy]
if training_now:
variables[-1] = training
# counter
iter_cnt = 0
for e in range(epochs):
# keep track of losses and accuracy
correct = 0
losses = []
# make sure we iterate over the dataset once
for i in range(int(math.ceil(Xd.shape[0]/batch_size))):
# generate indicies for the batch
start_idx = (i*batch_size)%Xd.shape[0]
idx = train_indicies[start_idx:start_idx+batch_size]
# create a feed dictionary for this batch
feed_dict = {X: Xd[idx,:],
y: yd[idx],
is_training: training_now }
# get batch size
actual_batch_size = yd[idx].shape[0]
# have tensorflow compute loss and correct predictions
# and (if given) perform a training step
loss, corr, _ = session.run(variables,feed_dict=feed_dict)
# aggregate performance stats
losses.append(loss*actual_batch_size)
correct += np.sum(corr)
# print every now and then
if training_now and (iter_cnt % print_every) == 0:
print("Iteration {0}: with minibatch training loss = {1:.3g} and accuracy of {2:.2g}"\
.format(iter_cnt,loss,np.sum(corr)/actual_batch_size))
iter_cnt += 1
total_correct = correct/Xd.shape[0]
total_loss = np.sum(losses)/Xd.shape[0]
print("Epoch {2}, Overall loss = {0:.3g} and accuracy of {1:.3g}"\
.format(total_loss,total_correct,e+1))
if plot_losses:
plt.plot(losses)
plt.grid(True)
plt.title('Epoch {} Loss'.format(e+1))
plt.xlabel('minibatch number')
plt.ylabel('minibatch loss')
plt.show()
return total_loss,total_correct
The functions calls that trains the model
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
print('Training')
run_model(sess,y_out,mean_loss,x_train,y_train,1,64,100,train_step,True)
print('Validation')
run_model(sess,y_out,mean_loss,x_val,y_val,1,64)
You do not need to go far, you simply pass your new (test) feature matrix X_test into your network and perform a forward pass - the output layer is the prediction. So the code is something like this
session.run(y_out, feed_dict={X: X_test})

Resources