Is there a way I can visualize the complete training loop for a GAN architecture in TensorBoard using PyTorch? I think it's possible using TF, but I am having a hard time figuring out how to do it with PyTorch.
You can use tensorboardX for this. You can make use of its SummaryWriter to create an event file in a given directory and add summaries and events to it.
The code below is an example you can use, but you have to add the loss values, the ground-truth images, and the generated images yourself. I commented where they have to go.
import logging

import numpy as np
import torchvision.utils as vutils
from tensorboardX import SummaryWriter

log = logging.getLogger(__name__)

REPORT_EVERY_ITER = 100
SAVE_IMAGE_EVERY_ITER = 1000

if __name__ == "__main__":
    writer = SummaryWriter()
    gen_losses = []
    dis_losses = []
    iter_no = 0

    # loop over the batches in the environment
    for batch_v in iterate_batches(envs):
        # get the generator output (gen_output_v) here
        # compute the generator loss and append it to gen_losses
        # compute the discriminator loss and append it to dis_losses
        iter_no += 1

        # log the mean loss values for the generator and the discriminator every 100 iterations
        if iter_no % REPORT_EVERY_ITER == 0:
            log.info(
                "Iter %d: gen_loss=%.3e, dis_loss=%.3e",
                iter_no,
                np.mean(gen_losses),
                np.mean(dis_losses),
            )
            writer.add_scalar("gen_loss", np.mean(gen_losses), iter_no)
            writer.add_scalar("dis_loss", np.mean(dis_losses), iter_no)
            gen_losses = []
            dis_losses = []

        # save the ground-truth and the generated images every 1000 iterations
        if iter_no % SAVE_IMAGE_EVERY_ITER == 0:
            # generated images from the generator
            writer.add_image(
                "fake",
                vutils.make_grid(gen_output_v.data[:64], normalize=True),
                iter_no,
            )
            # ground-truth images; these stay the same throughout training
            writer.add_image(
                "real",
                vutils.make_grid(batch_v.data[:64], normalize=True),
                iter_no,
            )
To view the results, run the command tensorboard --logdir runs in the same directory where you ran the model training (the runs directory contains the results from the training). A link will be shown which you can follow to view plots like the ones below. If you want to run TensorBoard on a remote server, add the --bind_all flag to the command so you can access it from outside.
[Screenshot: viewing the generated images]
[Screenshot: viewing the loss values]
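For reference, the commands mentioned above look like this (runs is the default log directory that SummaryWriter creates):
# locally
tensorboard --logdir runs
# on a remote server, to allow access from outside
tensorboard --logdir runs --bind_all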
I am currently trying to optimize the hyperparameters of a gradient boosting method with the hyperopt library. When I was working on my own computer, I used the Trials class and I was able to save and reload my results with the pickle library. This allowed me to keep a record of all the sets of parameters I tested. My code looked like this:
import os
import pickle as pkl

import xgboost as xgb
from hyperopt import SparkTrials, Trials, STATUS_OK, tpe, fmin
from sklearn.model_selection import cross_val_score

from LearningUtils.LearningUtils import build_train_test, get_train_test, mean_error, rmse, mae
from LearningUtils.constants import MAX_EVALS, CV, XGBOOST_OPTIM_SPACE, PARALELISM

if os.path.isfile(PATH_TO_TRIALS):  # we reload the past results
    with open(PATH_TO_TRIALS, 'rb') as trials_file:
        trials = pkl.load(trials_file)
else:  # we create the trials file
    trials = Trials()

# classic hyperparameter optimization
def objective(space):
    regressor = xgb.XGBRegressor(n_estimators=space['n_estimators'],
                                 max_depth=int(space['max_depth']),
                                 learning_rate=space['learning_rate'],
                                 gamma=space['gamma'],
                                 min_child_weight=space['min_child_weight'],
                                 subsample=space['subsample'],
                                 colsample_bytree=space['colsample_bytree'],
                                 verbosity=0)
    regressor.fit(X_train, Y_train)
    # apply k-fold cross-validation
    accuracies = cross_val_score(estimator=regressor, X=X_train, y=Y_train, cv=5)
    CrossValMean = accuracies.mean()
    return {'loss': 1 - CrossValMean, 'status': STATUS_OK}

best = fmin(fn=objective,
            space=XGBOOST_OPTIM_SPACE,
            algo=tpe.suggest,
            max_evals=MAX_EVALS,
            trials=trials,
            return_argmin=False)

# save the trials
pkl.dump(trials, open(PATH_TO_TRIALS, "wb"))
Now, I would like to make this code work on a remote server with more CPUs in order to allow parallelization and save time.
I saw that I can simply do that using the SparkTrials class of hyperopt instead of Trials. But SparkTrials objects cannot be saved with pickle. Do you have any idea how I could save and reload my trials results stored in a SparkTrials object?
So this might be a bit late, but after messing around a bit, I found a kind of hacky solution:
import pickle
from hyperopt import SparkTrials

spark_trials = SparkTrials()
pickling_trials = dict()
# copy every attribute except the unpicklable Spark handles
for k, v in spark_trials.__dict__.items():
    if k not in ['_spark_context', '_spark']:
        pickling_trials[k] = v

pickle.dump(pickling_trials, open('pickling_trials.hyperopt', 'wb'))
The _spark_context and _spark attributes of the SparkTrials instance are the reason the object cannot be serialized. It turns out that you don't need them if you want to reuse the object: if you re-run the optimization, a new Spark context is created anyway, so you can reuse the trials like this:
new_sparktrials = SparkTrials()
for att, v in pickling_trials.items():
    setattr(new_sparktrials, att, v)

best = fmin(loss_func,
            space=search_space,
            algo=tpe.suggest,
            max_evals=1000,
            trials=new_sparktrials)
voilà :)
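Wrapped up as helper functions, the same trick looks roughly like this (a minimal sketch based only on the snippets above; the names save_spark_trials and load_spark_trials are just illustrative):
import pickle
from hyperopt import SparkTrials

def save_spark_trials(trials, path):
    # drop the unpicklable Spark handles before dumping
    state = {k: v for k, v in trials.__dict__.items()
             if k not in ['_spark_context', '_spark']}
    with open(path, 'wb') as f:
        pickle.dump(state, f)

def load_spark_trials(path):
    # restore the saved attributes onto a fresh SparkTrials instance
    new_trials = SparkTrials()
    with open(path, 'rb') as f:
        for att, v in pickle.load(f).items():
            setattr(new_trials, att, v)
    return new_trials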
I have been training a neural net with a grid of hyperparameters (the nn grid below) but am unable to get results out, as I am getting the following error message:
Error message: 'int' object is not iterable
Code:
nn = H2OGridSearch(model=H2ODeepLearningEstimator,
                   hyper_params={
                       'activation': ["Rectifier", "Tanh", "Maxout", "RectifierWithDropout", "TanhWithDropout", "MaxoutWithDropout"],
                       'hidden': [[20,20],[50,50],[30,30,30],[25,25,25,25]],  ## small network, runs faster
                       'epochs': 1000000,  ## hopefully converges earlier...
                       'rate': [0.0005,0.001,0.0015,0.002,0.0025,0.003,0.0035,0.0040,0.0045,0.005],
                       'score_validation_samples': 10000,  ## sample the validation dataset (faster)
                       'stopping_rounds': 2,
                       'stopping_metric': "misclassification",  ## alternatives: "MSE","logloss","r2"
                       'stopping_tolerance': 0.01})
nn.train(train1_x, train1_y, train1)
There is a slight problem with how you are defining the grid. You can only pass a dictionary of lists (of values to grid over for each hyperparameter) in the hyper_params argument. You are seeing the 'int' object is not iterable error because you are passing an integer instead of a list for both score_validation_samples and stopping_rounds.
If there are arguments that you don't intend to grid over, then they should be passed instead to the grid's train() method. I'd also recommend using a validation frame or cross-validation when doing grid search so you don't have to use training metrics to choose the best model. See example below.
import h2o
from h2o.estimators.deeplearning import H2ODeepLearningEstimator
from h2o.grid.grid_search import H2OGridSearch

h2o.init()

# Import a sample binary outcome training set into H2O
train = h2o.import_file("https://s3.amazonaws.com/erin-data/higgs/higgs_train_10k.csv")

# Identify predictors and response
x = train.columns
y = "response"
x.remove(y)

# For binary classification, response should be a factor
train[y] = train[y].asfactor()

# Execute a grid search (also do 5-fold CV)
grid = H2OGridSearch(model=H2ODeepLearningEstimator, hyper_params={
    'activation': ["Rectifier", "Tanh", "Maxout", "RectifierWithDropout", "TanhWithDropout", "MaxoutWithDropout"],
    'hidden': [[20,20],[50,50],[30,30,30],[25,25,25,25]]})

grid.train(x=x, y=y, training_frame=train,
           score_validation_samples=10000,
           stopping_rounds=2,
           stopping_metric="misclassification",
           stopping_tolerance=0.01,
           nfolds=5)

# Look at grid results
gridperf = grid.get_grid(sort_by='mean_per_class_error')
There are more examples of how to use grid search in the H2O Python Grid Search tutorial.
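If you then want to pull the best model out of the sorted grid, something like this should work (a small sketch, assuming the gridperf object above and the default ascending sort, so the first model has the lowest mean per-class error):
# the first model in the sorted grid should be the best one for an error metric
best_model = gridperf.models[0]
# report its cross-validated performance
print(best_model.model_performance(xval=True))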
I am using Keras with a tensorflow-gpu back end on an Ubuntu 17.04 VM.
I have created a custom generator to read inputs and classes from pickle files, but I keep getting the following error:
terminate called after throwing an instance of 'std::bad_alloc'
  what():  std::bad_alloc
The code for loading data can be seen here:
def data_gen(self, pklPaths, batch_size=16):
    while True:
        data = []
        labels = []
        for i, pklPath in enumerate(pklPaths):
            # print(pklPath)
            image = pickle.load(open(pklPath, 'rb'))
            for i in range(batch_size):
                # Set a label
                data.append(image[0][0])
                labels.append(image[1][1])
            yield np.array(data), np.array(labels)
Then in the training section I'm using fit_generator:
vm_model.fit_generator(vm.data_gen(pkl_train), validation_data=vm.data_gen(pkl_validate), epochs=15, verbose=2,
                       steps_per_epoch=(5000/16), validation_steps=(1000/16), callbacks=[tb])
The generator should have better memory management than loading everything at once, but that doesn't seem to be the case! Any ideas?
OK, so I found the issue, so I'm answering my own question.
Basically, the previous version had one unnecessary loop and also kept increasing the size of data and labels, essentially loading the entire dataset into memory. The fixed generator:
def data_gen(self, pklPaths, batch_size=16):
    while True:
        data = []
        labels = []
        for i, pklPath in enumerate(pklPaths):
            # load pickle
            image = pickle.load(open(pklPath, 'rb'))
            # append
            data.append(image[0][0])
            labels.append(image[1])
            # if the batch is complete, yield data and labels and reset
            if i % batch_size == 0 and i != 0:
                yield np.array(data), np.array(labels)
                data.clear()
                labels.clear()
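For completeness, this is roughly how the fixed generator is consumed (the same fit_generator call from the question; the steps_per_epoch and validation_steps values are assumptions based on the dataset sizes mentioned there):
vm_model.fit_generator(vm.data_gen(pkl_train, batch_size=16),
                       validation_data=vm.data_gen(pkl_validate, batch_size=16),
                       epochs=15, verbose=2,
                       steps_per_epoch=5000 // 16, validation_steps=1000 // 16,
                       callbacks=[tb])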
I have one image (I don't have a dataset) and I want to train a model in TensorFlow,
such that I can use that model to recognize the image fast.
I have implemented one such thing, but it doesn't work:
import tensorflow as tf
filenames = ['pic.jpg']
# step 2
filename_queue = tf.train.string_input_producer(filenames)
# step 3: read, decode and resize images
reader = tf.WholeFileReader()
filename, content = reader.read(filename_queue)
image = tf.image.decode_jpeg(content, channels=3)
image = tf.cast(image, tf.float32)
resized_image = tf.image.resize_images(image, [224, 224])
# step 4: Batching
image_batch = tf.train.batch([resized_image], batch_size=8)
Also, how is Vuforia able to recognize an image so fast with only one image? I want a similar implementation in TensorFlow.
This is not how machine learning and deep learning work. You can't just grab one element and build a model that explains this one element. If you check a few NN tutorials, you will see that in order to train a reasonable model people use thousands or even millions of data points.
I've been experimenting with adversarial images and I read up on the fast gradient sign method from the following link https://arxiv.org/pdf/1412.6572.pdf...
The instructions explain that the necessary gradient can be calculated using backpropagation...
I've been successful at generating adversarial images but I have failed at attempting to extract the gradient necessary to create an adversarial image. I will demonstrate what I mean.
Let us assume that I have already trained my algorithm using logistic regression. I restore the model and extract the number I wish to change into an adversarial image. In this case it is the number 2...
# construct model
logits = tf.matmul(x, W) + b
pred = tf.nn.softmax(logits)
...
...
# assign the images of number 2 to the variable
sess.run(tf.assign(x, labels_of_2))
# setup softmax
sess.run(pred)
# placeholder for target label
fake_label = tf.placeholder(tf.int32, shape=[1])
# setup the fake loss
fake_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=fake_label)
# minimize fake loss using gradient descent,
# calculating the derivatives of the weight of the fake image will give the direction of weights necessary to change the prediction
adversarial_step = tf.train.GradientDescentOptimizer(learning_rate=FLAGS.learning_rate).minimize(fake_loss, var_list=[x])
# continue calculating the derivative until the prediction changes for all 10 images
for i in range(FLAGS.training_epochs):
    # fake label tells the training algorithm to use the weights calculated for number 6
    sess.run(adversarial_step, feed_dict={fake_label: np.array([6])})
    sess.run(pred)
This is my approach, and it works perfectly. It takes my image of number 2 and changes it only slightly so that when I run the following...
x_in = np.expand_dims(x[0], axis=0)
classification = sess.run(tf.argmax(pred, 1))
print(classification)
it will predict the number 2 as a number 6.
The issue is, I need to extract the gradient necessary to trick the neural network into thinking the number 2 is a 6. I need to use this gradient to create the nematode (the perturbation) mentioned above.
I am not sure how I can extract the gradient value. I tried looking at tf.gradients but I was unable to figure out how to produce an adversarial image using this function. I implemented the following after the fake_loss variable above...
gradients = tf.gradients(fake_loss, x)

for i in range(FLAGS.training_epochs):
    # calculate gradient with weight of number 6
    gradient_value = sess.run(gradients, feed_dict={fake_label: np.array([6])})
    # update the image of number 2
    gradient_update = x + 0.007 * gradient_value[0]
    sess.run(tf.assign(x, gradient_update))
    sess.run(pred)
Unfortunately the prediction did not change in the way I wanted, and moreover this logic resulted in a rather blurry image.
I would appreciate an explanation as to what I need to do in order to calculate and extract the gradient that will trick the neural network, so that if I were to take this gradient and apply it to my image as a nematode, it would result in a different prediction.
Why not let the TensorFlow optimizer add the gradients to your image? You can still evaluate the nematode to get the resulting gradients that were added.
I created a bit of sample code to demonstrate this with a panda image. It uses the VGG16 neural network to transform your own panda image into a "goldfish" image. Every 100 iterations it saves the image as a PDF so you can print it losslessly and check whether your image is still classified as a goldfish.
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipyd
from libs import vgg16 # Download here! https://github.com/pkmital/CADL/tree/master/session-4/libs
pandaimage = plt.imread('panda.jpg')
pandaimage = vgg16.preprocess(pandaimage)
plt.imshow(pandaimage)
img_4d = np.array([pandaimage])
g = tf.get_default_graph()
input_placeholder = tf.Variable(img_4d,trainable=False)
to_add_image = tf.Variable(tf.random_normal([224,224,3], mean=0.0, stddev=0.1, dtype=tf.float32))
combined_images_not_clamped = input_placeholder+to_add_image
filledmax = tf.fill(tf.shape(combined_images_not_clamped), 1.0)
filledmin = tf.fill(tf.shape(combined_images_not_clamped), 0.0)
greater_than_one = tf.greater(combined_images_not_clamped, filledmax)
combined_images_with_max = tf.where(greater_than_one, filledmax, combined_images_not_clamped)
lower_than_zero = tf.less(combined_images_with_max, filledmin)
combined_images = tf.where(lower_than_zero, filledmin, combined_images_with_max)
net = vgg16.get_vgg_model()
tf.import_graph_def(net['graph_def'], name='vgg')
names = [op.name for op in g.get_operations()]
style_layer = 'prob:0'
the_prediction = tf.import_graph_def(
    net['graph_def'],
    name='vgg',
    input_map={'images:0': combined_images},
    return_elements=[style_layer])
goldfish_expected_np = np.zeros(1000)
goldfish_expected_np[1]=1.0
goldfish_expected_tf = tf.Variable(goldfish_expected_np,dtype=tf.float32,trainable=False)
loss = tf.reduce_sum(tf.square(the_prediction[0]-goldfish_expected_tf))
optimizer = tf.train.AdamOptimizer().minimize(loss)
sess = tf.InteractiveSession()
sess.run(tf.global_variables_initializer())
def show_many_images(*images):
    fig = plt.figure()
    for i in range(len(images)):
        print(images[i].shape)
        subplot_number = 100 + 10*len(images) + (i+1)
        plt.subplot(subplot_number)
        plt.imshow(images[i])
    plt.show()

for i in range(1000):
    _, loss_val = sess.run([optimizer, loss])

    if i % 100 == 1:
        print("Loss at iteration %d: %f" % (i, loss_val))
        _, loss_val, adversarial_image, pred, nematode = sess.run([optimizer, loss, combined_images, the_prediction, to_add_image])
        res = np.squeeze(pred)
        average = np.mean(res, 0)
        res = res / np.sum(average)
        plt.imshow(adversarial_image[0])
        plt.show()
        print([(res[idx], net['labels'][idx]) for idx in res.argsort()[-5:][::-1]])
        show_many_images(img_4d[0], nematode, adversarial_image[0])
        plt.imsave('adversarial_goldfish.pdf', adversarial_image[0], format='pdf')  # save for printing
Let me know if this helps you!
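If you specifically want to extract the fast-gradient-sign perturbation in the question's own logistic-regression setup instead of training an additive image, a minimal sketch could look like this. It reuses the question's logits, x (the image variable), fake_label placeholder and sess; the 0.007 epsilon and the [0, 1] pixel clipping are assumptions, and for the targeted case (making the 2 classify as a 6) you step against the gradient of the fake loss.
# sketch: compute the FGSM perturbation explicitly and apply it once
fake_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=fake_label)
gradients = tf.gradients(fake_loss, x)[0]            # d(fake_loss)/d(image)
perturbation = 0.007 * tf.sign(gradients)            # this is the "nematode"
# subtract the perturbation to lower the fake loss (targeted attack), keeping pixels in [0, 1]
adversarial_update = tf.assign(x, tf.clip_by_value(x - perturbation, 0.0, 1.0))

nematode_value, _ = sess.run([perturbation, adversarial_update],
                             feed_dict={fake_label: np.array([6])})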