I have a multi-input network that uses a tf.bool tf.placeholder to control how batch normalization behaves during training versus validation/testing.
I've been trying to convert this trained model to CoreML with the tf-coreml library, with no success; the conversion fails with the error below:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Retval[26] does not have value
I understand this error means a node is missing a value that the converter needs in order to run the model. I also understand the error is tied to control-flow operations (the batch-normalization call creates ops such as Switch and Merge). The TensorFlow source code contains this test:
def testSwitchDeadBranch(self):
  with self.cached_session():
    data = constant_op.constant([1, 2, 3, 4, 5, 6], name="data")
    ports = ops.convert_to_tensor(True, name="ports")
    switch_op = control_flow_ops.switch(data, ports)
    dead_branch = array_ops.identity(switch_op[0])

    with self.assertRaisesWithPredicateMatch(
        errors_impl.InvalidArgumentError,
        lambda e: "Retval[0] does not have value" in str(e)):
      self.evaluate(dead_branch)
Note that my error is Retval[26] (I've also gotten [24], etc.), not Retval[0]. I assume this test exercises the Switch "dead branch", which should be the branch that goes unused during inference. The source does the same for the Merge "dead branch".
Is there any detail I’m missing that may be causing this error (not the first error I’ve faced during conversion, of course)? The way the inference is done? The way batch normalization is implemented? The way the model is saved?
What I’ve done so far:
I’m using Tensorflow 1.14.0
I know tf.layers.batch_normalization creates operations Switch and Merge, which are not CoreML compatible
I’ve tried converting to Tensorflow Lite with similar issues
I’ve follow Facenet (this model uses the same tf.bool logic for training, validation, testing) conversion process with no success
I’ve tried the GraphTransforms library
I’ve tried scripts to remove / modify the control flow
I’ve created separate graphs to avoid the extra ops with no success
Note: I’ve abstracted big part of the code to post this question.
This is how batch normalization is implemented (within a convolution block).
training = tf.placeholder(tf.bool, shape = (), name = 'training')

def conv_layer(input, kernelSize, nFilters, poolSize, stride, input_channels = 1, name = 'conv'):
    with tf.name_scope(name):
        shape = [kernelSize, kernelSize, input_channels, nFilters]
        weights = new_weights(shape = shape)
        biases = new_biases(length = nFilters)
        conv = tf.nn.conv2d(input, weights, strides = [1, 2, 2, 1], padding = 'SAME', name = 'convL')
        conv += biases
        pool = tf.reduce_max(conv, reduction_indices=[3], keep_dims=True, name = 'pool')
        pool = tf.nn.max_pool(conv, ksize = [1, poolSize, poolSize, 1], strides = shape, padding = 'SAME')
        bnorm = tf.layers.batch_normalization(pool, training = training, center = True, scale = True, fused = False, reuse = False)
        act = tf.nn.relu(bnorm)
        return act
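For context on the "separate graphs" item in the list above: my understanding is that when training is a constant Python bool rather than a tf.bool placeholder, tf.layers.batch_normalization does not emit Switch/Merge ops at all. A minimal, self-contained check of that assumption (TF 1.x, arbitrary shapes):

import tensorflow as tf

g1 = tf.Graph()
with g1.as_default():
    x = tf.placeholder(tf.float32, [1, 80, 160, 16], name='x')
    tf.layers.batch_normalization(x, training=False, fused=False)    # constant Python bool
print([op.name for op in g1.get_operations() if op.type in ('Switch', 'Merge')])    # expected: []

g2 = tf.Graph()
with g2.as_default():
    x = tf.placeholder(tf.float32, [1, 80, 160, 16], name='x')
    flag = tf.placeholder(tf.bool, shape=(), name='training')
    tf.layers.batch_normalization(x, training=flag, fused=False)     # placeholder flag
print([op.name for op in g2.get_operations() if op.type in ('Switch', 'Merge')])    # expected: non-empty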
Below is the code to train and save the model.
saver = tf.train.Saver()

with tf.Session(config = config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    sess.run(init_train_op)

    for epoch in range(MAX_EPOCHS):
        for step in range(10):
            l, _, se = sess.run(
                [loss, train_op, mean_squared_error],
                feed_dict = {training: True})

        print('\nRunning validation operation...')
        sess.run(init_val_op)
        for _ in range(10):
            val_out, val_l, val_se = sess.run(
                [out, val_loss, val_mean_squared_error],
                feed_dict = {training: False})
        sess.run(init_train_op)  # switch back to training set

    # Save model
    print('Saving Model...\n')
    saver.save(sess, join(saveDir, './model_saver_validation'.format(modelIndex)), write_meta_graph = True)
Below is the code to load, update inputs, perform inference, and freeze the model.
# Dummy data for inference
b = np.zeros((1, 80, 160, 1), np.float32)
ill = np.ones((1, 3), np.float32)
is_train = False

def freeze():
    with tf.Graph().as_default():
        with tf.Session() as sess:
            bIn = tf.placeholder(dtype=tf.float32, shape=[1, 80, 160, 1], name='bIn')
            illumIn = tf.placeholder(dtype=tf.float32, shape=[1, 3], name='illumIn')
            training = tf.placeholder(tf.bool, shape=(), name='training')

            # Load the model metagraph and checkpoint
            meta_file = meta_graph        # .meta file from saver.save()
            ckpt_file = checkpoint_file   # checkpoint file

            # Load graph to redirect inputs from iterator to expected inputs
            saver = tf.train.import_meta_graph(meta_file, input_map={
                'IteratorGetNext:0': bIn,
                'IteratorGetNext:3': illumIn,
                'training:0': training}, clear_devices = True)

            tf.get_default_session().run(tf.global_variables_initializer())
            tf.get_default_session().run(tf.local_variables_initializer())
            saver.restore(tf.get_default_session(), ckpt_file)

            pred = tf.get_default_graph().get_tensor_by_name('Out:0')
            tf.get_default_session().run(pred, feed_dict={'bIn:0': b, 'poseIn:0': po, 'training:0': is_train})

            # Retrieve the protobuf graph definition and fix the batch norm nodes
            input_graph_def = sess.graph.as_graph_def()

            # Freeze the graph def
            output_graph_def = freeze_graph_def(sess, input_graph_def, output_node_names)

            # Serialize and dump the output graph to the filesystem
            with tf.gfile.GFile(frozen_graph, 'wb') as f:
                f.write(output_graph_def.SerializeToString())

freeze()
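The freeze_graph_def helper above is part of the code I abstracted; it follows the Facenet-style pattern of fixing the batch-norm related nodes before folding variables into constants. A simplified sketch of that pattern (adapted from Facenet's freeze_graph.py; my actual helper may differ in details):

from tensorflow.python.framework import graph_util

def freeze_graph_def(sess, input_graph_def, output_node_names):
    # Rewrite nodes touched by batch normalization so they can be frozen.
    for node in input_graph_def.node:
        if node.op == 'RefSwitch':
            node.op = 'Switch'
            for index in range(len(node.input)):
                if 'moving_' in node.input[index]:
                    node.input[index] = node.input[index] + '/read'
        elif node.op == 'AssignSub':
            node.op = 'Sub'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
        elif node.op == 'AssignAdd':
            node.op = 'Add'
            if 'use_locking' in node.attr:
                del node.attr['use_locking']
    # Replace the remaining variables with constants.
    return graph_util.convert_variables_to_constants(
        sess, input_graph_def, output_node_names.split(','))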
Below is the code to convert to CoreML.
tfcoreml.convert(
    tf_model_path=frozen_graph,
    mlmodel_path='./coreml_model.mlmodel',
    output_feature_names=['Out:0'],
    input_name_shape_dict={
        'bIn:0': [1, 80, 160, 1],
        'illumIn:0': [1, 3],
        'training:0': []})
Below is the error thrown by tf-coreml.
Loading the TF graph...
Graph Loaded.
Collecting all the 'Const' ops from the graph, by running it....
Traceback (most recent call last):
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1356, in _do_call
return fn(*args)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1341, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1429, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Retval[26] does not have value
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "tf2opencv.py", line 392, in <module>
'illumIn:0': [1, 3], 'poseIn:0': [1, 16], 'training:0': []})
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tfcoreml/_tf_coreml_converter.py", line 586, in convert
custom_conversion_functions=custom_conversion_functions)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tfcoreml/_tf_coreml_converter.py", line 243, in _convert_pb_to_mlmodel
tensors_evaluated = sess.run(tensors, feed_dict=input_feed_dict)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 950, in run
run_metadata_ptr)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1173, in _run
feed_dict_tensor, options, run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1350, in _do_run
run_metadata)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1370, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Retval[26] does not have value
Related
With TF 2.0.0, I can train an architecture with one input, I can train an architecture with one input using a custom generator, and I can train an architecture with two inputs. But I can't train an architecture with two inputs using a custom generator.
To keep it minimalist, here's a simple example, with no generator and no multiple inputs to start with:
from tensorflow.keras import layers, models, Model, Input, losses
from numpy import random, array, zeros
input1 = Input(shape=2)
dense1 = layers.Dense(5)(input1)
fullModel = Model(inputs=input1, outputs=dense1)
fullModel.summary()
# Generate random examples:
nbSamples = 21
X_train = random.rand(nbSamples, 2)
Y_train = random.rand(nbSamples, 5)
batchSize = 4
fullModel.compile(loss=losses.LogCosh())
fullModel.fit(X_train, Y_train, epochs=10, batch_size=batchSize)
It's a simple dense layer which takes input vectors of size 2. The randomly generated dataset contains 21 examples and the batch size is 4. Instead of loading all the data and passing it to model.fit(), we can also pass a custom generator. The main advantage (for RAM consumption) is that only one batch is loaded at a time rather than the whole dataset. Here is a simple example with the previous architecture and a custom generator:
import json

# Save the last dataset in a file:
with open("./dataset1input.txt", 'w') as file:
    for i in range(nbSamples):
        example = {"x": X_train[i].tolist(), "y": Y_train[i].tolist()}
        file.write(json.dumps(example) + "\n")

def generator1input(datasetPath, batch_size, inputSize, outputSize):
    X_batch = zeros((batch_size, inputSize))
    Y_batch = zeros((batch_size, outputSize))
    i = 0
    while True:
        with open(datasetPath, 'r') as file:
            for line in file:
                example = json.loads(line)
                X_batch[i] = array(example["x"])
                Y_batch[i] = array(example["y"])
                i += 1
                if i % batch_size == 0:
                    yield (X_batch, Y_batch)
                    i = 0

fullModel.compile(loss=losses.LogCosh())
my_generator = generator1input("./dataset1input.txt", batchSize, 2, 5)
fullModel.fit(my_generator, epochs=10, steps_per_epoch=int(nbSamples/batchSize))
Here, the generator opens the dataset file but loads only batch_size examples (not nbSamples examples) each time it is called, advancing through the file as it loops.
Now, I can build a simple functional architecture with 2 inputs, and no generator:
input1 = Input(shape=2)
dense1 = layers.Dense(5)(input1)
subModel1 = Model(inputs=input1, outputs=dense1)
input2 = Input(shape=3)
dense2 = layers.Dense(5)(input2)
subModel2 = Model(inputs=input2, outputs=dense2)
averageLayer = layers.average([subModel1.output, subModel2.output])
fullModel = Model(inputs=[input1, input2], outputs=averageLayer)
fullModel.summary()
# Generate random examples:
nbSamples = 21
X1 = random.rand(nbSamples, 2)
X2 = random.rand(nbSamples, 3)
Y = random.rand(nbSamples, 5)
fullModel.compile(loss=losses.LogCosh())
fullModel.fit([X1, X2], Y, epochs=10, batch_size=batchSize)
Up to this point, every model compiles and runs, but I can't use a generator with this last architecture and its two inputs. When I try the following code (which, in my opinion, should logically work):
# Save data in a file:
with open("./dataset.txt", 'w') as file:
    for i in range(nbSamples):
        example = {"x1": X1[i].tolist(), "x2": X2[i].tolist(), "y": Y[i].tolist()}
        file.write(json.dumps(example) + "\n")

def generator(datasetPath, batch_size, inputSize1, inputSize2, outputSize):
    X1_batch = zeros((batch_size, inputSize1))
    X2_batch = zeros((batch_size, inputSize2))
    Y_batch = zeros((batch_size, outputSize))
    i = 0
    while True:
        with open(datasetPath, 'r') as file:
            for line in file:
                example = json.loads(line)
                X1_batch[i] = array(example["x1"])
                X2_batch[i] = array(example["x2"])
                Y_batch[i] = array(example["y"])
                i += 1
                if i % batch_size == 0:
                    yield ([X1_batch, X2_batch], Y_batch)
                    i = 0

fullModel.compile(loss=losses.LogCosh())
my_generator = generator("./dataset.txt", batchSize, 2, 3, 5)
fullModel.fit(my_generator, epochs=10, steps_per_epoch=(nbSamples//batchSize))
I obtain the following error:
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\keras\engine\training.py", line 729, in fit
use_multiprocessing=use_multiprocessing)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 224, in fit
distribution_strategy=strategy)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 547, in _process_training_inputs
use_multiprocessing=use_multiprocessing)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py", line 606, in _process_inputs
use_multiprocessing=use_multiprocessing)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\keras\engine\data_adapter.py", line 566, in __init__
reassemble, nested_dtypes, output_shapes=nested_shape)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\data\ops\dataset_ops.py", line 540, in from_generator
output_types, tensor_shape.as_shape, output_shapes)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\data\util\nest.py", line 471, in map_structure_up_to
results = [func(*tensors) for tensors in zip(*all_flattened_up_to)]
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\data\util\nest.py", line 471, in <listcomp>
results = [func(*tensors) for tensors in zip(*all_flattened_up_to)]
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\framework\tensor_shape.py", line 1216, in as_shape
return TensorShape(shape)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\framework\tensor_shape.py", line 776, in __init__
self._dims = [as_dimension(d) for d in dims_iter]
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\framework\tensor_shape.py", line 776, in <listcomp>
self._dims = [as_dimension(d) for d in dims_iter]
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\framework\tensor_shape.py", line 718, in as_dimension
return Dimension(value)
File "C:\Anaconda\lib\site-packages\tensorflow_core\python\framework\tensor_shape.py", line 193, in __init__
self._value = int(value)
TypeError: int() argument must be a string, a bytes-like object or a number, not 'tuple'
As explained in the docs, the x argument of model.fit() can be "A generator or keras.utils.Sequence returning (inputs, targets)", and "The iterator should return a tuple of length 1, 2, or 3, where the optional second and third elements will be used for y and sample_weight respectively". Thus, I think it cannot take more than one generator as input, and perhaps multiple inputs are simply not possible with a custom generator. Could you offer an explanation? A solution?
(Otherwise, it seems possible to go through tf.data.Dataset.from_generator() for a less custom approach, but I have trouble understanding what to put in the output_signature argument.)
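For what it's worth, here is my best guess at what that would look like for this two-input case (untested; output_signature requires a newer release than TF 2.0, I believe 2.4+, and it assumes the generator yields ((X1_batch, X2_batch), Y_batch)):

import tensorflow as tf

dataset = tf.data.Dataset.from_generator(
    lambda: generator("./dataset.txt", batchSize, 2, 3, 5),
    output_signature=(
        (tf.TensorSpec(shape=(None, 2), dtype=tf.float64),    # X1_batch
         tf.TensorSpec(shape=(None, 3), dtype=tf.float64)),   # X2_batch
        tf.TensorSpec(shape=(None, 5), dtype=tf.float64)))    # Y_batch

fullModel.fit(dataset, epochs=10, steps_per_epoch=nbSamples // batchSize)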
[EDIT] Thank you for your response, @Francis Tang. It is in fact possible to use a custom generator; your answer made me realize I just had to change the line:
yield ([X1_batch, X2_batch], Y_batch)
To:
yield (X1_batch, X2_batch), Y_batch
Nevertheless, it is indeed perhaps better to use tf.keras.utils.Sequence, although I find it a bit restrictive.
In particular, in the example given (as in most Sequence examples I could find), __init__() is used to load the full dataset up front, which defeats the purpose of a generator.
But maybe that was just one particular example, and __init__() doesn't have to be used that way: you can read the file directly and load only the desired batch in __getitem__().
In that case, it seems you either have to scan the data file on every call, or create one file per batch beforehand (not really optimal).
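For illustration, this is the sort of thing I have in mind (a hypothetical sketch, not code from the answer): record the byte offset of every line once in __init__ (cheap), then seek and read only the current batch in __getitem__:

import json
from numpy import array
from tensorflow.keras.utils import Sequence

class JsonLinesSequence(Sequence):
    def __init__(self, path, batch_size):
        self.path, self.bs = path, batch_size
        # One cheap pass over the file to record line offsets;
        # no example data is kept in memory.
        self.offsets, offset = [], 0
        with open(path, 'rb') as f:
            for line in f:
                self.offsets.append(offset)
                offset += len(line)

    def __len__(self):
        return len(self.offsets) // self.bs

    def __getitem__(self, idx):
        rows = []
        with open(self.path, 'rb') as f:
            for off in self.offsets[idx * self.bs:(idx + 1) * self.bs]:
                f.seek(off)
                rows.append(json.loads(f.readline()))
        X1 = array([r["x1"] for r in rows])
        X2 = array([r["x2"] for r in rows])
        Y = array([r["y"] for r in rows])
        return (X1, X2), Y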
import pickle
from tensorflow.python.keras.utils.data_utils import Sequence

class generator(Sequence):
    def __init__(self, filename, batch_size):
        data = pickle.load(open(filename, 'rb'))
        self.X1 = data['X1']
        self.X2 = data['X2']
        self.y = data['y']
        self.bs = batch_size

    def __len__(self):
        return (len(self.y) - 1) // self.bs + 1

    def __getitem__(self, idx):
        start, end = idx * self.bs, (idx + 1) * self.bs
        return (self.X1[start:end], self.X2[start:end]), self.y[start:end]
You need to write a class using Sequence: https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence
I wrote a generator for Keras that uses PyTables to get images from an HDF5 file (see the code below).
It works fine when I call it like so:
self._model.fit_generator(self.training_generator,
                          epochs=epochs,
                          validation_data=self.validation_generator,
                          verbose=1,
                          callbacks=[model_checkpoint, tensorboard_callback],
                          use_multiprocessing=True,
                          # workers=2  # uncommenting this and using more than 1 worker fails
                          )
However, if I use multiple workers (see the commented line above), I get the error shown below. I suspect this is related to multiple processes attempting to access the HDF5 file. However, I thought PyTables and HDF5 could handle this for read-only access. So what am I doing wrong?
Bonus question: will this code make sure that, during training, the model sees a given sample only once per epoch, as mentioned here under Notes?
Sequence are a safer way to do multiprocessing. This structure
guarantees that the network will only train once on each sample per
epoch which is not the case with generators.
This is the error I get when using more than one worker:
multiprocessing.pool.RemoteTraceback:
"""
Traceback (most recent call last):
File "/usr/lib/python3.7/multiprocessing/pool.py", line 121, in worker
result = (True, func(*args, **kwds))
File "/project/path/venv/lib/python3.7/site-packages/keras/utils/data_utils.py", line 401, in get_index
return _SHARED_SEQUENCES[uid][i]
File "/project/path/python_package/python_package/training_generators.py", line 41, in __getitem__
images, masks, weights = self.__data_generation(indexes)
File "/project/path/python_package/python_package/training_generators.py", line 52, in __data_generation
images, labels = self.__get_images(indexes)
File "/project/path/python_package/python_package/training_generators.py", line 79, in __get_images
labels[counter] = self.tables.root['labels'][i, ...]
File "/project/path/venv/lib/python3.7/site-packages/tables/array.py", line 662, in __getitem__
arr = self._read_slice(startl, stopl, stepl, shape)
File "/project/path/venv/lib/python3.7/site-packages/tables/array.py", line 766, in _read_slice
self._g_read_slice(startl, stopl, stepl, nparr)
File "tables/hdf5extension.pyx", line 1585, in tables.hdf5extension.Array._g_read_slice
tables.exceptions.HDF5ExtError: HDF5 error back trace
File "H5Dio.c", line 216, in H5Dread
can't read data
File "H5Dio.c", line 587, in H5D__read
can't read data
File "H5Dchunk.c", line 2276, in H5D__chunk_read
error looking up chunk address
File "H5Dchunk.c", line 3022, in H5D__chunk_lookup
can't query chunk address
File "H5Dbtree.c", line 1047, in H5D__btree_idx_get_addr
can't get chunk info
File "H5B.c", line 341, in H5B_find
unable to load B-tree node
File "H5AC.c", line 1763, in H5AC_protect
H5C_protect() failed
File "H5C.c", line 2565, in H5C_protect
can't load entry
File "H5C.c", line 6890, in H5C_load_entry
Can't deserialize image
File "H5Bcache.c", line 181, in H5B__cache_deserialize
wrong B-tree signature
End of HDF5 error back trace
Problems reading the array data.
"""
This is the code of my generator:
class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'

    def __init__(self, pytables_file_path=None, batch_size=32, shuffle=True, image_processor: ImageProcessor = None,
                 augment_params=None, image_type=None):
        'Initialization'
        self.batch_size = batch_size
        self.image_type = image_type
        self.pytable_file_path = pytables_file_path
        self.tables = tables.open_file(self.pytable_file_path, 'r')
        self.number_of_samples = self.tables.root[self.image_type].shape[0]
        self.image_size = self.tables.root[self.image_type].shape[1:]
        self.indexes = list(range(self.number_of_samples))
        self.shuffle = shuffle
        self.image_processor = image_processor
        self.on_epoch_end()
        self.augment_params = augment_params

    def __del__(self):
        self.tables.close()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.floor(self.number_of_samples / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]
        # Generate data
        images, masks, weights = self.__data_generation(indexes)
        mask_wei_arr = np.concatenate((masks, weights[:, :, :, np.newaxis]), axis=-1)
        return (images, mask_wei_arr)

    def on_epoch_end(self):
        """Run after each epoch."""
        if self.shuffle:
            np.random.shuffle(self.indexes)  # Shuffle indexes after each epoch
    def __data_generation(self, indexes):
        'Generates data containing batch_size samples'  # X : (n_samples, *dim, n_channels)
        images, labels = self.__get_images(indexes)
        if self.image_processor:
            images = self.__process_images(images)
        masks, weights = self.generate_masks_and_weights_from_labels(labels)
        if self.augment_params:
            [images, masks, weights] = self.augment_data(images, masks, weights)
        images = images.astype('float32')
        masks_new = masks.astype('float32')
        weights_new = weights.astype('float32')
        weights_new = weights_new[:, :, :, 0]
        return images, masks_new, weights_new

    def __process_images(self, images):
        for ind, image in enumerate(images):
            images[ind, ...] = self.image_processor.process(image)
        return images

    def __get_images(self, indexes):
        images = np.empty((self.batch_size, *self.image_size))
        labels = np.empty((self.batch_size, *self.image_size))
        for counter, i in enumerate(indexes):
            current_image = self.tables.root[self.image_type][i, ...]
            images[counter] = current_image
            labels[counter] = self.tables.root['labels'][i, ...]
        return images, labels

    def generate_masks_and_weights_from_labels(self, labels):
        max_lbl_val = int(np.max(labels))
        edges = np.zeros_like(labels).astype(bool)
        masks = np.asarray(labels > 0).astype(float)
        weights = np.ones_like(labels)
        se_size = 3  # use '3': to get 1 pixel dilation; use '5': to get 2 pixel dilation
        structure = np.ones((1, se_size, se_size, 1))
        for lbl_ind in range(1, max_lbl_val + 1):  # iterate over labels
            label_mask = labels == lbl_ind
            label_dilated_edges = scipy.ndimage.morphology.binary_dilation(label_mask, structure) & ~label_mask
            label_eroded_edges = ~scipy.ndimage.morphology.binary_erosion(label_mask, structure) & label_mask
            label_edges = np.bitwise_or(label_eroded_edges, label_dilated_edges)
            edges = np.bitwise_or(edges, label_edges)
        weights[edges] *= 10  # weight the edges more by factor 10
        return masks, weights
    def augment_data(self, images, masks, weights):
        # for index, _ in enumerate(images):
        #     [images[index, :, :, 0], masks[index, :, :, 0], weights[index, :, :, 0]] = data_augmentation(
        #         [images[index, :, :, 0], masks[index, :, :, 0], weights[index, :, :, 0]], self.augment_params,
        #         order=[1, 0, 0])
        for index, image in enumerate(images):
            image = images[index, ...]
            mask = masks[index, ...]
            weight = weights[index, ...]
            [image, mask, weight] = data_augmentation([image, mask, weight], self.augment_params, order=[1, 0, 0])
            # fix, ax = plt.subplots(1, 3, figsize=(5, 15))
            # ax[0].imshow(image[:, :, 0])
            # ax[1].imshow(mask[:, :, 0])
            # ax[2].imshow(weight[:, :, 0])
            # plt.show()
            images[index, ...] = image
            masks[index, ...] = mask
            weights[index, ...] = weight
        return images, masks, weights
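One variant I'm considering but have not verified (so only a sketch, with simplified, hypothetical names): open the HDF5 file lazily in each process instead of in __init__, so the workers started by use_multiprocessing=True don't share a single handle:

import keras
import tables

class LazyHDF5Sequence(keras.utils.Sequence):
    def __init__(self, path, image_node, batch_size):
        self.path, self.image_node, self.batch_size = path, image_node, batch_size
        self._file = None
        with tables.open_file(path, 'r') as f:  # open briefly just to read the length
            self.n = f.root[image_node].shape[0]

    def _root(self):
        # Open on first use in the current process; reopen if the handle is gone.
        if self._file is None or not self._file.isopen:
            self._file = tables.open_file(self.path, 'r')
        return self._file.root

    def __len__(self):
        return self.n // self.batch_size

    def __getitem__(self, i):
        root = self._root()
        sl = slice(i * self.batch_size, (i + 1) * self.batch_size)
        return root[self.image_node][sl], root['labels'][sl]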
Right at the start of the session, immediately after tf.Session(), I run both tf.global_variables_initializer() and tf.local_variables_initializer(), and unfortunately I keep receiving error messages about an "Uninitialized value Variable_1." Why?
I did some searching around and found this StackExchange Question, but the answer doesn't help my situation. So I looked through the TensorFlow API and found an operation that should return any uninitialized variables, tf.report_uninitialized_variables(). I printed the results and received an empty pair of square brackets, which doesn't make any sense considering the description of my error messages. So what's going on? I've been clawing my eyes out for a day now. Any help is appreciated.
import tensorflow as tf
import os
from tqdm import tqdm

# hyperparam
training_iterations = 100
PATH = "C:\\Users\\ratno\\Desktop\\honest chaos\\skin cam\\drive-download-20180205T055458Z-001"

#==================================import training_data=================================
def import_data(image_path):
    image_contents = tf.read_file(filename=image_path)
    modified_image = tf.image.decode_jpeg(contents=image_contents, channels=1)
    image_tensor = tf.cast(tf.reshape(modified_image, [1, 10000]), dtype=tf.float32)
    return image_tensor

#========================neural network================================
def neural_network(input_layer):
    Weight_net_1 = {'weights': tf.Variable(tf.random_normal(shape=(10000, 16))),
                    'bias': tf.Variable(tf.random_normal(shape=(1, 1)))}
    Weight_net_2 = {'weights': tf.Variable(tf.random_normal(shape=(16, 16))),
                    'bias': tf.Variable(tf.random_normal(shape=(1, 1)))}
    Weight_net_3 = {'weights': tf.Variable(tf.random_normal(shape=(16, 16))),
                    'bias': tf.Variable(tf.random_normal(shape=(1, 1)))}
    Weight_net_4 = {'weights': tf.Variable(tf.random_normal(shape=(16, 1))),
                    'bias': tf.Variable(tf.random_normal(shape=(1, 1)))}

    # Input Layer
    hypothesis = input_layer; x = hypothesis
    # Hidden Layer 1
    hypothesis = tf.nn.relu(tf.matmul(x, Weight_net_1['weights']) + Weight_net_1['bias']); x = hypothesis
    # Hidden Layer 2
    hypothesis = tf.nn.relu(tf.matmul(x, Weight_net_2['weights']) + Weight_net_2['bias']); x = hypothesis
    # Hidden Layer 3
    hypothesis = tf.nn.relu(tf.matmul(x, Weight_net_3['weights']) + Weight_net_3['bias']); x = hypothesis
    # output cell
    hypothesis = tf.nn.relu(tf.matmul(x, Weight_net_4['weights']) + Weight_net_4['bias'])
    return hypothesis

#============================training the network=========================
def train(hypothesis):
    LOSS = tf.reduce_sum(1 - hypothesis)
    tf.train.AdamOptimizer(0.01).minimize(LOSS)

# Session==================================================================
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    image_list = [os.path.join(PATH, file_name) for file_name in os.listdir(PATH)]

    for iteration in tqdm(range(training_iterations), desc="COMPLETION", ncols=80):
        for i in image_list:
            modified_image_tensor = sess.run(import_data(image_path=i))
            hypo = sess.run(neural_network(input_layer=modified_image_tensor))
            sess.run(train(hypothesis=hypo))

    print("\n\nTraining completed.\nRunning test prediction.\n")
    DIRECTORY = input("Directory: ")
    test_input = sess.run(import_data(DIRECTORY))
    prediction = sess.run(neural_network(input_layer=test_input))
    print(prediction)

    if prediction >= 0.5:
        print("Acne")
    else:
        print("What")
And as for the error message:
Caused by op 'Variable/read', defined at:
File "C:/Users/ratno/Desktop/honest chaos/Hotdog/HDogntoHDog.py", line 75, in <module>
hypo = sess.run(neural_network(input_layer=modified_image_tensor))
File "C:/Users/ratno/Desktop/honest chaos/Hotdog/HDogntoHDog.py", line 23, in neural_network
Weight_net_1 = {'weights': tf.Variable(tf.random_normal(shape=(10000, 16))),
File "C:\Users\ratno\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\variables.py", line 199, in __init__
expected_shape=expected_shape)
File "C:\Users\ratno\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\variables.py", line 330, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "C:\Users\ratno\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\gen_array_ops.py", line 1400, in identity
result = _op_def_lib.apply_op("Identity", input=input, name=name)
File "C:\Users\ratno\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\op_def_library.py", line 767, in apply_op
op_def=op_def)
File "C:\Users\ratno\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\ops.py", line 2630, in create_op
original_op=self._default_original_op, op_def=op_def)
File "C:\Users\ratno\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\ops.py", line 1204, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value Variable
[[Node: Variable/read = Identity[T=DT_FLOAT, _class=["loc:#Variable"], _device="/job:localhost/replica:0/task:0/cpu:0"](Variable)]]
Let's take a look at your main code, starting from with tf.Session() as sess:. This is the first thing executed when you run your program. The next thing that happens is that you call the variable initializers -- but you have not yet declared any variables! That's because you have not called any of the functions you def'd. So when you later call, e.g., neural_network inside a sess.run call, the (uninitialized) variables are created at the moment neural_network is called, and sess.run then attempts to use them. Obviously this cannot work, since you have not initialized these newly created variables.
You have to create your network and all necessary variables in the computational graph before calling the initializers. You could try something along these lines:
data = import_data(image_path)
out = neural_network(data)
tr = train(hypothesis=out)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
By the way, your function train also has no return value, so it is unlikely to work as you expect it to. Please re-read the TensorFlow tutorials to understand how to operate an optimizer.
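For instance, a minimal sketch of the restructuring I mean (illustrative only; the loss and layer code stay as in your post, and some_image_path stands in for whatever path you train on):

def train(hypothesis):
    LOSS = tf.reduce_sum(1 - hypothesis)
    # Return the op so the caller can actually run a training step.
    return tf.train.AdamOptimizer(0.01).minimize(LOSS)

data = import_data(image_path=some_image_path)
out = neural_network(data)
train_op = train(hypothesis=out)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())
    for _ in range(training_iterations):
        _, hypo = sess.run([train_op, out])  # one training step; the graph is built only once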
Recently I built a customized deep neural net model using TFLearn, which claims to bring deep learning to the scikit-learn estimator API. I could train models and make predictions, but I couldn't get the scoring (evaluate) function to work, so I couldn't do cross-validation. I tried to ask questions about TFLearn in various places, but I got no responses.
It appears that TensorFlow itself has an estimator class. So I am putting TFLearn aside, and I'm trying to follow the guide at https://www.tensorflow.org/extend/estimators. Somehow I'm managing to get variables where they don't belong. Can anyone spot my problem? I will post code and the output.
Note: Of course, I can see the RuntimeWarning at the top of the output. I have found references to this warning online, but so far everyone claims it's harmless. Maybe it is not...
CODE:
import tensorflow as tf
from my_library import Database, l2_angle_distance

def my_model_function(topology, params):

    # This function will eventually be a function factory. This should
    # allow easy exploration of hyperparameters. For now, this just
    # returns a single, fixed model_fn.

    def model_fn(features, labels, mode):
        # Input layer
        net = tf.layers.conv1d(features["x"], topology[0], 3, activation=tf.nn.relu)
        net = tf.layers.dropout(net, 0.25)
        # The core of the network is here (convolutional layers only for now).
        for nodes in topology[1:]:
            net = tf.layers.conv1d(net, nodes, 3, activation=tf.nn.relu)
            net = tf.layers.dropout(net, 0.25)
        sh = tf.shape(features["x"])
        net = tf.reshape(net, [sh[0], sh[1], 3, 2])
        predictions = tf.nn.l2_normalize(net, dim=3)
        # PREDICT EstimatorSpec
        if mode == tf.estimator.ModeKeys.PREDICT:
            return tf.estimator.EstimatorSpec(mode=mode,
                                              predictions={"vectors": predictions})
        # TRAIN or EVAL EstimatorSpec
        loss = l2_angle_distance(labels, predictions)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=params["learning_rate"])
        train_op = optimizer.minimize(loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode, predictions, loss, train_op)

    return model_fn

##===================================================================

window = "whole"
encoding = "one_hot"
db = Database("/home/bwllc/Documents/Files for ML/compact")
traindb, testdb = db.train_test_split()
train_features, train_labels = traindb.values(window, encoding)
test_features, test_labels = testdb.values(window, encoding)

# Create the model.
tf.logging.set_verbosity(tf.logging.INFO)
LEARNING_RATE = 0.01
topology = (60, 40, 20)
model_params = {"learning_rate": LEARNING_RATE}
model_fn = my_model_function(topology, model_params)
model = tf.estimator.Estimator(model_fn, model_params)
print("\nmodel_dir? No? Why not? ", model.model_dir, "\n")  # This documents the error

# Input function.
my_input_fn = tf.estimator.inputs.numpy_input_fn({"x": train_features}, train_labels, shuffle=True)

# Train the model.
model.train(input_fn=my_input_fn, steps=20)
OUTPUT
/usr/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
return f(*args, **kwds)
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': {'learning_rate': 0.01}, '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0b55279048>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
model_dir? No? Why not? {'learning_rate': 0.01}
INFO:tensorflow:Create CheckpointSaverHook.
Traceback (most recent call last):
File "minimal_estimator_bug_example.py", line 81, in <module>
model.train(input_fn=my_input_fn, steps=20)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py", line 302, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/estimator/estimator.py", line 756, in _train_model
scaffold=estimator_spec.scaffold)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/basic_session_run_hooks.py", line 411, in __init__
self._save_path = os.path.join(checkpoint_dir, checkpoint_basename)
File "/usr/lib/python3.6/posixpath.py", line 78, in join
a = os.fspath(a)
TypeError: expected str, bytes or os.PathLike object, not dict
------------------
(program exited with code: 1)
Press return to continue
I can see exactly what went wrong: model_dir (which I left as the default) somehow got bound to the value I intended for model_params. How did this happen in my code? I can't see it.
If anyone has advice or suggestions, I would greatly appreciate them. Thanks!
Simply because you're passing your model_params as the model_dir when you construct your Estimator.
From the TensorFlow documentation:
Estimator __init__ function:
__init__(
    model_fn,
    model_dir=None,
    config=None,
    params=None
)
Notice how the second argument is the model_dir one. If you want to specify only the params one, you need to pass it as a keyword argument.
model = tf.estimator.Estimator(model_fn, params=model_params)
Or specify all the previous positional arguments:
model = tf.estimator.Estimator(model_fn, None, None, model_params)
I am attempting to add image distortion to my ConvNet model and am getting a really odd error. My data is in TFRecords format and I am using the color_distorter() function from the CIFAR-10 code. Below is some dummy code I threw together to check that everything does what I expect. When I view the images after they are distorted, there is no problem. The problem appears to arise after flattening the tensor, or after it is run. For some reason it executes once, but the second time it throws an error. Below is my code and the error it returns. My current suspicion is that it might be tf.map_fn(), but I don't know.
def color_distorer(image, thread_id=0, scope=None):
    with tf.op_scope([image], scope, 'distort_color'):
        color_ordering = thread_id % 2
        if color_ordering == 0:
            image = tf.image.random_brightness(image, max_delta=32. / 255.)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_hue(image, max_delta=0.2)
            image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
        elif color_ordering == 1:
            image = tf.image.random_brightness(image, max_delta=32. / 255.)
            image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
            image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
            image = tf.image.random_hue(image, max_delta=0.2)

        # The random_* ops do not necessarily clamp.
        image = tf.clip_by_value(image, 0.0, 1.0)
        return image
X_test_batch, y_test_batch = inputs(FLAGS.train_dir,
                                    FLAGS.test_file,
                                    FLAGS.batch_size,
                                    FLAGS.n_epochs,
                                    FLAGS.n_classes,
                                    one_hot_labels=True,
                                    imshape=160*160*3)

with tf.Session() as sess:
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # X = sess.run([X_test_batch])
    for i in range(5):  # epochs
        image = tf.reshape(X_test_batch, [-1, 160, 160, 3])
        result = tf.map_fn(lambda img: color_distorer(img), image)
        X, y = sess.run([result, y_test_batch])  # to see what the distortion did
        for i in range(50):  # all the images look distorted..
            if i % 25 == 0:
                plt.title(y[i])
                plt.imshow(X[i])
                plt.show()
        print('******************')  # This runs once... then breaks. Why?
        result = tf.reshape(result, [-1, 76800])
        dX, dy = sess.run([result, y_test_batch])
        print(dX)
Error:
******************
[[ 1. 1. 1. ..., 1. 1. 1.]
[ 1. 1. 1. ..., 1. 1. 1.]
[ 1. 1. 1. ..., 1. 1. 1.]
...,
[ 1. 1. 1. ..., 1. 1. 1.]
[ 1. 1. 1. ..., 1. 1. 1.]
[ 1. 1. 1. ..., 1. 1. 1.]]
WARNING:tensorflow:tf.op_scope(values, name, default_name) is deprecated, use tf.name_scope(name, default_name, values)
Traceback (most recent call last):
File "/home/mcamp/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1021, in _do_call
return fn(*args)
File "/home/mcamp/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1003, in _run_fn
status, run_metadata)
File "/home/mcamp/anaconda3/lib/python3.5/contextlib.py", line 66, in __exit__
next(self.gen)
File "/home/mcamp/anaconda3/lib/python3.5/site-packages/tensorflow/python/framework/errors_impl.py", line 469, in raise_exception_on_not_ok_status
pywrap_tensorflow.TF_GetCode(status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: The tensor returned for map_1/TensorArrayPack_1/TensorArrayGatherV2:0 was not valid.
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/media/mcamp/Local SSHD/Python Projects/Garage Door Project/FreshStart/ReadTFREcords.py", line 80, in <module>
dX, dy = sess.run([result, y_test_batch])
File "/home/mcamp/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 766, in run
run_metadata_ptr)
File "/home/mcamp/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 964, in _run
feed_dict_string, options, run_metadata)
File "/home/mcamp/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1014, in _do_run
target_list, options, run_metadata)
File "/home/mcamp/anaconda3/lib/python3.5/site-packages/tensorflow/python/client/session.py", line 1034, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: The tensor returned for map_1/TensorArrayPack_1/TensorArrayGatherV2:0 was not valid.
OK, so what I believe was happening: I reshaped X_test_batch back to the shape of an image, ran my distortion on it, and called the output result. That is what I ran and viewed. After viewing, I reshaped it back to a flat image for the model. Along the way I renamed things, calling it first image and then result. If the code were simply read from top to bottom I think it would have been fine, but TensorFlow works from the graph it has built, so on the second iteration it expected result to be a 3-D tensor when it was actually a flat tensor being pushed through tf.map_fn. tf.map_fn lives on the graph and does not execute until sess.run(), and that is what caused the error I was seeing. I hope this makes sense and helps someone else down the road.
with tf.name_scope('TestingData'):
    X_test_batch, y_test_batch = inputs(FLAGS.train_dir,
                                        FLAGS.test_file,
                                        FLAGS.batch_size,
                                        FLAGS.n_epochs,
                                        FLAGS.n_classes,
                                        one_hot_labels=True,
                                        imshape=160*160*3)
    image = tf.reshape(X_test_batch, [-1, 160, 160, 3])
    image = tf.map_fn(lambda img: color_distorer(img), image)

with tf.Session() as sess:
    init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
    sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    # X = sess.run([X_test_batch])
    for i in range(5):
        # dX, dy = sess.run([result, y_test_batch])
        print('******************')
        image = tf.reshape(image, [-1, 76800])
        dX, dy = sess.run([image, y_test_batch])
        distorted = np.reshape(dX, (-1, 160, 160, 3))
        print(dX.shape)
        print(distorted.shape)
        for i in range(50):
            if i % 25 == 0:
                print(distorted[i], dy[i])
                plt.title(dy[i])
                plt.imshow(distorted[i])
                plt.show()
        print(dX.shape)