Tensorflow: Too many open files - multithreading

My code structure is as follows -
I have a main file which calls two function in a loop ->
1- function1()(exist in separate file) which reads the image files from a folder say "Input" do some transformation on images and save these transformed images to a folder say "Output"
2- function2() train a neural network to map images in "Input" folder to images in "Output" folder
Main function is present in mainfile.py , function1() exist in separate file and function2() in different file also (so three files in total)
def function1():
images = reader.image(FLAGS.BATCH_SIZE, FLAGS.HEIGHT, FLAGS.WIDTH, FLAGS.TRAIN_IMAGES_PATH, subtract_mean=True)
\\loss function stuff
with tf.Session(config=config) as sess:
sess.run(init_op)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
start_time = time.time()
batches_count = 0
try:
while not coord.should_stop():
writer = tf.summary.FileWriter(FLAGS.summary_path, sess.graph)
sess.run([batch_assign_op,initial_assign_op])
batch_time = time.time()
for step in range(FLAGS.NUM_ITERATIONS):
_, loss_t = sess.run(
[train_op, total_loss])
summary_str = sess.run(summary)
writer.add_summary(summary_str, step)
image_t = sess.run(jpegs)
for i in range(image_t.shape[0]):
filename = '%s/%d.jpg' % (FLAGS.outputdir, (batches_count * FLAGS.BATCH_SIZE + i + 1))
with open(filename, 'wb') as f:
f.write(image_t[i])
f.close()
batches_count = batches_count + 1
after = process.memory_percent()
print("MEMORY CHANGE %.4f -> %.4f" % (before, after))
before = after
except tf.errors.OutOfRangeError:
print("final time elspased", (time.time() - start_time))
print('Done doing non paramteric part')
finally:
coord.request_stop()
coord.join(threads)
Next is the definition of the function 2
def function2():
tf.reset_default_graph()
run_id = FLAGS.MODEL_NAME if FLAGS.MODEL_NAME else str(uuid.uuid4())
model_path = '%s/%s' % (FLAGS.MODEL_DIR, run_id)
if not os.path.exists(model_path):
os.makedirs(model_path)
images, target_ig = reader.net_batch(FLAGS.BATCH_SIZE, FLAGS.HEIGHT, FLAGS.WIDTH, FLAGS.TRAIN_IMAGES_PATH,
FLAGS.TARGET_PATH, epochs=FLAGS.EPOCHS, subtract_mean=False,zero_one=False
,input_mean=FLAGS.input_mean,target_mean=FLAGS.output_mean)
ae_inputs = tf.placeholder(tf.float32, (None, FLAGS.HEIGHT, FLAGS.WIDTH, 3),
name='auto_input') # input to the network (MNIST images)
target = tf.placeholder(tf.float32, (None, FLAGS.HEIGHT, FLAGS.WIDTH, 3),
name='target')
ae_output = model.net(ae_inputs, training=True)
learning_rate = tf.placeholder(tf.float32, shape=[], name='learning_rate')
#ogit_output = tf.nn.sigmoid(ae_output)
# loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=logit_output,name="p_loss"))
size = tf.size(ae_output)
loss = tf.nn.l2_loss((target - ae_output), name="p_loss")/tf.to_float(size)
global_step = tf.Variable(FLAGS.gs_val, name="p_global_step", trainable=False)
train_op = tf.train.AdamOptimizer(learning_rate, name="p_trainopt").minimize(loss, global_step=global_step)
# Statistics
average_pl = tf.placeholder(tf.float32,shape=[])
with tf.name_scope('losses'):
tf.summary.scalar('total loss', loss)
tf.summary.image('param generated', (ae_output))
tf.summary.image('original', (ae_inputs))
tf.summary.image('target', (target))
tf.summary.scalar("average_loss", average_pl)
summary = tf.summary.merge_all()
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
with tf.Session(config=config) as sess:
writer = tf.summary.FileWriter(FLAGS.summary_path, sess.graph)
saver = tf.train.Saver(tf.trainable_variables())
file = tf.train.latest_checkpoint(model_path)
sess.run(init_op)
if file:
print('Restoring model from {}'.format(file))
saver.restore(sess, file)
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
start_time = time.time()
start_ti = time.time()
acc_loss = []
count =1
try:
while not coord.should_stop():
ig_b, tg_b =sess.run([images,target_ig])
if count % FLAGS.record == 0:
_, loss_t, step, summary_str = sess.run([train_op, loss, global_step, summary],
feed_dict={ae_inputs: ig_b,
target: tg_b,
learning_rate: FLAGS.LEARNING_RATE,
average_pl:(sum(acc_loss)/len(acc_loss))})
print(step,"--> loss: ", loss_t," ,average loss: ",(sum(acc_loss)/len(acc_loss)),
" ,elpased time: ", elapsed_time)
if FLAGS.writesum:
writer.add_summary(summary_str, step)
if count % FLAGS.chanelr == 0:
FLAGS.LEARNING_RATE = FLAGS.LEARNING_RATE / FLAGS.div
print('learning rate now: ', FLAGS.LEARNING_RATE)
acc_loss = []
else:
_, loss_t, step = sess.run([train_op, loss, global_step],
feed_dict={ae_inputs: ig_b,
target: tg_b,
learning_rate: FLAGS.LEARNING_RATE})
acc_loss.append(loss_t)
elapsed_time = time.time() - start_time
start_time = time.time()
count = count+1
except tf.errors.OutOfRangeError:
print(step, loss_t, elapsed_time)
saver.save(sess, model_path + '/style-model',global_step=step)
print("final time elspased", (time.time() - start_ti))
print('Done training -- epoch limit reached')
finally:
coord.request_stop()
coord.join(threads)
return step
And here is the function to read images in batches -
def image(batch, height, width, path, epochs=1, shuffle=False, subtract_mean = False):
# TODO: Find a proper way to do this
ll = [int(i.split('.')[0]) for i in listdir(path)]
ll.sort()
filenames = [join(path, '%d.jpg' %(f)) for f in ll if isfile(join(path, '%d.jpg' %(f)))]
png = filenames[0].lower().endswith('png') # If first file is a png, assume they all are
filename_queue = tf.train.string_input_producer(filenames,
shuffle=shuffle, num_epochs=epochs)
reader = tf.WholeFileReader()
_, img_bytes = reader.read(filename_queue)
image = tf.image.decode_png(img_bytes, channels=3)\
if png else tf.image.decode_jpeg(img_bytes, channels=3)
processed_image = preprocess(image, height,width, subtract_mean=subtract_mean)
return tf.train.batch([processed_image], batch)
def net_batch(batch, height, width, path_input,path_target, epochs=1, zero_one= True, shuffle=False,
subtract_mean = False,input_mean=None, target_mean=None):
#TODO: Find a proper way to do this
ll_in = [int(i.split('.')[0]) for i in listdir(path_input)]
ll_in.sort()
filenames_input = [join(path_input, '%d.jpg' % (f)) for f in ll_in if isfile(join(path_input, '%d.jpg' % (f)))]
#filenames_input = [join(path_input, f) for f in listdir(path_input) if isfile(join(path_input, f))]
png_input = filenames_input[0].lower().endswith('png') # If first file is a png, assume they all are
ll_out = [int(i.split('.')[0]) for i in listdir(path_target)]
ll_out.sort()
filenames_target = [join(path_target, '%d.jpg' % (f)) for f in ll_out if isfile(join(path_target, '%d.jpg' % (f)))]
# filenames_target = [join(path_target, f) for f in listdir(path_target) if isfile(join(path_target, f))]
png_output = filenames_target[0].lower().endswith('png') # If first file is a png, assume they all are
file_queue = tf.train.slice_input_producer([filenames_input,filenames_target],shuffle=True, num_epochs=epochs)
input_file = tf.read_file(file_queue[0])
input_image = tf.image.decode_png(input_file, channels=3)\
if png_input else tf.image.decode_jpeg(input_file, channels=3)
processed_image_input = preprocess(input_image, height,width, subtract_mean=subtract_mean,zero_one=zero_one,
net=True,meandata=input_mean)
target_file = tf.read_file(file_queue[1])
target_image = tf.image.decode_png(target_file, channels=3) \
if png_output else tf.image.decode_jpeg(target_file, channels=3)
processed_image_target = preprocess(target_image, height, width, subtract_mean=subtract_mean,zero_one=zero_one,
net=True,meandata=target_mean)
return tf.train.batch([processed_image_input,processed_image_target], batch)
Problem In between few run of the loop to call function1() and
function2() the whole code crashes giving following error -
Traceback (most recent call last):
File "/usr/lib/python3/dist-packages/apport_python_hook.py", line 63, in appo
rt_excepthook
File "<frozen importlib._bootstrap>", line 969, in _find_and_load
File "<frozen importlib._bootstrap>", line 958, in _find_and_load_unlocked
File "<frozen importlib._bootstrap>", line 673, in _load_unlocked
File "<frozen importlib._bootstrap_external>", line 661, in exec_module
File "<frozen importlib._bootstrap_external>", line 766, in get_code
File "<frozen importlib._bootstrap_external>", line 818, in get_data
OSError: [Errno 24] Too many open files: '/usr/lib/python3/dist-packages/apport
/__init__.py'
Original exception was:
Traceback (most recent call last):
File "Mainfile.py", line 157, in <module>
File "Mainfile.py", line 131, in main
File "/home/suryabhan/Desktop/New_NST_MAC/slowNST.py", line 246, in Nonpapram
etric
File "/home/suryabhan/Desktop/New_NST_MAC/slowNST.py", line 214, in create_st
yleimage
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/coord
inator.py", line 389, in join
File "/home/suryabhan/.local/lib/python3.5/site-packages/six.py", line 686, i
n reraise
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/training/queue
_runner_impl.py", line 238, in _run
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/client/session
.py", line 1235, in _single_operation_run
File "/usr/lib/python3.5/contextlib.py", line 66, in __exit__
File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/erro
rs_impl.py", line 466, in raise_exception_on_not_ok_status
tensorflow.python.framework.errors_impl.ResourceExhaustedError: Input/76834.jpg
[[Node: ReaderReadV2 = ReaderReadV2[_device="/job:localhost/replica:0/
task:0/cpu:0"](WholeFileReaderV2, input_producer)]]
[[Node: Assert_2/Assert/_72 = _Recv[client_terminated=false, recv_devi
ce="/job:localhost/replica:0/task:0/gpu:0", send_device="/job:localhost/replica
:0/task:0/cpu:0", send_device_incarnation=1, tensor_name="edge_157_Assert_2/Ass
ert", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"]()]
]
I have spend hours tries to fix the issue but nowhere close , my theory is the threads in function1() doesn't stop even after calling coord.request_stop() which keep accumulating open threads and finally code crashes. Even if this is the reason , I really don't know how to solve it.

Related

Keras deep clustering undefined errors in clustering custom layer

i was following this guide to implement a clustering into a deep model
https://ai-mrkogao.github.io/reinforcement%20learning/clusteringkeras/ but i got two errors :
The first one, on this line
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
it says
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 463, in __call__
self.build(unpack_singleton(input_shapes))
File "<stdin>", line 14, in build
TypeError: add_weight() got multiple values for argument 'name'
So i located
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
from the class ClusteringLayer and i removed name='clusters' as follow
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
But now it keeps giving me the following error
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 463, in __call__
self.build(unpack_singleton(input_shapes))
File "<stdin>", line 14, in build
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 282, in add_weight
constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py", line 620, in variable
value, dtype=dtype, name=name, constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py", line 782, in variable
constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py", line 263, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py", line 460, in __init__
shape=shape)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py", line 582, in _init_from_args
if init_from_fn else [initial_value]) as name:
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 6513, in __enter__
return self._name_scope.__enter__()
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 4306, in name_scope
if not _VALID_SCOPE_NAME_REGEX.match(name):
TypeError: expected string or bytes-like object
Here is the complete and reproducible code
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
def autoencoder(dims, act='relu', init='glorot_uniform'):
n_stacks = len(dims) - 1
input_img = Input(shape=(dims[0],), name='input')
x = input_img
for i in range(n_stacks-1):
x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here
x = encoded
for i in range(n_stacks-1, 0, -1):
x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
decoded = x
return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))
x = x.reshape((x.shape[0], -1))
x = np.divide(x, 255.)
x_train.shape
x.shape
n_clusters = len(np.unique(y))
kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=4)
y_pred_kmeans = kmeans.fit_predict(x)
y_pred_kmeans[:10]
dims = [x.shape[-1], 500, 500, 2000, 10]
init = VarianceScaling(scale=1. / 3., mode='fan_in',distribution='uniform')
pretrain_optimizer = SGD(lr=1, momentum=0.9)
autoencoder, encoder = autoencoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
autoencoder.fit(x, x, batch_size=250, epochs=2) #, callbacks=cb)
autoencoder.save_weights( 'ae_weights.h5')
class ClusteringLayer(Layer):
def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(ClusteringLayer, self).__init__(**kwargs)
self.n_clusters = n_clusters
self.alpha = alpha
self.initial_weights = weights
self.input_spec = InputSpec(ndim=2)
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
def call(self, inputs, **kwargs):
q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
q **= (self.alpha + 1.0) / 2.0
q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1.
return q
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
return input_shape[0], self.n_clusters
def get_config(self):
config = {'n_clusters': self.n_clusters}
base_config = super(ClusteringLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=SGD(0.01, 0.9), loss='kld')
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
encoder.output
clustering_layer
784 image input -> 10 classification
The result of
from tensorflow.python import keras
print(keras.__version__)
is
2.2.4-tf
You can even download the docker with keras installation at repbioinfo/autoencoderforclustering
I have solved the error, just correct these line
self.clusters = self.add_weight(shape = (self.n_clusters, input_dim),
initializer='glorot_uniform', name='clusters')
instead of
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')

In Tensorflow I can't use any MultiRNNCell instance in dynamic decode, but a single RNNCell instance can work on it

I make a seq2seq model using tensorflow and meet a problem that my program throws an error when I use MultiRNNCell in tf.contrib.seq2seq.dynamic_decode.
The problem happens over here:
defw_rnn=tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
for _ in range(self.FLAGS.rnn_layer_size)])
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
sequence_length=self.decoder_targets_length,
time_major=False)
training_decoder = \
tf.contrib.seq2seq.BasicDecoder(
defw_rnn, training_helper,
encoder_final_state,
output_layer)
training_decoder_output, _, training_decoder_output_length = \
tf.contrib.seq2seq.dynamic_decode(
training_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
When I run this code,the console shows this Error message:
C:\Users\TopView\AppData\Local\Programs\Python\Python36\python.exe E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py:417: calling reverse_sequence (from tensorflow.python.ops.array_ops) with seq_dim is deprecated and will be removed in a future version.
Instructions for updating:
seq_dim is deprecated, use seq_axis instead
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py:432: calling reverse_sequence (from tensorflow.python.ops.array_ops) with batch_dim is deprecated and will be removed in a future version.
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
encoder_final_state shpe
LSTMStateTuple(c=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_5:0' shape=(?, 24) dtype=float32>, h=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_6:0' shape=(?, 24) dtype=float32>)
decoder_inputs shape before embedded
(128, 10)
decoder inputs shape after embedded
(128, 10, 5)
Traceback (most recent call last):
File "E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py", line 14, in <module>
len(embedding_matrix['embedding'][0]))
File "E:\PycharmProject\cikm_transport\CIKM\CIKM\translate_model\model.py", line 109, in __init__
maximum_iterations=self.FLAGS.max_len)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 323, in dynamic_decode
swap_memory=swap_memory)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3209, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2941, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2878, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3179, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 266, in body
decoder_finished) = decoder.step(time, inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py", line 137, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1325, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 846, in call
(c_prev, m_prev) = state
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 436, in __iter__
"Tensor objects are not iterable when eager execution is not "
TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
Process finished with exit code 1
But when I change the instance of defw_rnn, make it a single RNN instance like LSTMCell, the Error disappears:
defw_rnn=tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
And the code works well. However, I've found that most of the code about seq2seq model on the Internet using MultiRNNCell and they also use tensorflow, so it really confuse me that what is wrong with my program.
Here is the entire code:
import tensorflow as tf
import numpy as np
class Seq2SeqModel(object):
def bw_fw_rnn(self):
with tf.name_scope("forward_rnn"):
fw = tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer) for _ in
range(self.FLAGS.rnn_layer_size)])
fw = tf.nn.rnn_cell.DropoutWrapper(fw, output_keep_prob=self.FLAGS.keep_prob)
with tf.name_scope("backward_rnn"):
bw = tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer) for _ in
range(self.FLAGS.rnn_layer_size)])
bw = tf.nn.rnn_cell.DropoutWrapper(bw, output_keep_prob=self.FLAGS.keep_prob)
return (fw, bw)
def decode_inputs_preprocess(self, data, id_matrix):
ending=tf.strided_slice(data,[0,0],[self.batch_size,-1],[1,1])
decoder_input=tf.concat([tf.fill([self.batch_size,1],id_matrix.index('<go>')),ending],1)
return decoder_input
def __init__(self, FLAGS, english_id_matrix, spanish_id_matrix, english_vocab_size,spanish_vocab_size, embedding_size):
self.FLAGS = FLAGS
self.english_vocab_size = english_vocab_size
self.embedding_size = embedding_size
self.encoder_input = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='encoder_inputs')
self.decoder_targets = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='decoder_targets')
self.encoder_input_sequence_length = tf.placeholder(shape=[None], dtype=tf.int32, name='encoder_inputs_length')
self.decoder_targets_length = tf.placeholder(shape=[None], dtype=tf.int32, name='decoder_targets_length')
self.batch_size = self.FLAGS.batch_size
with tf.name_scope('embedding_look_up'):
spanish_embeddings = tf.Variable(
tf.random_uniform([english_vocab_size,
embedding_size], -1.0, 1.0),
dtype=tf.float32)
english_embeddings = tf.Variable(
tf.random_uniform([english_vocab_size,
embedding_size], -1.0, 1.0),
dtype=tf.float32)
self.spanish_embeddings_inputs = tf.placeholder(
dtype=tf.float32, shape=[english_vocab_size, embedding_size],
name='spanish_embeddings_inputs')
self.english_embeddings_inputs = tf.placeholder(
dtype=tf.float32, shape=[english_vocab_size, embedding_size],
name='spanish_embeddings_inputs')
self.spanish_embeddings_inputs_op = spanish_embeddings.assign(self.spanish_embeddings_inputs)
self.english_embeddings_inputs_op = english_embeddings.assign(self.english_embeddings_inputs)
encoder_inputs = tf.nn.embedding_lookup(spanish_embeddings, self.encoder_input)
with tf.name_scope('encoder'):
enfw_rnn, enbw_rnn = self.bw_fw_rnn()
encoder_outputs, encoder_final_state = \
tf.nn.bidirectional_dynamic_rnn(enfw_rnn, enbw_rnn, encoder_inputs
, sequence_length=self.encoder_input_sequence_length, dtype=tf.float32)
print("encoder_final_state shpe")
# final_state_c=tf.concat([encoder_final_state[0][-1].c,encoder_final_state[1][-1].c],1)
# final_state_h=tf.concat([encoder_final_state[0][-1].h,encoder_final_state[1][-1].h],1)
# encoder_final_state=tf.contrib.rnn.LSTMStateTuple(c=final_state_c,
# h=final_state_h)
encoder_final_state=encoder_final_state[0][-1]
print(encoder_final_state)
with tf.name_scope('dense_layer'):
output_layer = tf.layers.Dense(english_vocab_size,
kernel_initializer=tf.truncated_normal_initializer(
mean=0.0, stddev=0.1
))
# training decoder
with tf.name_scope('decoder'), tf.variable_scope('decode'):
decoder_inputs=self.decode_inputs_preprocess(self.decoder_targets,english_id_matrix)
print('decoder_inputs shape before embedded')
print(decoder_inputs.shape)
decoder_inputs = tf.nn.embedding_lookup(english_embeddings,decoder_inputs)
print('decoder inputs shape after embedded')
print(decoder_inputs.shape)
defw_rnn=tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
for _ in range(self.FLAGS.rnn_layer_size)])
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
sequence_length=self.decoder_targets_length,
time_major=False)
training_decoder = \
tf.contrib.seq2seq.BasicDecoder(
defw_rnn, training_helper,
encoder_final_state,
output_layer)
training_decoder_output, _, training_decoder_output_length = \
tf.contrib.seq2seq.dynamic_decode(
training_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
print("training logits shape")
print(training_logits.shape)
# predicting decoder
with tf.variable_scope('decode', reuse=True):
start_tokens = tf.tile(tf.constant([english_id_matrix.index('<go>')], dtype=tf.int32),
[self.batch_size], name='start_tokens')
predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(english_embeddings,
start_tokens,
english_id_matrix.index('<eos>'))
predicting_decoder = tf.contrib.seq2seq.BasicDecoder(defw_rnn,
predicting_helper,
encoder_final_state,
output_layer)
predicting_decoder_output, _, predicting_decoder_output_length =\
tf.contrib.seq2seq.dynamic_decode(
predicting_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
self.predicting_logits = tf.identity(predicting_decoder_output.sample_id, name='predictions')
print("predicting logits shape")
print(self.predicting_logits.shape)
masks = tf.sequence_mask(self.decoder_targets_length, self.FLAGS.max_len, dtype=tf.float32, name='masks')
with tf.variable_scope('optimization'), tf.name_scope('optimization'):
# Loss
self.cost = tf.contrib.seq2seq.sequence_loss(training_logits, self.decoder_targets, masks)
# Optimizer
optimizer = tf.train.AdamOptimizer(self.FLAGS.alpha)
# Gradient Clipping
gradients = optimizer.compute_gradients(self.cost)
capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
self.train_op = optimizer.apply_gradients(capped_gradients)
Well……I've figured out.The problem happened because I only sent the final state of the encoder to a decoder.

pymc3 model with ODE solver using theano

I am using a model where the mean response depends on the solution to an ODE. I am trying to fit this model using pymc3, but am running into problems (relating to missing test values) when joining the ODE solver to the model.
Model
y_t is Lognormally distributed with mean mu_t and standard deviation sigma.
mu_t is the solution to a set of ODE's at time t.
Problem
Theano/ pymc3 gives an error because the theano tensor variables used in solving the ODE have no test values. See below for a copy of the errors. I've tried setting
th.config.compute_test_value = 'ignore'
but I think that pymc3 changes it back to require test values. I am fairly new to theano and pymc3, so I apologise if I am missing something obvious.
Code
Imports
import pymc3 as pm
import theano as th
import theano.tensor as tt
from FormatData import *
import pandas as pd
Functions to solve ODE
# Runge Kutta integrator
def rungekuttastep(h, y, fprime, *args):
k1 = h*fprime(y, *args)
k2 = h*fprime(y + 0.5*k1, *args)
k3 = h*fprime(y + 0.5*k2, *args)
k4 = h*fprime(y + k3, *args)
y_np1 = y + (1./6.)*k1 + (1./3.)*k2 + (1./3.)*k3 + (1./6.)*k4
return y_np1
# ODE equations for my model
def ODE(y, *args):
alpha = args
yprime = tt.zeros_like(y)
yprime = tt.set_subtensor(yprime[0], alpha[0]*y[1] - alpha[1]*y[0])
yprime = tt.set_subtensor(yprime[1], -alpha[2]*y[0]*y[1])
return yprime
# Function to return ODE values given parameters
def calcFittedTitreVals(alpha, niter, hsize, inits):
y0 = tt.vector('y0')
h = tt.scalar('h')
i = tt.iscalar('i')
alpha0 = tt.scalar('alpha0')
alpha1 = tt.scalar('alpha1')
alpha2 = tt.scalar('alpha2')
result, updates = th.scan(fn=lambda y0, h: rungekuttastep(h, y0, ODE, alpha0, alpha1, alpha2),
outputs_info=[{'initial': y0}], non_sequences=h, n_steps=i)
odeint = th.function(inputs=[h, y0, i, alpha0, alpha1, alpha2], outputs=result, updates=updates)
z1 = odeint(h=hsize, # size of the interval
y0=inits, # starting values
i=niter, # number of iterations
alpha0=alpha[0], alpha1=alpha[1], alpha2=alpha[2])
C = z1[:, 0]
A = z1[:, 1]
return C, A
Generate Data
t = np.arange(0, 45, 0.1) # times at which to solve ODE
alpha = np.array([2, 0.4, 0.0001]) # true paramter values ODE
C, A = calcFittedTitreVals(alpha, niter=450, hsize=0.1, inits=[0, 1200])
td = np.arange(0, 45, 1) # times at which I observe data
sigma = 0.1
indices = np.array(np.searchsorted(t, td)).flatten()
DATA = pd.DataFrame(
data={'observed': np.random.lognormal(np.log(C[indices]), sigma),
'true': C[indices], 'time': td})
pymc3 model function
def titreLogNormal(Y, hsize, inits, times):
Y = th.shared(Y)
inits = th.shared(inits)
timesG = np.arange(0, 45, step=hsize)
indices = np.array(np.searchsorted(timesG, times)).flatten()
nTsteps = th.shared(timesG.shape[0])
hsize = th.shared(hsize)
y0 = tt.vector('y0')
h = tt.scalar('h')
i = tt.iscalar('i')
alpha0 = tt.scalar('alpha0')
alpha1 = tt.scalar('alpha1')
alpha2 = tt.scalar('alpha2')
result, updates = th.scan(fn=lambda y0, h: rungekuttastep(h, y0, ODE, alpha0, alpha1, alpha2),
outputs_info=[{'initial': y0}], non_sequences=h, n_steps=i)
odeint = th.function(inputs=[h, y0, i, alpha0, alpha1, alpha2], outputs=result, updates=updates)
model = pm.Model()
with model:
alpha = pm.Gamma('alpha', 0., 10., shape=3, testval=[2, 0.4, 0.001])
sigma = pm.Gamma('sigma', 0.1, 0.1, testval=0.1)
res = odeint(h=hsize, y=inits, i=nTsteps, alpha0=alpha[0], alpha1=alpha[1], alpha2=alpha[2])
mu = pm.Deterministic("mu", res[indices, 0])
y = pm.Lognormal('y', mu, sigma, observed=Y)
return model
Create model with data
model = titreLogNormal(
Y=np.array(DATA[['observed']]).flatten(),
hsize=0.1, inits={'a': 0, 'p': 1200},
times=np.array(DATA[['time']]).flatten())
Errors
Traceback (most recent call last):
File "/home/millerp/.local/lib/python3.5/site-packages/theano/gof/op.py", line 625, in __call__
storage_map[ins] = [self._get_test_value(ins)]
File "/home/millerp/.local/lib/python3.5/site-packages/theano/gof/op.py", line 581, in _get_test_value
raise AttributeError('%s has no test value %s' % (v, detailed_err_msg))
AttributeError: y0 has no test value
Backtrace when that variable is created:
File "/home/millerp/pycharm/pycharm-edu-3.5.1/helpers/pydev/_pydev_bundle/pydev_console_utils.py", line 251, in add_exec
more = self.do_add_exec(code_fragment)
File "/home/millerp/pycharm/pycharm-edu-3.5.1/helpers/pydev/_pydev_bundle/pydev_ipython_console.py", line 41, in do_add_exec
res = bool(self.interpreter.add_exec(codeFragment.text))
File "/home/millerp/pycharm/pycharm-edu-3.5.1/helpers/pydev/_pydev_bundle/pydev_ipython_console_011.py", line 455, in add_exec
self.ipython.run_cell(line, store_history=True)
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-54c976fefe1e>", line 99, in <module>
times=np.array(DATA[['time']]).flatten()
File "<ipython-input-2-54c976fefe1e>", line 71, in titreLogNormal
y0 = tt.vector('y0')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-54c976fefe1e>", line 99, in <module>
times=np.array(DATA[['time']]).flatten()
File "<ipython-input-2-54c976fefe1e>", line 86, in titreLogNormal
outputs_info=[{'initial': y0}], non_sequences=h, n_steps=i)
File "/home/millerp/.local/lib/python3.5/site-packages/theano/scan_module/scan.py", line 660, in scan
tensor.shape_padleft(actual_arg), 0),
File "/home/millerp/.local/lib/python3.5/site-packages/theano/tensor/basic.py", line 4429, in shape_padleft
return DimShuffle(_t.broadcastable, pattern)(_t)
File "/home/millerp/.local/lib/python3.5/site-packages/theano/gof/op.py", line 639, in __call__
(i, ins, node, detailed_err_msg))
ValueError: Cannot compute test value: input 0 (y0) of Op InplaceDimShuffle{x,0}(y0) missing default value.
Backtrace when that variable is created:
File "/home/millerp/pycharm/pycharm-edu-3.5.1/helpers/pydev/_pydev_bundle/pydev_console_utils.py", line 251, in add_exec
more = self.do_add_exec(code_fragment)
File "/home/millerp/pycharm/pycharm-edu-3.5.1/helpers/pydev/_pydev_bundle/pydev_ipython_console.py", line 41, in do_add_exec
res = bool(self.interpreter.add_exec(codeFragment.text))
File "/home/millerp/pycharm/pycharm-edu-3.5.1/helpers/pydev/_pydev_bundle/pydev_ipython_console_011.py", line 455, in add_exec
self.ipython.run_cell(line, store_history=True)
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
interactivity=interactivity, compiler=compiler, result=result)
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2821, in run_ast_nodes
if self.run_code(code, result):
File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2881, in run_code
exec(code_obj, self.user_global_ns, self.user_ns)
File "<ipython-input-2-54c976fefe1e>", line 99, in <module>
times=np.array(DATA[['time']]).flatten()
File "<ipython-input-2-54c976fefe1e>", line 71, in titreLogNormal
y0 = tt.vector('y0')

TensorFlow error: logits and labels must be the same size?

've downloaded a CBIS-DDSM dataset, a revisioned DDSM dataset of format DICOM. Using this dataset, I've been trying to run the CNN, based on the code used by the YouTuber 'sentdex'. Mostly the pre-processing of the dataset got sorted out, but running the CNN properly has become troubling. I think this line here prevents the functionning of the entire CNN code:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,
labels=y))
Even if I changed the part
'labels=y'
to
'labels=tf.reshape(y, [1,2])'
, I still have the same error message. I tried to bypass the 'tf.nn.softmax_cross_entropy_with_logits' with the numerically unstable version. Then, the CNN can be run. But I only got the 'nan'. Any suggestions on what I should do? I'm really stuck.
Here's an excerpt of my code
IMG_SIZE_PX = 256
N_CLASSES = 2
def convolutional_neural_network(x):
weights = {'W_conv1':tf.Variable(tf.random_normal([5,5,1,32])),
'W_conv2':tf.Variable(tf.random_normal([5,5,32,64])),
'W_fc':tf.Variable(tf.random_normal([IMG_SIZE_PX*IMG_SIZE_PX,1024])),
'out':tf.Variable(tf.random_normal([1024, N_CLASSES]))}
biases = {'b_conv1':tf.Variable(tf.random_normal([32])),
'b_conv2':tf.Variable(tf.random_normal([64])),
'b_fc':tf.Variable(tf.random_normal([1024])),
'out':tf.Variable(tf.random_normal([N_CLASSES]))}
x = tf.reshape(x, shape=[-1, IMG_SIZE_PX, IMG_SIZE_PX, 1])
conv1 = tf.nn.relu(conv2d(x, weights['W_conv1']) + biases['b_conv1'])
conv1 = maxpool2d(conv1)
conv1Shape = conv1.get_shape().as_list()
print ("Conv1 Shape! : " + str(conv1Shape))
conv2 = tf.nn.relu(conv2d(conv1, weights['W_conv2']) + biases['b_conv2'])
conv2 = maxpool2d(conv2)
conv2Shape = conv2.get_shape().as_list()
print ("Conv2 Shape! : " + str(conv2Shape))
fc = tf.reshape(conv2,[-1, 64 * 64 * 16])
fc = tf.nn.relu(tf.matmul(fc, weights['W_fc'])+biases['b_fc'])
fc = tf.nn.dropout(fc, KEEP_RATE)
output = tf.matmul(fc, weights['out'])+biases['out']
return output
def train_neural_network(x, EnableUnstableNumericalCalc = False):
# Loading the Input File; list of (Numpy array, label)
Data_Chunk = np.load('MTrain-test-calc-256.npy')
train_data = Data_Chunk[:NegNumber]
validation_data = Data_Chunk[NegNumber:]
# Training part
prediction = convolutional_neural_network(x)
predShape = prediction.get_shape().as_list()
if EnableUnstableNumericalCalc == False:
cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prediction,
labels=y))
else:
intermed = tf.nn.softmax(prediction)
cost = -tf.reduce_sum(y * tf.log(intermed), 1)
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4).minimize(cost)
hm_epochs = NB_EPOCHS
print("===============================================================================================")
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
successful_runs = 0
total_runs = 0
for epoch in range(hm_epochs):
epoch_loss = 0
for data in train_data:
total_runs += 1
X = data[0]
Y = data[1]
# try:
_, c = sess.run([optimizer, cost], feed_dict={x: X, y: Y})
epoch_loss += c
successful_runs += 1
"""
except InvalidArgumentError:
print("***** Error(Training): Invalid Arument Error! *****")
'''
print("X = {}".format(X))
print("Y = {}".format(Y))
print(str(e))
'''
pass
"""
try:
correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct, 'float'))
print('Epoch', epoch + 1, 'completed out of', hm_epochs,'loss:', epoch_loss)
print('Accuracy:',accuracy.eval({x:[i[0] for i in validation_data], y:[i[1] for i in validation_data]}))
except Exception as e:
print("!: Invalid information. Accuracy and fitment percent could not be computed.!")
And here's the Error message:
Traceback (most recent call last):
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1323, in _do_call
return fn(*args)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1302, in _run_fn
status, run_metadata)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\framework\errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be same size: logits_size=[4,2] labels_size=[1,2]
[[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape_2, Reshape_3)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "ConvNet.py", line 149, in <module>
train_neural_network(x)
File "ConvNet.py", line 120, in train_neural_network
_, c = sess.run([optimizer, cost], feed_dict={x: X, y: Y})
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\client\session.py", line 889, in run
run_metadata_ptr)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1317, in _do_run
options, run_metadata)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\client\session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be same size: logits_size=[4,2] labels_size=[1,2]
[[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape_2, Reshape_3)]]
Caused by op 'SoftmaxCrossEntropyWithLogits', defined at:
File "ConvNet.py", line 149, in <module>
train_neural_network(x)
File "ConvNet.py", line 95, in train_neural_network
labels=y))
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\ops\nn_ops.py", line 1783, in softmax_cross_entropy_with_logits
precise_logits, labels, name=name)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 4363, in _softmax_cross_entropy_with_logits
name=name)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 2956, in create_op
op_def=op_def)
File "C:\Pythons\Python35\lib\site-packages\tensorflow\python\framework\ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[4,2] labels_size=[1,2]
[[Node: SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:GPU:0"](Reshape_2, Reshape_3)]]

ValueError when trying to run bi-Directional MultiLSTM neural network

Traceback (most recent call last):
File "train_rnn.py", line 92, in <module>
batch_size=FLAGS.batch_size)
File "/home/iit/sourab/conv_extractive/codes/cnn-text-classification-tf/rnn_code/text_rnn.py", line 65, in __init__
initial_state_bw=self.rnn_tuple_state_bw)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 375, in bidirectional_dynamic_rnn
time_major=time_major, scope=fw_scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 574, in dynamic_rnn
dtype=dtype)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 737, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2770, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2599, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2549, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 722, in _time_step
(output, new_state) = call_cell()
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 708, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 752, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 669, in _get_single_variable
found_var.get_shape()))
**ValueError: Trying to share variable bidirectional_rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (1024, 2048) and found shape (640, 2048).**
Below I pass parameters to the constructor of the TextRNN class.
rnn = TextRNN(
sequence_size=x.shape[1],
truncated_backprop_length=FLAGS.truncated_backprop_length,
state_size=FLAGS.state_size,
num_classes=y.shape[1],
vocab_size=len(vocab_processor.vocabulary_),
embedding_size=FLAGS.embedding_dim,
num_layers=FLAGS.num_layers,
batch_size=FLAGS.batch_size)
Here, is the TextRNN class:
from __future__ import print_function, division
import tensorflow as tf
import numpy as np
import os
import sys
class TextRNN(object):
"""
An RNN for text classification
Uses an embedding layer followed by multilayered Bi-Directional LSTMs followed by a softmax layer
"""
def __init__(
self, sequence_size, truncated_backprop_length, state_size, num_classes, vocab_size, embedding_size, num_layers, batch_size):
#placeholders for input, output and dropout probability
self.input_x = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length, sequence_size], name="input_x")
self.input_y = tf.placeholder(tf.float32, [batch_size,truncated_backprop_length, num_classes], name="input_y")
self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
self.fw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
self.bw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
# Embedding layer
with tf.device('/cpu:0'), tf.name_scope("embedding"):
self.W = tf.Variable(
tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
name="W")
embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
#embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
self.embedded_chars_expanded = tf.reduce_mean(embedded_chars, axis=2)
state_per_layer_list_fw = tf.unstack(self.fw_init_state, axis=0)
self.rnn_tuple_state_fw = tuple(
[tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_fw[idx][0,:,:], state_per_layer_list_fw[idx][1,:,:])
for idx in range(num_layers)]
)
state_per_layer_list_bw = tf.unstack(self.bw_init_state, axis=0)
self.rnn_tuple_state_bw = tuple(
[tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_bw[idx][0,:,:], state_per_layer_list_bw[idx][1,:,:])
for idx in range(num_layers)]
)
W2 = tf.Variable(np.random.rand(2*state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.random.rand(1,num_classes), dtype=tf.float32)
with tf.name_scope('BiMultiLSTM'):
with tf.name_scope('forward_cell'):
cell_fw = tf.contrib.rnn.BasicLSTMCell( num_units=state_size, state_is_tuple=True)
cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=self.dropout_keep_prob)
cell_fw = tf.contrib.rnn.MultiRNNCell([cell_fw] * num_layers, state_is_tuple=True)
with tf.name_scope('Backward_cell'):
cell_bw = tf.contrib.rnn.BasicLSTMCell( num_units=state_size, state_is_tuple=True)
cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=self.dropout_keep_prob)
cell_bw = tf.contrib.rnn.MultiRNNCell([cell_bw] * num_layers, state_is_tuple=True)
self.output_hidden_states, self.current_states = tf.nn.bidirectional_dynamic_rnn(
cell_fw=cell_fw,
cell_bw=cell_bw,
inputs=self.embedded_chars_expanded,
initial_state_fw=self.rnn_tuple_state_fw,
initial_state_bw=self.rnn_tuple_state_bw)
self.outputs_concat = tf.concat(self.output_hidden_states, 2)
self.output_series = tf.reshape(self.outputs_concat, [-1, 2*state_size])
self._current_state_fw = self.current_states[0]
self._current_state_bw = self.current_states[1]
#output
with tf.name_scope("output"):
self.logits = tf.matmul(self.output_series, W2) + b2 #Broadcasted addition
self.labels = tf.reshape(self.input_y, [-1, num_classes])
self.logits_series = tf.unstack(tf.reshape(self.logits, [batch_size, truncated_backprop_length, num_classes]), axis=1)
self.predictions_series = [tf.nn.softmax(logit) for logit in self.logits_series]
self.labels_series = tf.unstack(tf.reshape(self.labels, [batch_size, truncated_backprop_length, num_classes]), axis=1)
#loss
with tf.name_scope("loss"):
self.losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
self.total_loss = tf.reduce_mean(self.losses)
#accuracy
self.correct_predictions = []
with tf.name_scope("accuracy"):
for predictions, labels in zip(self.predictions_series, self.labels_series):
self.correct_predictions.append(tf.equal(tf.argmax(predictions, axis=1), tf.argmax(labels, axis=1)))
self.sum_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, tf.float32))
self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, tf.float32))
These are the parameters that I passed:
Parameters:
ALLOW_SOFT_PLACEMENT=True
BATCH_SIZE=50
CHECKPOINT_EVERY=100
DATA_FILE=./../data/cnn_train.txt
DEV_FILE=./../data/cnn_test.txt
DROPOUT_KEEP_PROB=1.0
EMBEDDING_DIM=128
EVALUATE_EVERY=100
LOG_DEVICE_PLACEMENT=False
NUM_CHECKPOINTS=5
NUM_CLASSES=2
NUM_EPOCHS=200
NUM_LAYERS=3
STATE_SIZE=512
TRUNCATED_BACKPROP_LENGTH=10
I searched the net but failed to solve the error. The program works if I give state_size same as embedding_size and gives the above error for all other cases where state_size is not equal to embedding_size.

Resources