PyTorch: Error in DataParallel for RNN model

I'm trying to use torch.nn.DataParallel for an RNN model. My model looks like this:
import torch.nn as nn
import torch.nn.functional as F

class EncoderRNN(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)

    def forward(self, input_batch, input_batch_length, hidden):
        embedded = self.embedding(input_batch)
        packed_input = nn.utils.rnn.pack_padded_sequence(embedded, input_batch_length.cpu().numpy(),
                                                         batch_first=True)
        output, hidden = self.gru(packed_input, hidden)
        return output, hidden
class DecoderRNN(nn.Module):
    def __init__(self, hidden_size, vocab_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.gru = nn.GRU(hidden_size, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, vocab_size)
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, target_batch, target_batch_length, hidden, train=False):
        embedded = self.embedding(target_batch)
        output = F.relu(embedded)
        if train:
            # minus 1 to eliminate <EOS>
            packed_target = nn.utils.rnn.pack_padded_sequence(output, (target_batch_length - 1).cpu().numpy(),
                                                              batch_first=True)
            output, hidden = self.gru(packed_target, hidden)
            output = self.softmax(self.out(output[0]))
        return output, hidden
And I implemented DataParallel like this when declaring the model:
encoder = nn.DataParallel(encoder)
decoder = nn.DataParallel(decoder)
The code runs on a server with 4 GPUs, and I received the following error message:
/home/cjunjie/NLP/DocSummarization/model.py:18: UserWarning: RNN module weights are not part of single contiguous chunk of memory. This means they need to be compacted at every call, possibly greatly increasing memory usage. To compact weights again call flatten_parameters().
output, hidden = self.gru(packed_input, hidden)
Traceback (most recent call last):
File "train.py", line 144, in <module>
train_iteration(encoder, decoder, fileDataSet)
File "train.py", line 110, in train_iteration
target_indices, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion)
File "train.py", line 41, in train
encoder_output, encoder_hidden = encoder(input_batch, input_batch_length, encoder_hidden)
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/modules/module.py", line 357, in __call__
result = self.forward(*input, **kwargs)
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 74, in forward
return self.gather(outputs, self.output_device)
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/parallel/data_parallel.py", line 86, in gather
return gather(outputs, output_device, dim=self.dim)
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 65, in gather
return gather_map(outputs)
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 60, in gather_map
return type(out)(map(gather_map, zip(*outputs)))
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 60, in gather_map
return type(out)(map(gather_map, zip(*outputs)))
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/utils/rnn.py", line 39, in __new__
return super(PackedSequence, cls).__new__(cls, *args[0])
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/parallel/scatter_gather.py", line 57, in gather_map
return Gather.apply(target_device, dim, *outputs)
File "/home/cjunjie/anaconda3/lib/python3.6/site-packages/torch/nn/parallel/_functions.py", line 58, in forward
assert all(map(lambda i: i.is_cuda, inputs))
AssertionError
I searched for the same problem, but none of the results had a solution. Can anyone help?

In order to run the code on GPUs, you need to copy both the variables and the model weights to CUDA. I suspect you did not copy the model weights. To do that, call:
encoder.cuda()
decoder.cuda()
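For completeness, here is a minimal sketch of the full setup, assuming the EncoderRNN/DecoderRNN classes above and hypothetical vocab_size/hidden_size values; the .cuda() calls on the inputs and the initial hidden state are the part that is easy to forget:
import torch
import torch.nn as nn

encoder = EncoderRNN(vocab_size, hidden_size).cuda()  # copy model weights to the GPU first
decoder = DecoderRNN(hidden_size, vocab_size).cuda()

encoder = nn.DataParallel(encoder)  # then wrap for multi-GPU
decoder = nn.DataParallel(decoder)

# the inputs (and any initial hidden state) must live on the GPU as well
input_batch = input_batch.cuda()
encoder_hidden = encoder_hidden.cuda()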

Related

Using albumentations with the TensorFlow Sequence API

I am trying to use a tf.keras.utils.Sequence object as input to my Keras model so that I can apply augmentations that are not available in TensorFlow using the albumentations library. But I am getting an error while doing so. (The image pre-processing operations mentioned here are just for clarity.)
import albumentations as A
from tensorflow.keras.utils import Sequence
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout
from tensorflow.keras.models import Sequential

TRAIN_DIR = os.path.join('..', 'Data', 'PetImages')

def load_data():
    list_of_fpaths = glob.glob('../Data/PetImages/Cat/*')
    labels = [1] * len(list_of_fpaths)
    temp = glob.glob('../Data/PetImages/Dog/*')
    list_of_fpaths.extend(temp)
    labels.extend([0] * len(temp))
    return list_of_fpaths, labels

# Now list_of_fpaths contains the list of file paths and labels contains
# the corresponding labels

class DataSequence(Sequence):
    def __init__(self, x_set, y_set, batch_size, augmentations):
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.augment = augmentations

    def __len__(self):
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
        a = np.array([
            self.augment(image=plt.imread(file_name))["image"]
            for file_name in batch_x
        ])
        b = np.array(batch_y)
        return a, b

def get_model(input_shape):
    model = Sequential([
        Conv2D(8, 3, activation='relu', input_shape=input_shape),
        MaxPool2D(2),
        Conv2D(16, 3, activation='relu'),
        MaxPool2D(2),
        Conv2D(32, 3, activation='relu'),
        MaxPool2D(2),
        Conv2D(32, 3, activation='relu'),
        MaxPool2D(2),
        Conv2D(32, 3, activation='relu'),
        MaxPool2D(2),
        Flatten(),
        Dense(1024, activation='relu'),
        Dropout(0.3),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

ALBUMENTATIONS_TRAIN = A.Compose([
    A.Resize(256, 256),
    # A.Resize(512, 512),
    A.ToFloat(),
    # A.RandomCrop(384, 384, p=0.5),
])

ALBUMENTATIONS_TEST = A.Compose([
    A.ToFloat(),
    A.Resize(256, 256)
])

X, Y = load_data()
train_gen = DataSequence(X, Y, 16, ALBUMENTATIONS_TRAIN)
model = get_model(input_shape=(256, 256, 3))
model.fit(train_gen, epochs=100)
The error that I am getting is:
17/748 [..............................] - ETA: 1:06 - loss: 0.4304 - accuracy: 0.92282020-07-08 13:25:47.751964: W tensorflow/core/framework/op_kernel.cc:1741] Invalid argument: ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\ops\script_ops.py", line 243, in __call__
ret = func(*args)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 309, in wrapper
return func(*args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 785, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 801, in wrapped_generator
for data in generator_fn():
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 932, in generator_fn
yield x[i]
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 40, in __getitem__
a = np.array([
ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 89, in <module>
model.fit(train_gen,epochs=100)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py", line 66, in _method_wrapper
return method(self, *args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py", line 848, in fit
tmp_logs = train_function(iterator)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\def_function.py", line 580, in __call__
result = self._call(*args, **kwds)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\def_function.py", line 611, in _call
return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 2420, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 1661, in _filtered_call
return self._call_flat(
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 1745, in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 593, in call
outputs = execute.execute(
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\execute.py", line 59, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\ops\script_ops.py", line 243, in __call__
ret = func(*args)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 309, in wrapper
return func(*args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 785, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 801, in wrapped_generator
for data in generator_fn():
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 932, in generator_fn
yield x[i]
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 40, in __getitem__
a = np.array([
ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
[[{{node PyFunc}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_4]]
(1) Invalid argument: ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\ops\script_ops.py", line 243, in __call__
ret = func(*args)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 309, in wrapper
return func(*args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 785, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 801, in wrapped_generator
for data in generator_fn():
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 932, in generator_fn
yield x[i]
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 40, in __getitem__
a = np.array([
ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
[[{{node PyFunc}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_1195]
Function call stack:
train_function -> train_function
Process finished with exit code 1
Please help me understand what mistake I am making.
Based on the error messages, there is at least one grayscale image in your dataset that was resized to 256x256 and thus cannot fit into your network, which expects three-channel input.
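A minimal sketch of one possible fix, assuming the grayscale (and RGBA) files should simply be coerced to three channels before augmentation; ensure_rgb is a hypothetical helper, not part of the original code:
import numpy as np
import matplotlib.pyplot as plt

def ensure_rgb(img):
    # plt.imread returns shape (H, W) for grayscale files; replicate the
    # single channel so every image ends up with shape (H, W, 3)
    if img.ndim == 2:
        img = np.stack([img] * 3, axis=-1)
    elif img.shape[-1] == 4:  # RGBA PNG: drop the alpha channel
        img = img[..., :3]
    return img

# In DataSequence.__getitem__, wrap the read with the helper:
#     self.augment(image=ensure_rgb(plt.imread(file_name)))["image"]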

In TensorFlow I can't use any MultiRNNCell instance in dynamic_decode, but a single RNNCell instance works

I am building a seq2seq model using TensorFlow and have met a problem: my program throws an error when I use a MultiRNNCell in tf.contrib.seq2seq.dynamic_decode.
The problem happens here:
defw_rnn = tf.nn.rnn_cell.MultiRNNCell([
    tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                            initializer=tf.orthogonal_initializer)
    for _ in range(self.FLAGS.rnn_layer_size)])

training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
                                                    sequence_length=self.decoder_targets_length,
                                                    time_major=False)
training_decoder = \
    tf.contrib.seq2seq.BasicDecoder(
        defw_rnn, training_helper,
        encoder_final_state,
        output_layer)
training_decoder_output, _, training_decoder_output_length = \
    tf.contrib.seq2seq.dynamic_decode(
        training_decoder,
        impute_finished=True,
        maximum_iterations=self.FLAGS.max_len)
When I run this code, the console shows this error message:
C:\Users\TopView\AppData\Local\Programs\Python\Python36\python.exe E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py:417: calling reverse_sequence (from tensorflow.python.ops.array_ops) with seq_dim is deprecated and will be removed in a future version.
Instructions for updating:
seq_dim is deprecated, use seq_axis instead
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py:432: calling reverse_sequence (from tensorflow.python.ops.array_ops) with batch_dim is deprecated and will be removed in a future version.
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
encoder_final_state shpe
LSTMStateTuple(c=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_5:0' shape=(?, 24) dtype=float32>, h=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_6:0' shape=(?, 24) dtype=float32>)
decoder_inputs shape before embedded
(128, 10)
decoder inputs shape after embedded
(128, 10, 5)
Traceback (most recent call last):
File "E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py", line 14, in <module>
len(embedding_matrix['embedding'][0]))
File "E:\PycharmProject\cikm_transport\CIKM\CIKM\translate_model\model.py", line 109, in __init__
maximum_iterations=self.FLAGS.max_len)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 323, in dynamic_decode
swap_memory=swap_memory)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3209, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2941, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2878, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3179, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 266, in body
decoder_finished) = decoder.step(time, inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py", line 137, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1325, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 846, in call
(c_prev, m_prev) = state
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 436, in __iter__
"Tensor objects are not iterable when eager execution is not "
TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
Process finished with exit code 1
But when I change defw_rnn to a single RNN instance, such as an LSTMCell, the error disappears:
defw_rnn = tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                   initializer=tf.orthogonal_initializer)
And the code works well. However, most of the seq2seq code I have found on the Internet uses MultiRNNCell with TensorFlow, so it really confuses me as to what is wrong with my program.
Here is the entire code:
import tensorflow as tf
import numpy as np


class Seq2SeqModel(object):
    def bw_fw_rnn(self):
        with tf.name_scope("forward_rnn"):
            fw = tf.nn.rnn_cell.MultiRNNCell([
                tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                        initializer=tf.orthogonal_initializer)
                for _ in range(self.FLAGS.rnn_layer_size)])
            fw = tf.nn.rnn_cell.DropoutWrapper(fw, output_keep_prob=self.FLAGS.keep_prob)
        with tf.name_scope("backward_rnn"):
            bw = tf.nn.rnn_cell.MultiRNNCell([
                tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                        initializer=tf.orthogonal_initializer)
                for _ in range(self.FLAGS.rnn_layer_size)])
            bw = tf.nn.rnn_cell.DropoutWrapper(bw, output_keep_prob=self.FLAGS.keep_prob)
        return (fw, bw)

    def decode_inputs_preprocess(self, data, id_matrix):
        ending = tf.strided_slice(data, [0, 0], [self.batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([self.batch_size, 1], id_matrix.index('<go>')), ending], 1)
        return decoder_input

    def __init__(self, FLAGS, english_id_matrix, spanish_id_matrix, english_vocab_size, spanish_vocab_size, embedding_size):
        self.FLAGS = FLAGS
        self.english_vocab_size = english_vocab_size
        self.embedding_size = embedding_size
        self.encoder_input = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='encoder_inputs')
        self.decoder_targets = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='decoder_targets')
        self.encoder_input_sequence_length = tf.placeholder(shape=[None], dtype=tf.int32, name='encoder_inputs_length')
        self.decoder_targets_length = tf.placeholder(shape=[None], dtype=tf.int32, name='decoder_targets_length')
        self.batch_size = self.FLAGS.batch_size
        with tf.name_scope('embedding_look_up'):
            spanish_embeddings = tf.Variable(
                tf.random_uniform([english_vocab_size,
                                   embedding_size], -1.0, 1.0),
                dtype=tf.float32)
            english_embeddings = tf.Variable(
                tf.random_uniform([english_vocab_size,
                                   embedding_size], -1.0, 1.0),
                dtype=tf.float32)
            self.spanish_embeddings_inputs = tf.placeholder(
                dtype=tf.float32, shape=[english_vocab_size, embedding_size],
                name='spanish_embeddings_inputs')
            self.english_embeddings_inputs = tf.placeholder(
                dtype=tf.float32, shape=[english_vocab_size, embedding_size],
                name='spanish_embeddings_inputs')
            self.spanish_embeddings_inputs_op = spanish_embeddings.assign(self.spanish_embeddings_inputs)
            self.english_embeddings_inputs_op = english_embeddings.assign(self.english_embeddings_inputs)
            encoder_inputs = tf.nn.embedding_lookup(spanish_embeddings, self.encoder_input)
        with tf.name_scope('encoder'):
            enfw_rnn, enbw_rnn = self.bw_fw_rnn()
            encoder_outputs, encoder_final_state = \
                tf.nn.bidirectional_dynamic_rnn(enfw_rnn, enbw_rnn, encoder_inputs,
                                                sequence_length=self.encoder_input_sequence_length, dtype=tf.float32)
            print("encoder_final_state shpe")
            # final_state_c = tf.concat([encoder_final_state[0][-1].c, encoder_final_state[1][-1].c], 1)
            # final_state_h = tf.concat([encoder_final_state[0][-1].h, encoder_final_state[1][-1].h], 1)
            # encoder_final_state = tf.contrib.rnn.LSTMStateTuple(c=final_state_c,
            #                                                     h=final_state_h)
            encoder_final_state = encoder_final_state[0][-1]
            print(encoder_final_state)
        with tf.name_scope('dense_layer'):
            output_layer = tf.layers.Dense(english_vocab_size,
                                           kernel_initializer=tf.truncated_normal_initializer(
                                               mean=0.0, stddev=0.1))
        # training decoder
        with tf.name_scope('decoder'), tf.variable_scope('decode'):
            decoder_inputs = self.decode_inputs_preprocess(self.decoder_targets, english_id_matrix)
            print('decoder_inputs shape before embedded')
            print(decoder_inputs.shape)
            decoder_inputs = tf.nn.embedding_lookup(english_embeddings, decoder_inputs)
            print('decoder inputs shape after embedded')
            print(decoder_inputs.shape)
            defw_rnn = tf.nn.rnn_cell.MultiRNNCell([
                tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                        initializer=tf.orthogonal_initializer)
                for _ in range(self.FLAGS.rnn_layer_size)])
            training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
                                                                sequence_length=self.decoder_targets_length,
                                                                time_major=False)
            training_decoder = \
                tf.contrib.seq2seq.BasicDecoder(
                    defw_rnn, training_helper,
                    encoder_final_state,
                    output_layer)
            training_decoder_output, _, training_decoder_output_length = \
                tf.contrib.seq2seq.dynamic_decode(
                    training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.FLAGS.max_len)
            training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
            print("training logits shape")
            print(training_logits.shape)
        # predicting decoder
        with tf.variable_scope('decode', reuse=True):
            start_tokens = tf.tile(tf.constant([english_id_matrix.index('<go>')], dtype=tf.int32),
                                   [self.batch_size], name='start_tokens')
            predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(english_embeddings,
                                                                         start_tokens,
                                                                         english_id_matrix.index('<eos>'))
            predicting_decoder = tf.contrib.seq2seq.BasicDecoder(defw_rnn,
                                                                 predicting_helper,
                                                                 encoder_final_state,
                                                                 output_layer)
            predicting_decoder_output, _, predicting_decoder_output_length = \
                tf.contrib.seq2seq.dynamic_decode(
                    predicting_decoder,
                    impute_finished=True,
                    maximum_iterations=self.FLAGS.max_len)
            self.predicting_logits = tf.identity(predicting_decoder_output.sample_id, name='predictions')
            print("predicting logits shape")
            print(self.predicting_logits.shape)
        masks = tf.sequence_mask(self.decoder_targets_length, self.FLAGS.max_len, dtype=tf.float32, name='masks')
        with tf.variable_scope('optimization'), tf.name_scope('optimization'):
            # Loss
            self.cost = tf.contrib.seq2seq.sequence_loss(training_logits, self.decoder_targets, masks)
            # Optimizer
            optimizer = tf.train.AdamOptimizer(self.FLAGS.alpha)
            # Gradient Clipping
            gradients = optimizer.compute_gradients(self.cost)
            capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
            self.train_op = optimizer.apply_gradients(capped_gradients)
Well... I've figured it out. The problem happened because I only sent the encoder's final state to the decoder.
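In other words, a MultiRNNCell decoder expects one LSTMStateTuple per layer, while encoder_final_state here is a single tuple. A minimal sketch of a fix, assuming the encoder's top-layer state can simply be reused for every decoder layer (zero states for the upper layers would also work):
# BasicDecoder's initial state must match the cell's state structure:
# a tuple containing one LSTMStateTuple per MultiRNNCell layer.
decoder_initial_state = tuple(encoder_final_state
                              for _ in range(self.FLAGS.rnn_layer_size))

training_decoder = tf.contrib.seq2seq.BasicDecoder(
    defw_rnn, training_helper,
    decoder_initial_state,  # instead of the bare encoder_final_state
    output_layer)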

InvalidArgumentError in TensorFlow

I am relatively new to TensorFlow and I am working on relation classification. I will lay out my problem step by step so that it is clear, and I hope that someone can point out my mistake (which I am sure must be a silly one):
For the word embedding layer I needed to initialize a tf.Variable with a tensor that was larger than 2GB. So I followed the solutions provided here and changed my code.
Code snippets before the change:
train.py
if FLAGS.model_type == 'cnn':
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = textCNN(
                sequence_length=x_trains[0].shape[1],
                num_classes=num_classes,
                vocab_size=len(word_embed_vecs),
                embedding_size=FLAGS.embedding_dim,
                dist_vocab_size=dist_vocab_size,
                dist_size=FLAGS.pos_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                word_vecs=word_embed_vecs,  # word_embed_vecs is of shape (2451510, 300)
                train_emb=FLAGS.train_emb)
text_cnn.py
class textCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and
    softmax layer.
    """
    def __init__(
            self, sequence_length, num_classes, vocab_size,
            embedding_size, dist_vocab_size, dist_size, filter_sizes, num_filters,
            l2_reg_lambda=0.0, word_vecs=None, train_emb=True):
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.e1_dist = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="e1_dist")
        self.e2_dist = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="e2_dist")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        l2_loss = tf.constant(0.0)
        # Embedding layer
        with tf.device('/gpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(
                trainable=train_emb,
                initial_value=tf.constant(word_vecs, dtype=tf.float32), name="W")
            self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
Code snippets after the change:
train.py
if FLAGS.model_type == 'cnn':
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = textCNN(
                sequence_length=x_trains[0].shape[1],
                num_classes=num_classes,
                vocab_size=len(word_embed_vecs),
                embedding_size=FLAGS.embedding_dim,
                dist_vocab_size=dist_vocab_size,
                dist_size=FLAGS.pos_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda,
                # word_vecs=word_embed_vecs,  # word_embed_vecs is of shape (2451510, 300)
                train_emb=FLAGS.train_emb)
            # Generate batches
            dev_f1s = []
            test_f1s = []
            for fi in range(len(x_trains)):
                sess.run(tf.global_variables_initializer())
                batches = data_helpers.batch_iter(
                    list(zip(x_trains[fi], train_e1_dists[fi], train_e2_dists[fi],
                             y_trains[fi])), FLAGS.batch_size, FLAGS.embedding_dim,
                    FLAGS.num_epochs)
                # Training loop. For each batch...
                evaluate_every = len(x_trains[fi]) / FLAGS.batch_size
                for batch in batches:
                    x_batch, e1_dist, e2_dist, y_batch = zip(*batch)
                    # Train
                    feed_dict = {
                        cnn.input_x: x_batch,
                        cnn.e1_dist: e1_dist,
                        cnn.e2_dist: e2_dist,
                        cnn.input_y: y_batch,
                        cnn.dropout_keep_prob: FLAGS.dropout_keep_prob,
                        cnn.wordvecs: word_embed_vecs
                    }
                    _, step, summaries, loss, accuracy = sess.run(
                        [train_op, global_step, train_summary_op, cnn.loss,
                         cnn.accuracy], feed_dict)
text_cnn.py
class textCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by a convolutional, max-pooling and
    softmax layer.
    """
    def __init__(
            self, sequence_length, num_classes, vocab_size,
            embedding_size, dist_vocab_size, dist_size, filter_sizes, num_filters,
            l2_reg_lambda=0.0, train_emb=True):
        # Placeholders for input, output and dropout
        self.input_x = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="input_x")
        self.e1_dist = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="e1_dist")
        self.e2_dist = tf.placeholder(tf.int32, [None, sequence_length],
                                      name="e2_dist")
        self.input_y = tf.placeholder(tf.float32, [None, num_classes],
                                      name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32,
                                                name="dropout_keep_prob")
        self.wordvecs = tf.placeholder(tf.float32, shape=(2451510, 300),
                                       name="wordvecs")
        l2_loss = tf.constant(0.0)
        # Embedding layer
        with tf.device('/gpu:0'), tf.name_scope("embedding"):
            # self.W = tf.Variable(
            #     trainable=False,
            #     initial_value=tf.constant(word_vecs, dtype=tf.float32),
            #     name="W")
            self.WordVecs = tf.Variable(trainable=False,
                                        initial_value=self.wordvecs, name="WordVecs")
            self.embedded_chars = tf.nn.embedding_lookup(self.WordVecs,
                                                         self.input_x)
            self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
So basically I added a placeholder with the shape of my embedding vectors and passed it in the feed_dict when calling sess.run. However, I am getting an InvalidArgumentError. The traceback details are as below:
Traceback (most recent call last):
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1323, in _do_call
return fn(*args)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1302, in _run_fn
status, run_metadata)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'wordvecs' with dtype float and shape [2451510,300]
[[Node: wordvecs = Placeholder[dtype=DT_FLOAT, shape=[2451510,300], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train.py", line 470, in <module>
tf.app.run()
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "train.py", line 466, in main
train()
File "train.py", line 407, in train
sess.run(tf.global_variables_initializer())
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'wordvecs' with dtype float and shape [2451510,300]
[[Node: wordvecs = Placeholder[dtype=DT_FLOAT, shape=[2451510,300], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
Caused by op 'wordvecs', defined at:
File "train.py", line 470, in <module>
tf.app.run()
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/platform/app.py", line 48, in run
_sys.exit(main(_sys.argv[:1] + flags_passthrough))
File "train.py", line 466, in main
train()
File "train.py", line 310, in train
train_emb=FLAGS.train_emb)
File "/home/mishra/Project/RelExtractKBP/text_cnn.py", line 20, in __init__
self.wordvecs = tf.placeholder(tf.float32, shape=(2451510, 300), name="wordvecs")
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py", line 1599, in placeholder
return gen_array_ops._placeholder(dtype=dtype, shape=shape, name=name)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3091, in _placeholder
"Placeholder", dtype=dtype, shape=shape, name=name)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/mishra/anaconda3/envs/tensorflow/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): You must feed a value for placeholder tensor 'wordvecs' with dtype float and shape [2451510,300]
[[Node: wordvecs = Placeholder[dtype=DT_FLOAT, shape=[2451510,300], _device="/job:localhost/replica:0/task:0/device:GPU:0"]()]]
From what I understand, this error may occur if I do not pass a value for the placeholder when calling sess.run. However, I am not sure what I need to do when calling sess.run(tf.global_variables_initializer()), which is what generates this error according to the traceback.
Any pointers will really help. Thanks a lot.
The placeholder wordvecs needs to be fed.
This can be reproduced with the tf.placeholder example from the official documentation:
x = tf.placeholder(tf.float32, shape=(1024, 1024))
y = tf.matmul(x, x)

with tf.Session() as sess:
    print(sess.run(y))  # ERROR: will fail because x was not fed.

    rand_array = np.random.rand(1024, 1024)
    print(sess.run(y, feed_dict={x: rand_array}))  # Will succeed.
The error is seen at the sess.run(tf.global_variables_initializer()) step because it tries to initialize the following variable:
self.WordVecs = tf.Variable(trainable=False,
                            initial_value=self.wordvecs, name="WordVecs")
whose initial_value points to a tf.placeholder that has not yet been fed:
self.wordvecs = tf.placeholder(tf.float32, shape=(2451510, 300),
                               name="wordvecs")

TensorFlow graph error when trying to generate data in transfer learning

I'm trying to use transfer learning on the pretrained Inception model, so I created a class for feature extraction from the model:
from prototype import Dataset, VideoStreamHandler
import numpy
import random
from keras.applications.inception_v3 import preprocess_input
from keras.preprocessing import image
from scipy.misc import imresize
import time


class Extractor(Dataset.Dataset):
    """
    """
    def __init__(self, path_to_data, seq_len, base_model, image_shape=(299, 299, 3)):
        super().__init__(path_to_data, seq_len, input_shape=image_shape)
        self._extractor = base_model

    def extract_features(self, batch_size):
        """
        passes the data through the base model to get the feature map to later train on
        :return: feature map
        """
        class_one_hot = self.one_hot_encode()  # get the one hot for the classes
        data = self.clean_data(self.get_data(), self._input_shape[0])
        print("Processing {} videos".format(len(self.get_data())))
        transfer_maps, labels = [], []
        rand = random.SystemRandom()
        while True:
            for _ in range(batch_size):
                row = rand.choice(data)
                sequence = self.get_frames(row[0])
                if len(sequence) > self._input_shape[0]:
                    sequence = self.rescale_frame_list(sequence, self._input_shape[0])
                print("{} video processing is complete".format(row[0].split('\\')[-1]))
                features = []
                for frame in sequence:
                    frame_arr = image.img_to_array(frame)  # turn image to numpy array
                    frame_arr = numpy.expand_dims(frame_arr, axis=0)
                    frame_arr = preprocess_input(frame_arr)
                    features.append(self._extractor.predict(frame_arr))
                transfer_maps.append(features)
                labels.append(class_one_hot[row[1]])
            yield numpy.array(transfer_maps), numpy.array(labels)

    def get_frames(self, pth):
        """
        :type: string
        :param pth: path to the specific file from which we take the frames
        :return: the frames in the file
        """
        f_queue = VideoStreamHandler.VideoStream(pth)  # This object opens a thread that reads frames with opencv
        # capture independently from the frame processing to prevent i/o delay and speed up processing
        f_queue.start()
        time.sleep(1.0)  # wait a moment so the thread could start reading frames
        sequence = []
        while f_queue.isnt_empty():
            frame = f_queue.read()
            # resize is used to keep all frames from all videos the same size
            frame = imresize(frame, (self._input_shape[1], self._input_shape[2]))
            sequence.append(frame)
        f_queue.close()  # close the thread
        return sequence
Then, I attempt to train a new model with Keras's fit_generator:
my_model.fit_generator(generator=train_gen, epochs=10, steps_per_epoch=steps_per_epoch, verbose=1, workers=4)
However, I get this error:
Traceback (most recent call last):
File "C:/Users/Aviad Lazar/Desktop/project/prototype/transfer_learning.py", line 41, in
main()
File "C:/Users/Aviad Lazar/Desktop/project/prototype/transfer_learning.py", line 34, in main
my_model.fit_generator(generator=train_gen, epochs=10, steps_per_epoch=steps_per_epoch, verbose=1, workers=4)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\models.py", line 1315, in fit_generator
initial_epoch=initial_epoch)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\engine\training.py", line 2194, in fit_generator
generator_output = next(output_generator)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\utils\data_utils.py", line 793, in get
six.reraise(value.__class__, value, value.__traceback__)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\six.py", line 693, in reraise
raise value
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\utils\data_utils.py", line 658, in _data_generator_task
generator_output = next(self._generator)
File "C:\Users\Aviad Lazar\Desktop\project\prototype\FeatureExtractor.py", line 48, in extract_features
features.append(self._extractor.predict(frame_arr))
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\engine\training.py", line 1832, in predict
self._make_predict_function()
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\engine\training.py", line 1031, in _make_predict_function
**kwargs)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\backend\tensorflow_backend.py", line 2506, in function
return Function(inputs, outputs, updates=updates, **kwargs)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\keras\backend\tensorflow_backend.py", line 2449, in init
with tf.control_dependencies(self.outputs):
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\tensorflow\python\framework\ops.py", line 4863, in control_dependencies
return get_default_graph().control_dependencies(control_inputs)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\tensorflow\python\framework\ops.py", line 4481, in control_dependencies
c = self.as_graph_element(c)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\tensorflow\python\framework\ops.py", line 3478, in as_graph_element
return self._as_graph_element_locked(obj, allow_tensor, allow_operation)
File "C:\Users\Aviad Lazar\Desktop\project\venv\lib\site-packages\tensorflow\python\framework\ops.py", line 3557, in _as_graph_element_locked
raise ValueError("Tensor %s is not an element of this graph." % obj)
ValueError: Tensor Tensor("global_average_pooling2d_1/Mean:0", shape=(?, 2048), dtype=float32) is not an element of this graph.

ValueError when trying to run a bi-directional multi-layer LSTM neural network

Traceback (most recent call last):
File "train_rnn.py", line 92, in <module>
batch_size=FLAGS.batch_size)
File "/home/iit/sourab/conv_extractive/codes/cnn-text-classification-tf/rnn_code/text_rnn.py", line 65, in __init__
initial_state_bw=self.rnn_tuple_state_bw)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 375, in bidirectional_dynamic_rnn
time_major=time_major, scope=fw_scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 574, in dynamic_rnn
dtype=dtype)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 737, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2770, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2599, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2549, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 722, in _time_step
(output, new_state) = call_cell()
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 708, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 752, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 669, in _get_single_variable
found_var.get_shape()))
ValueError: Trying to share variable bidirectional_rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (1024, 2048) and found shape (640, 2048).
Below I pass parameters to the constructor of the TextRNN class.
rnn = TextRNN(
    sequence_size=x.shape[1],
    truncated_backprop_length=FLAGS.truncated_backprop_length,
    state_size=FLAGS.state_size,
    num_classes=y.shape[1],
    vocab_size=len(vocab_processor.vocabulary_),
    embedding_size=FLAGS.embedding_dim,
    num_layers=FLAGS.num_layers,
    batch_size=FLAGS.batch_size)
Here is the TextRNN class:
from __future__ import print_function, division
import tensorflow as tf
import numpy as np
import os
import sys


class TextRNN(object):
    """
    An RNN for text classification
    Uses an embedding layer followed by multilayered Bi-Directional LSTMs followed by a softmax layer
    """
    def __init__(
            self, sequence_size, truncated_backprop_length, state_size, num_classes, vocab_size, embedding_size, num_layers, batch_size):
        # placeholders for input, output and dropout probability
        self.input_x = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length, sequence_size], name="input_x")
        self.input_y = tf.placeholder(tf.float32, [batch_size, truncated_backprop_length, num_classes], name="input_y")
        self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
        self.fw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
        self.bw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])

        # Embedding layer
        with tf.device('/cpu:0'), tf.name_scope("embedding"):
            self.W = tf.Variable(
                tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
                name="W")
            embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
            # embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
            self.embedded_chars_expanded = tf.reduce_mean(embedded_chars, axis=2)

        state_per_layer_list_fw = tf.unstack(self.fw_init_state, axis=0)
        self.rnn_tuple_state_fw = tuple(
            [tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_fw[idx][0, :, :], state_per_layer_list_fw[idx][1, :, :])
             for idx in range(num_layers)]
        )
        state_per_layer_list_bw = tf.unstack(self.bw_init_state, axis=0)
        self.rnn_tuple_state_bw = tuple(
            [tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_bw[idx][0, :, :], state_per_layer_list_bw[idx][1, :, :])
             for idx in range(num_layers)]
        )

        W2 = tf.Variable(np.random.rand(2 * state_size, num_classes), dtype=tf.float32)
        b2 = tf.Variable(np.random.rand(1, num_classes), dtype=tf.float32)

        with tf.name_scope('BiMultiLSTM'):
            with tf.name_scope('forward_cell'):
                cell_fw = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
                cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=self.dropout_keep_prob)
                cell_fw = tf.contrib.rnn.MultiRNNCell([cell_fw] * num_layers, state_is_tuple=True)
            with tf.name_scope('Backward_cell'):
                cell_bw = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
                cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=self.dropout_keep_prob)
                cell_bw = tf.contrib.rnn.MultiRNNCell([cell_bw] * num_layers, state_is_tuple=True)
            self.output_hidden_states, self.current_states = tf.nn.bidirectional_dynamic_rnn(
                cell_fw=cell_fw,
                cell_bw=cell_bw,
                inputs=self.embedded_chars_expanded,
                initial_state_fw=self.rnn_tuple_state_fw,
                initial_state_bw=self.rnn_tuple_state_bw)
            self.outputs_concat = tf.concat(self.output_hidden_states, 2)
            self.output_series = tf.reshape(self.outputs_concat, [-1, 2 * state_size])
            self._current_state_fw = self.current_states[0]
            self._current_state_bw = self.current_states[1]

        # output
        with tf.name_scope("output"):
            self.logits = tf.matmul(self.output_series, W2) + b2  # Broadcasted addition
            self.labels = tf.reshape(self.input_y, [-1, num_classes])
            self.logits_series = tf.unstack(tf.reshape(self.logits, [batch_size, truncated_backprop_length, num_classes]), axis=1)
            self.predictions_series = [tf.nn.softmax(logit) for logit in self.logits_series]
            self.labels_series = tf.unstack(tf.reshape(self.labels, [batch_size, truncated_backprop_length, num_classes]), axis=1)

        # loss
        with tf.name_scope("loss"):
            self.losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
            self.total_loss = tf.reduce_mean(self.losses)

        # accuracy
        self.correct_predictions = []
        with tf.name_scope("accuracy"):
            for predictions, labels in zip(self.predictions_series, self.labels_series):
                self.correct_predictions.append(tf.equal(tf.argmax(predictions, axis=1), tf.argmax(labels, axis=1)))
            self.sum_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, tf.float32))
These are the parameters that I passed:
Parameters:
ALLOW_SOFT_PLACEMENT=True
BATCH_SIZE=50
CHECKPOINT_EVERY=100
DATA_FILE=./../data/cnn_train.txt
DEV_FILE=./../data/cnn_test.txt
DROPOUT_KEEP_PROB=1.0
EMBEDDING_DIM=128
EVALUATE_EVERY=100
LOG_DEVICE_PLACEMENT=False
NUM_CHECKPOINTS=5
NUM_CLASSES=2
NUM_EPOCHS=200
NUM_LAYERS=3
STATE_SIZE=512
TRUNCATED_BACKPROP_LENGTH=10
I searched the net but failed to solve the error. The program works if state_size is the same as embedding_size, and gives the above error in all other cases where state_size is not equal to embedding_size.
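That symptom matches a well-known MultiRNNCell pitfall, offered here as a hedged sketch rather than a confirmed fix: [cell_fw] * num_layers reuses the same cell object for every layer, so all layers try to share one kernel. The first layer's kernel has input size embedding_size + state_size (128 + 512 = 640), while the deeper layers need state_size + state_size (512 + 512 = 1024), which is exactly the (640, 2048) versus (1024, 2048) mismatch in the error and explains why the code runs when state_size equals embedding_size. Constructing a fresh cell per layer avoids the unintended sharing (make_cell is a hypothetical helper):
def make_cell(state_size, dropout_keep_prob):
    # a brand-new cell object per layer, so no variables are shared
    cell = tf.contrib.rnn.BasicLSTMCell(num_units=state_size, state_is_tuple=True)
    return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=dropout_keep_prob)

cell_fw = tf.contrib.rnn.MultiRNNCell(
    [make_cell(state_size, self.dropout_keep_prob) for _ in range(num_layers)],
    state_is_tuple=True)
cell_bw = tf.contrib.rnn.MultiRNNCell(
    [make_cell(state_size, self.dropout_keep_prob) for _ in range(num_layers)],
    state_is_tuple=True)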
