Invalid placeholder in tensorflow - python-3.x

I am trying to write a custom loss function as follows.
def vgg16_feature_model(flayers, weights='imagenet'):
    """
    Feature extraction VGG16 model.
    # Arguments
        flayers: list of strings with names of layers to get the features for.
            The length of `flayers` should be > 1, otherwise the output shape
            is one axis less.
        weights: either "imagenet" or path to the file with weights.
    # Returns
        features_model: keras.models.Model instance to extract the features.
    # Raises
        AssertionError: in case `flayers` is not a list.
        AssertionError: in case the length of `flayers` is < 2.
    """
    assert isinstance(flayers, list), "First argument 'flayers' must be a list"
    assert len(flayers) > 1, "Length of 'flayers' must be > 1."
    base_model = VGG16(include_top=False, weights=weights)
    vgg16_outputs = [base_model.get_layer(flayers[i]).output for i in range(len(flayers))]
    features_model = Model(inputs=[base_model.input], outputs=vgg16_outputs, name='vgg16_features')
    features_model.trainable = False
    features_model.compile(loss='mse', optimizer='adam')
    return features_model

# Losses:
# -------
def total_loss(mask, vgg16_weights='imagenet'):
    """
    Total loss defined in Eq 7 of Liu et al 2018 with:
        y_true = I_gt,
        y_pred = I_out,
        y_comp = I_comp.
    """
    vgg16_lnames = ['block1_pool', 'block2_pool', 'block3_pool']
    vgg_model = vgg16_feature_model(vgg16_lnames, weights=vgg16_weights)

    def loss(y_true, y_pred):
        mask_inv = 1 - mask
        y_comp = mask * y_true + mask_inv * y_pred
        print("y_pred", y_pred)
        print(y_comp)
        input()
        vgg_out = vgg_model(y_pred)
        vgg_gt = vgg_model(y_true)
        print("abc-----------------------------------")
        vgg_comp = vgg_model(y_comp)
        print("abc")
        l_valid = loss_per_pixel(y_true, y_pred, mask)
        l_hole = loss_per_pixel(y_true, y_pred, mask_inv)
        l_perc = loss_perc(vgg_out, vgg_gt, vgg_comp)
        l_style = loss_style(vgg_out, vgg_gt, vgg_comp)
        l_tv = loss_tv(y_comp, mask_inv)
        return l_valid + 6.*l_hole + 0.05*l_perc + 120.*l_style + 0.1*l_tv

    return loss
I am getting the following error:
Traceback (most recent call last):
File "inpainter_main.py", line 46, in <module>
model = pconv_model(lr=LR_STAGE1, image_size=IMAGE_SIZE, vgg16_weights=VGG16_WEIGHTS)
File "/home/bitsy-chuck/Downloads/PConv2D-2ndimp/inpainter_utils/pconv2d_model.py", line 118, in pconv_model
model.compile(Adam(lr=lr), loss=total_loss(mask_input, vgg16_weights=vgg16_weights))
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_v1.py", line 446, in compile
self._compile_weights_loss_and_weighted_metrics()
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_v1.py", line 1515, in _compile_weights_loss_and_weighted_metrics
self.total_loss = self._prepare_total_loss(masks)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_v1.py", line 1575, in _prepare_total_loss
per_sample_losses = loss_fn.call(y_true, y_pred)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/losses.py", line 246, in call
return self.fn(y_true, y_pred, **self._fn_kwargs)
File "/home/bitsy-chuck/Downloads/PConv2D-2ndimp/inpainter_utils/pconv2d_loss.py", line 58, in loss
vgg_comp = vgg_model(y_comp)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 737, in __call__
base_layer_utils.create_keras_history(inputs)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 186, in create_keras_history
_, created_layers = _create_keras_history_helper(tensors, set(), [])
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 249, in _create_keras_history_helper
layer_inputs, processed_ops, created_layers)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 246, in _create_keras_history_helper
constants[i] = backend.function([], op_input)([])
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/backend.py", line 3632, in __call__
run_metadata=self.run_metadata)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1472, in __call__
run_metadata_ptr)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'pconv2d_dec_16_target' with dtype float and shape [?,?,?,?]
[[{{node pconv2d_dec_16_target}}]]
I first thought that y_comp is not correct, but
y_pred ---> Tensor("pconv2d_dec_16/BiasAdd:0", shape=(None, 512, 512, 3), dtype=float32)
y_comp ---> Tensor("loss_1/pconv2d_dec_16_loss/add:0", shape=(None, 512, 512, 3), dtype=float32)
They both look the same to me, so as far as I can tell this should work.
The error is at the line vgg_comp = vgg_model(y_comp).
Can anyone also explain why I am getting a placeholder error?
TF version: 1.3
Keras version: 2.2.4

Placeholder errors are usually due to TensorFlow versions. I had the exact same error, and it was fixed when I reinstalled, installing Keras first and then TensorFlow. Using Anaconda might help, since it caches the downloaded files when you uninstall, so it is easy to install again without having to download everything.
There might be some other fix, I believe, but this fixed mine.
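If it helps, a quick sanity check (a minimal sketch, adjust to your environment) is to confirm which TensorFlow and Keras versions are actually being imported before reinstalling anything:
import tensorflow as tf
import keras
# Print the versions actually loaded in this environment; a mismatch with the
# versions you expect to be installed is a hint that reinstalling (Keras first,
# then TensorFlow) may indeed be the fix.
print("TensorFlow:", tf.__version__)
print("Keras:", keras.__version__)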

Related

GridSearchCV(): ValueError: Input contains NaN, infinity or a value too large for dtype('float64')

I get the ValueError in the title when I try to perform a GridSearchCV on an MLP classifier. Of course I checked whether any np.inf or np.nan values exist in my dataset, but they don't:
print(np.any(np.isnan(X)))
returns False
print(np.all(np.isfinite(X)))
returns True
I also cast all my values to np.float64:
X = X.values.astype(np.float64)
Y = Y.values
My scikit-learn version is 0.22.2.post1 (latest)
The code I'm trying to execute:
from scipy.stats import randint as sp_randint

hiddenlayers = [(sp_randint.rvs(100, 600, 1), sp_randint.rvs(100, 600, 1),),
                (sp_randint.rvs(100, 600, 1),)]
alpha_range = 10.0 ** np.arange(-2, 1)

param_grid_MLP = [{'solver': ['lbfgs'],
                   'hidden_layer_sizes': hiddenlayers,
                   'activation': ['identity', 'tanh', 'relu', 'logistic'],
                   'alpha': alpha_range
                   },
                  {'solver': ['sgd'],
                   'hidden_layer_sizes': hiddenlayers,
                   'activation': ['identity', 'tanh', 'relu', 'logistic'],
                   'alpha': alpha_range,
                   'learning_rate': ['constant', 'invscaling', 'adaptive']
                   },
                  {'solver': ['adam'],
                   'hidden_layer_sizes': hiddenlayers,
                   'activation': ['identity', 'tanh', 'relu', 'logistic'],
                   'alpha': alpha_range
                   }]

mlp = MLPClassifier(random_state=0)
cross_validation = StratifiedKFold(5)

# scoring = {'AUC': 'roc_auc',
#            'Accuracy': make_scorer(accuracy_score),
#            'Recall': make_scorer(recall_score, pos_label='crafted'),
#            'Precision': make_scorer(precision_score, pos_label='crafted')}

scoring = {'AUC': 'roc_auc',
           'Accuracy': make_scorer(accuracy_score),
           'Recall': make_scorer(recall_score, pos_label='crafted')}

grid_search_MLP = GridSearchCV(estimator=mlp,
                               param_grid=param_grid_MLP,
                               scoring=scoring,
                               cv=cross_validation.split(X_train, y_train),
                               refit='Recall',
                               n_jobs=-1,
                               verbose=True)
grid_search_MLP.fit(X_train, y_train)

print('Best score: {}'.format(grid_search_MLP.best_score_))
print('Best index: {}'.format(grid_search_MLP.best_index_))
print('Best parameters: {}'.format(grid_search_MLP.best_params_))

mlp = grid_search_MLP.best_estimator_
mlp
The full error traceback:
Traceback (most recent call last):
File "/usr/local/lib/python3.7/dist-packages/joblib/externals/loky/process_executor.py", line 418, in _process_worker
r = call_item()
File "/usr/local/lib/python3.7/dist-packages/joblib/externals/loky/process_executor.py", line 272, in __call__
return self.fn(*self.args, **self.kwargs)
File "/usr/local/lib/python3.7/dist-packages/joblib/_parallel_backends.py", line 608, in __call__
return self.func(*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/joblib/parallel.py", line 256, in __call__
for func, args, kwargs in self.items]
File "/usr/local/lib/python3.7/dist-packages/joblib/parallel.py", line 256, in <listcomp>
for func, args, kwargs in self.items]
File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 544, in _fit_and_score
test_scores = _score(estimator, X_test, y_test, scorer)
File "/usr/local/lib/python3.7/dist-packages/sklearn/model_selection/_validation.py", line 591, in _score
scores = scorer(estimator, X_test, y_test)
File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 87, in __call__
*args, **kwargs)
File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_scorer.py", line 332, in _score
return self._sign * self._score_func(y, y_pred, **self._kwargs)
File "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_ranking.py", line 369, in roc_auc_score
y_score = check_array(y_score, ensure_2d=False)
File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py", line 578, in check_array
allow_nan=force_all_finite == 'allow-nan')
File "/usr/local/lib/python3.7/dist-packages/sklearn/utils/validation.py", line 60, in _assert_all_finite
msg_dtype if msg_dtype is not None else X.dtype)
ValueError: Input contains NaN, infinity or a value too large for dtype('float64').
It seems to me that you might have a corrupted or non-numeric value in your array. Try to check whether there are other types in your array before transforming to float. Also try to find the min and max values in your array; that might help to find the value that raises the error.
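For example, a minimal sketch of those checks, assuming X is (or can be converted to) a NumPy array:
import numpy as np

X = np.asarray(X)
print(X.dtype)                               # an object dtype hints at mixed or non-numeric values
X = X.astype(np.float64)
print(np.nanmin(X), np.nanmax(X))            # suspiciously large values?
print(np.isnan(X).sum(), np.isinf(X).sum())  # counts of NaN / inf entries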
Try giving verbose a large number, or run those three parts of the grid one by one. If you find that sgd causes the problem, it is probably explained here: MLPRegressor error when solver sgd is used
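And a rough sketch of running the three solver sub-grids one at a time (reusing mlp, scoring and param_grid_MLP from the question) to see which one raises the error:
for sub_grid in param_grid_MLP:
    print("Testing solver:", sub_grid['solver'])
    gs = GridSearchCV(estimator=mlp,
                      param_grid=[sub_grid],
                      scoring=scoring,
                      cv=StratifiedKFold(5),
                      refit='Recall',
                      n_jobs=1,        # no parallelism, so the failing traceback is easier to read
                      verbose=10)
    gs.fit(X_train, y_train)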

Chainer CNN- TypeError: forward() missing 1 required positional argument: 'x'

I'm trying to run a classifier in Chainer, but it fails with the following error.
I have no idea what causes it, because I confirmed that the iterator actually sends a batch to the trainer.
Is there a problem with the neural network model, or is the way the data is fed into the model wrong?
Input.py
from chainer.datasets import split_dataset_random
from chainer.iterators import SerialIterator
from chainer.optimizers import Adam
from chainer.training import Trainer
from chainer.training.updaters import StandardUpdater
from chainer import functions as F, links as L
from chainer import Sequential
import numpy as np
batch_size = 3
X_train = np.ones((9957, 60, 80, 3), dtype=np.float32)
X_train, _ = split_dataset_random(X_train, 8000, seed=0)
train_iter = SerialIterator(X_train, batch_size)
model = Sequential(
    L.Convolution2D(None, 64, 3, 2),
    F.relu,
    L.Convolution2D(64, 32, 3, 2),
    F.relu,
    L.Linear(None, 16),
    F.dropout,
    L.Linear(16, 4)
)
model_loss = L.Classifier(model)
optimizer = Adam()
optimizer.setup(model_loss)
updater = StandardUpdater(train_iter, optimizer)
trainer = Trainer(updater, (25, 'epoch'))
trainer.run()
Stacktrace.py
Exception in main training loop: forward() missing 1 required positional argument: 'x'
Traceback (most recent call last):
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 315, in run
update()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 165, in update
self.update_core()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 181, in update_core
optimizer.update(loss_func, in_arrays)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/optimizer.py", line 680, in update
loss = lossfun(*args, **kwds)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/links/model/classifier.py", line 143, in forward
self.y = self.predictor(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/sequential.py", line 210, in forward
x = layer(*x)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
Will finalize trainer extensions and updater before reraising the exception.
Traceback (most recent call last):
File "/home/user/deploy/aaa.py", line 33, in <module>
trainer.run()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 348, in run
six.reraise(*exc_info)
File "/home/user/miniconda3/lib/python3.7/site-packages/six.py", line 693, in reraise
raise value
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 315, in run
update()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 165, in update
self.update_core()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 181, in update_core
optimizer.update(loss_func, in_arrays)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/optimizer.py", line 680, in update
loss = lossfun(*args, **kwds)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/links/model/classifier.py", line 143, in forward
self.y = self.predictor(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/sequential.py", line 210, in forward
x = layer(*x)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
TypeError: forward() missing 1 required positional argument: 'x'
Is there a problem with the neural network model, or with the way the data is fed into the model? Please let me know if you need to see the whole code.
All you have to do is give a tuple of an ndarray and an int to the model, because that is what L.Classifier expects.
Is there a problem with the neural network model? Or, the way the data has been fed into the model is wrong?
Therefore, the answer is: the way the data has been fed into the model is wrong.
In the following code, I define a class inheriting DatasetMixin to feed a tuple of an ndarray and an int (this is the conventional way to do it in Chainer).
It should be noted that the input to L.Convolution2D must be an ndarray whose shape is (batch, channel, height, width), so I transpose the array in the dataset.
Solution.py
from chainer.datasets import split_dataset_random
from chainer.iterators import SerialIterator
from chainer.optimizers import Adam
from chainer.training import Trainer
from chainer.training.updaters import StandardUpdater
from chainer import functions as F, links as L
from chainer import Sequential
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
    def __init__(self, X, labels):
        super(MyDataset, self).__init__()
        self.X_ = X
        self.labels_ = labels
        self.size_ = X.shape[0]

    def __len__(self):
        return self.size_

    def get_example(self, i):
        return np.transpose(self.X_[i, ...], (2, 0, 1)), self.labels_[i]

batch_size = 3

X_train = np.ones((9957, 60, 80, 3), dtype=np.float32)
label_train = np.random.randint(0, 4, (9957,), dtype=np.int32)
dataset = MyDataset(X_train, label_train)
dataset_train, _ = split_dataset_random(dataset, 8000, seed=0)
train_iter = SerialIterator(dataset_train, batch_size)

model = Sequential(
    L.Convolution2D(None, 64, 3, 2),
    F.relu,
    L.Convolution2D(64, 32, 3, 2),
    F.relu,
    L.Linear(None, 16),
    F.dropout,
    L.Linear(16, 4)
)
model_loss = L.Classifier(model)

optimizer = Adam()
optimizer.setup(model_loss)

updater = StandardUpdater(train_iter, optimizer)
trainer = Trainer(updater, (25, 'epoch'))
trainer.run()
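As an aside, if a custom class is not needed, a dataset of (image, label) tuples could also be built with chainer.datasets.TupleDataset; a minimal sketch under the same assumptions, transposing to channel-first up front:
from chainer.datasets import TupleDataset
import numpy as np

X_train = np.ones((9957, 60, 80, 3), dtype=np.float32)
label_train = np.random.randint(0, 4, (9957,), dtype=np.int32)

# Transpose once to (N, C, H, W) so every example is already channel-first.
X_chw = np.transpose(X_train, (0, 3, 1, 2))
dataset = TupleDataset(X_chw, label_train)  # each item is an (ndarray, int) pair, as L.Classifier expects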

In Tensorflow I can't use any MultiRNNCell instance in dynamic decode, but a single RNNCell instance can work on it

I am building a seq2seq model with TensorFlow and have run into a problem: my program throws an error when I use a MultiRNNCell in tf.contrib.seq2seq.dynamic_decode.
The problem happens here:
defw_rnn = tf.nn.rnn_cell.MultiRNNCell([
    tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                            initializer=tf.orthogonal_initializer)
    for _ in range(self.FLAGS.rnn_layer_size)])

training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
                                                    sequence_length=self.decoder_targets_length,
                                                    time_major=False)
training_decoder = \
    tf.contrib.seq2seq.BasicDecoder(
        defw_rnn, training_helper,
        encoder_final_state,
        output_layer)
training_decoder_output, _, training_decoder_output_length = \
    tf.contrib.seq2seq.dynamic_decode(
        training_decoder,
        impute_finished=True,
        maximum_iterations=self.FLAGS.max_len)
When I run this code, the console shows this error message:
C:\Users\TopView\AppData\Local\Programs\Python\Python36\python.exe E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py:417: calling reverse_sequence (from tensorflow.python.ops.array_ops) with seq_dim is deprecated and will be removed in a future version.
Instructions for updating:
seq_dim is deprecated, use seq_axis instead
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py:432: calling reverse_sequence (from tensorflow.python.ops.array_ops) with batch_dim is deprecated and will be removed in a future version.
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
encoder_final_state shpe
LSTMStateTuple(c=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_5:0' shape=(?, 24) dtype=float32>, h=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_6:0' shape=(?, 24) dtype=float32>)
decoder_inputs shape before embedded
(128, 10)
decoder inputs shape after embedded
(128, 10, 5)
Traceback (most recent call last):
File "E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py", line 14, in <module>
len(embedding_matrix['embedding'][0]))
File "E:\PycharmProject\cikm_transport\CIKM\CIKM\translate_model\model.py", line 109, in __init__
maximum_iterations=self.FLAGS.max_len)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 323, in dynamic_decode
swap_memory=swap_memory)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3209, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2941, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2878, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3179, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 266, in body
decoder_finished) = decoder.step(time, inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py", line 137, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1325, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 846, in call
(c_prev, m_prev) = state
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 436, in __iter__
"Tensor objects are not iterable when eager execution is not "
TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
Process finished with exit code 1
But when I change defw_rnn to a single RNN cell instance, such as an LSTMCell, the error disappears:
defw_rnn = tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                   initializer=tf.orthogonal_initializer)
And the code works well. However, most of the seq2seq code I've found on the Internet uses MultiRNNCell with TensorFlow, so it really confuses me what is wrong with my program.
Here is the entire code:
import tensorflow as tf
import numpy as np


class Seq2SeqModel(object):

    def bw_fw_rnn(self):
        with tf.name_scope("forward_rnn"):
            fw = tf.nn.rnn_cell.MultiRNNCell([
                tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                        initializer=tf.orthogonal_initializer)
                for _ in range(self.FLAGS.rnn_layer_size)])
            fw = tf.nn.rnn_cell.DropoutWrapper(fw, output_keep_prob=self.FLAGS.keep_prob)
        with tf.name_scope("backward_rnn"):
            bw = tf.nn.rnn_cell.MultiRNNCell([
                tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                        initializer=tf.orthogonal_initializer)
                for _ in range(self.FLAGS.rnn_layer_size)])
            bw = tf.nn.rnn_cell.DropoutWrapper(bw, output_keep_prob=self.FLAGS.keep_prob)
        return (fw, bw)

    def decode_inputs_preprocess(self, data, id_matrix):
        ending = tf.strided_slice(data, [0, 0], [self.batch_size, -1], [1, 1])
        decoder_input = tf.concat([tf.fill([self.batch_size, 1], id_matrix.index('<go>')), ending], 1)
        return decoder_input

    def __init__(self, FLAGS, english_id_matrix, spanish_id_matrix, english_vocab_size, spanish_vocab_size, embedding_size):
        self.FLAGS = FLAGS
        self.english_vocab_size = english_vocab_size
        self.embedding_size = embedding_size
        self.encoder_input = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='encoder_inputs')
        self.decoder_targets = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='decoder_targets')
        self.encoder_input_sequence_length = tf.placeholder(shape=[None], dtype=tf.int32, name='encoder_inputs_length')
        self.decoder_targets_length = tf.placeholder(shape=[None], dtype=tf.int32, name='decoder_targets_length')
        self.batch_size = self.FLAGS.batch_size

        with tf.name_scope('embedding_look_up'):
            spanish_embeddings = tf.Variable(
                tf.random_uniform([english_vocab_size,
                                   embedding_size], -1.0, 1.0),
                dtype=tf.float32)
            english_embeddings = tf.Variable(
                tf.random_uniform([english_vocab_size,
                                   embedding_size], -1.0, 1.0),
                dtype=tf.float32)
            self.spanish_embeddings_inputs = tf.placeholder(
                dtype=tf.float32, shape=[english_vocab_size, embedding_size],
                name='spanish_embeddings_inputs')
            self.english_embeddings_inputs = tf.placeholder(
                dtype=tf.float32, shape=[english_vocab_size, embedding_size],
                name='spanish_embeddings_inputs')
            self.spanish_embeddings_inputs_op = spanish_embeddings.assign(self.spanish_embeddings_inputs)
            self.english_embeddings_inputs_op = english_embeddings.assign(self.english_embeddings_inputs)
            encoder_inputs = tf.nn.embedding_lookup(spanish_embeddings, self.encoder_input)

        with tf.name_scope('encoder'):
            enfw_rnn, enbw_rnn = self.bw_fw_rnn()
            encoder_outputs, encoder_final_state = \
                tf.nn.bidirectional_dynamic_rnn(enfw_rnn, enbw_rnn, encoder_inputs,
                                                sequence_length=self.encoder_input_sequence_length,
                                                dtype=tf.float32)
            print("encoder_final_state shpe")
            # final_state_c = tf.concat([encoder_final_state[0][-1].c, encoder_final_state[1][-1].c], 1)
            # final_state_h = tf.concat([encoder_final_state[0][-1].h, encoder_final_state[1][-1].h], 1)
            # encoder_final_state = tf.contrib.rnn.LSTMStateTuple(c=final_state_c,
            #                                                     h=final_state_h)
            encoder_final_state = encoder_final_state[0][-1]
            print(encoder_final_state)

        with tf.name_scope('dense_layer'):
            output_layer = tf.layers.Dense(english_vocab_size,
                                           kernel_initializer=tf.truncated_normal_initializer(
                                               mean=0.0, stddev=0.1
                                           ))

        # training decoder
        with tf.name_scope('decoder'), tf.variable_scope('decode'):
            decoder_inputs = self.decode_inputs_preprocess(self.decoder_targets, english_id_matrix)
            print('decoder_inputs shape before embedded')
            print(decoder_inputs.shape)
            decoder_inputs = tf.nn.embedding_lookup(english_embeddings, decoder_inputs)
            print('decoder inputs shape after embedded')
            print(decoder_inputs.shape)
            defw_rnn = tf.nn.rnn_cell.MultiRNNCell([
                tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
                                        initializer=tf.orthogonal_initializer)
                for _ in range(self.FLAGS.rnn_layer_size)])
            training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
                                                                sequence_length=self.decoder_targets_length,
                                                                time_major=False)
            training_decoder = \
                tf.contrib.seq2seq.BasicDecoder(
                    defw_rnn, training_helper,
                    encoder_final_state,
                    output_layer)
            training_decoder_output, _, training_decoder_output_length = \
                tf.contrib.seq2seq.dynamic_decode(
                    training_decoder,
                    impute_finished=True,
                    maximum_iterations=self.FLAGS.max_len)
            training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
            print("training logits shape")
            print(training_logits.shape)

        # predicting decoder
        with tf.variable_scope('decode', reuse=True):
            start_tokens = tf.tile(tf.constant([english_id_matrix.index('<go>')], dtype=tf.int32),
                                   [self.batch_size], name='start_tokens')
            predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(english_embeddings,
                                                                         start_tokens,
                                                                         english_id_matrix.index('<eos>'))
            predicting_decoder = tf.contrib.seq2seq.BasicDecoder(defw_rnn,
                                                                 predicting_helper,
                                                                 encoder_final_state,
                                                                 output_layer)
            predicting_decoder_output, _, predicting_decoder_output_length = \
                tf.contrib.seq2seq.dynamic_decode(
                    predicting_decoder,
                    impute_finished=True,
                    maximum_iterations=self.FLAGS.max_len)
            self.predicting_logits = tf.identity(predicting_decoder_output.sample_id, name='predictions')
            print("predicting logits shape")
            print(self.predicting_logits.shape)

        masks = tf.sequence_mask(self.decoder_targets_length, self.FLAGS.max_len, dtype=tf.float32, name='masks')

        with tf.variable_scope('optimization'), tf.name_scope('optimization'):
            # Loss
            self.cost = tf.contrib.seq2seq.sequence_loss(training_logits, self.decoder_targets, masks)
            # Optimizer
            optimizer = tf.train.AdamOptimizer(self.FLAGS.alpha)
            # Gradient Clipping
            gradients = optimizer.compute_gradients(self.cost)
            capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
            self.train_op = optimizer.apply_gradients(capped_gradients)
Well... I've figured it out. The problem happened because I passed only a single layer's final state from the encoder as the initial state of the decoder, while a MultiRNNCell expects one state per layer.
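For reference, a rough, untested sketch of one way to keep the MultiRNNCell decoder: give it an initial state with one entry per layer, since a MultiRNNCell expects a tuple of per-layer states rather than a single LSTMStateTuple. How the encoder state is mapped to each decoder layer is a design choice; the replication below is only an illustration.
# Build a per-layer initial state for the MultiRNNCell decoder.
decoder_initial_state = tuple(encoder_final_state
                              for _ in range(self.FLAGS.rnn_layer_size))
training_decoder = tf.contrib.seq2seq.BasicDecoder(defw_rnn,
                                                   training_helper,
                                                   decoder_initial_state,
                                                   output_layer)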

Keras int_shape returns None in custom loss function

I am trying to obtain the batch size within a custom loss function using K.int_shape(), as demonstrated by the code below.
from keras import layers, Input, Model
import keras.backend as K
import numpy as np

train_X = np.random.random([100, 5])
train_Y = train_X.sum(axis=1)

inputs = Input(shape=(5,), dtype='float32', name='posts')
outputs = layers.Dense(1, activation='relu')(inputs)
model = Model(inputs, outputs)  # , net_qc])
model.summary()

def myloss(y_true, y_pred):
    n = K.int_shape(y_pred)[0]
    return K.sum(y_pred)/n

model.compile(optimizer='adam', loss=myloss)
model.fit(train_X, train_Y, epochs=10, batch_size=10)
The error message below suggests that K.int_shape returns None. I have tried several things without success and would really appreciate some help.
Traceback (most recent call last):
File "./test_intshape.py", line 21, in <module>
model.compile(optimizer='adam', loss=myloss)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/keras/engine/training.py", line 830, in compile
sample_weight, mask)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/keras/engine/training.py", line 429, in weighted
score_array = fn(y_true, y_pred)
File "./test_intshape.py", line 19, in myloss
return K.sum(y_pred)/n
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 820, in binary_op_wrapper
y = ops.convert_to_tensor(y, dtype=x.dtype.base_dtype, name="y")
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 639, in convert_to_tensor
as_ref=False)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 704, in internal_convert_to_tensor
ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/constant_op.py", line 113, in _constant_tensor_conversion_function
return constant(v, dtype=dtype, name=name)
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/constant_op.py", line 102, in constant
tensor_util.make_tensor_proto(value, dtype=dtype, shape=shape, verify_shape=verify_shape))
File "/home/ubuntu/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages/tensorflow/python/framework/tensor_util.py", line 360, in make_tensor_proto
raise ValueError("None values not supported.")
ValueError: None values not supported.
That is the expected behaviour, because K.int_shape() doesn't return a symbolic tensor but the currently known shape. You only know the batch size at runtime; when constructing the graph it is None. What you are looking for is K.shape() instead, which returns a symbolic tensor whose batch size is set at runtime, i.e.:
n = K.shape(y_pred)[0]
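Putting it together, the loss from the question could look like this (a minimal sketch; the shape is cast to a float so the division is well defined):
import keras.backend as K

def myloss(y_true, y_pred):
    # K.shape returns a symbolic tensor; index 0 is the batch size, resolved at runtime.
    n = K.cast(K.shape(y_pred)[0], K.floatx())
    return K.sum(y_pred) / n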

Tensorflow 1.4 Bidirectional RNN not working as expected

I am trying to use a bidirectional RNN and pass the output through a CNN for text classification. However, I am getting all sorts of shape errors with the bidirectional RNN, although if I use two dynamic RNNs with a reverse op for the second one, it appears to work fine.
Here is bidirectional RNN code that DOES NOT work for me:
# Bidirectional LSTM layer
with tf.name_scope("bidirectional-lstm"):
    lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
    lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)

    self.lstm_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        lstm_fw_cell,
        lstm_bw_cell,
        self.embedded_chars,
        sequence_length=self.seqlen,
        dtype=tf.float32)

    self.lstm_outputs = tf.concat(self.lstm_outputs, axis=2)
Here is the version with two dynamic RNNs that DOES work for me:
# Bidirectional LSTM layer
with tf.name_scope("bidirectional-lstm"):
    lstm_fw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)
    lstm_bw_cell = tf.nn.rnn_cell.BasicLSTMCell(hidden_size, forget_bias=1.0)

    with tf.variable_scope("lstm-output-fw"):
        self.lstm_outputs_fw, _ = tf.nn.dynamic_rnn(
            lstm_fw_cell,
            self.embedded_chars,
            sequence_length=self.seqlen,
            dtype=tf.float32)

    with tf.variable_scope("lstm-output-bw"):
        self.embedded_chars_rev = array_ops.reverse_sequence(self.embedded_chars, seq_lengths=self.seqlen, seq_dim=1)
        tmp, _ = tf.nn.dynamic_rnn(
            lstm_bw_cell,
            self.embedded_chars_rev,
            sequence_length=self.seqlen,
            dtype=tf.float32)
        self.lstm_outputs_bw = array_ops.reverse_sequence(tmp, seq_lengths=self.seqlen, seq_dim=1)

    # Concatenate outputs
    self.lstm_outputs = tf.add(self.lstm_outputs_fw, self.lstm_outputs_bw, name="lstm_outputs")
What am I doing wrong with the bidirectional RNN?
I am passing its output to a CNN, and the error occurs when computing the loss.
Here is the rest of the code:
# Convolution + maxpool layer for each filter size
pooled_outputs = []
for i, filter_size in enumerate(filter_sizes):
    with tf.name_scope("conv-maxpool-%s" % filter_size):
        # Convolution Layer
        filter_shape = [filter_size, hidden_size, 1, num_filters]
        W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
        conv = tf.nn.conv2d(
            self.lstm_outputs_expanded,
            W,
            strides=[1, 1, 1, 1],
            padding="VALID",
            name="conv")
        # Apply nonlinearity
        h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
        # Maxpooling over the outputs
        pooled = tf.nn.max_pool(
            h,
            ksize=[1, sequence_length - filter_size + 1, 1, 1],
            strides=[1, 1, 1, 1],
            padding='VALID',
            name="pool")
        pooled_outputs.append(pooled)

# Combine all the pooled features
num_filters_total = num_filters * len(filter_sizes)
self.h_pool = tf.concat(axis=3, values=pooled_outputs)
self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

# Dropout layer
with tf.name_scope("dropout"):
    self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

# Final (unnormalized) scores and predictions
with tf.name_scope("output"):
    # Standard output weights initialization
    W = tf.get_variable(
        "W",
        shape=[num_filters_total, num_classes],
        initializer=tf.contrib.layers.xavier_initializer())
    b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
    # # Initialized output weights to 0.0, might improve accuracy
    # W = tf.Variable(tf.constant(0.0, shape=[num_filters_total, num_classes]), name="W")
    # b = tf.Variable(tf.constant(0.0, shape=[num_classes]), name="b")
    l2_loss += tf.nn.l2_loss(W)
    l2_loss += tf.nn.l2_loss(b)
    self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
    self.predictions = tf.argmax(self.scores, 1, name="predictions")

# Calculate mean cross-entropy loss
with tf.name_scope("loss"):
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
    self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

# Accuracy
with tf.name_scope("accuracy"):
    correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
    self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
And here is the error message:
Traceback (most recent call last):
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1323, in _do_call
return fn(*args)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1302, in _run_fn
status, run_metadata)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/errors_impl.py", line 473, in __exit__
c_api.TF_GetCode(self.status.status))
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be same size: logits_size=[7550,2] labels_size=[50,2]
[[Node: loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](loss/Reshape, loss/Reshape_1)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "train_upgraded.py", line 209, in <module>
train_step(x_batch, seqlen_batch, y_batch)
File "train_upgraded.py", line 177, in train_step
feed_dict)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 889, in run
run_metadata_ptr)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1120, in _run
feed_dict_tensor, options, run_metadata)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1317, in _do_run
options, run_metadata)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1336, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: logits and labels must be same size: logits_size=[7550,2] labels_size=[50,2]
[[Node: loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](loss/Reshape, loss/Reshape_1)]]
Caused by op 'loss/SoftmaxCrossEntropyWithLogits', defined at:
File "train_upgraded.py", line 87, in <module>
l2_reg_lambda=FLAGS.l2_reg_lambda)
File "/media/hemant/MVV/MyValueVest-local/learning/Initial Embeddings/STEP 2 lstm-context-embeddings-master/model_upgraded.py", line 138, in __init__
losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py", line 1783, in softmax_cross_entropy_with_logits
precise_logits, labels, name=name)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 4364, in _softmax_cross_entropy_with_logits
name=name)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2956, in create_op
op_def=op_def)
File "/home/hemant/anaconda3/envs/tf14/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1470, in __init__
self._traceback = self._graph._extract_stack() # pylint: disable=protected-access
InvalidArgumentError (see above for traceback): logits and labels must be same size: logits_size=[7550,2] labels_size=[50,2]
[[Node: loss/SoftmaxCrossEntropyWithLogits = SoftmaxCrossEntropyWithLogits[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](loss/Reshape, loss/Reshape_1)]]
All I had to do was multiply the hidden size by 2, since the output size of a bidirectional RNN is twice that of a regular RNN:
filter_shape = [filter_size, hidden_size * 2, 1, num_filters]
Problem solved.
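In other words, a sketch of the shapes involved, using the names from the question:
# tf.concat(self.lstm_outputs, axis=2) doubles the feature dimension:
#   self.lstm_outputs          -> [batch, sequence_length, 2 * hidden_size]
#   self.lstm_outputs_expanded -> [batch, sequence_length, 2 * hidden_size, 1]
# so the convolution filter must span the full, doubled feature width:
filter_shape = [filter_size, hidden_size * 2, 1, num_filters]
# With "VALID" padding the conv output is then
# [batch, sequence_length - filter_size + 1, 1, num_filters], and the reshape
# after max-pooling keeps the batch dimension intact (the extra rows in
# logits_size=[7550, 2] came from the leftover width when the filter was too narrow).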
