i was following this guide to implement a clustering into a deep model
https://ai-mrkogao.github.io/reinforcement%20learning/clusteringkeras/ but i got two errors :
The first one, on this line
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
it says
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 463, in __call__
self.build(unpack_singleton(input_shapes))
File "<stdin>", line 14, in build
TypeError: add_weight() got multiple values for argument 'name'
So i located
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform', name='clusters')
from the class ClusteringLayer and i removed name='clusters' as follow
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
But now it keeps giving me the following error
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 463, in __call__
self.build(unpack_singleton(input_shapes))
File "<stdin>", line 14, in build
File "/usr/local/lib/python3.6/dist-packages/keras/engine/base_layer.py", line 282, in add_weight
constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py", line 620, in variable
value, dtype=dtype, name=name, constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/keras/backend.py", line 782, in variable
constraint=constraint)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/variables.py", line 263, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py", line 460, in __init__
shape=shape)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/resource_variable_ops.py", line 582, in _init_from_args
if init_from_fn else [initial_value]) as name:
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 6513, in __enter__
return self._name_scope.__enter__()
File "/usr/lib/python3.6/contextlib.py", line 81, in __enter__
return next(self.gen)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 4306, in name_scope
if not _VALID_SCOPE_NAME_REGEX.match(name):
TypeError: expected string or bytes-like object
Here is the complete and reproducible code
import keras.backend as K
from keras.engine.topology import Layer, InputSpec
from keras.layers import Dense, Input
from keras.models import Model
from keras.optimizers import SGD
from keras import callbacks
from keras.initializers import VarianceScaling
from sklearn.cluster import KMeans
def autoencoder(dims, act='relu', init='glorot_uniform'):
n_stacks = len(dims) - 1
input_img = Input(shape=(dims[0],), name='input')
x = input_img
for i in range(n_stacks-1):
x = Dense(dims[i + 1], activation=act, kernel_initializer=init, name='encoder_%d' % i)(x)
encoded = Dense(dims[-1], kernel_initializer=init, name='encoder_%d' % (n_stacks - 1))(x) # hidden layer, features are extracted from here
x = encoded
for i in range(n_stacks-1, 0, -1):
x = Dense(dims[i], activation=act, kernel_initializer=init, name='decoder_%d' % i)(x)
x = Dense(dims[0], kernel_initializer=init, name='decoder_0')(x)
decoded = x
return Model(inputs=input_img, outputs=decoded, name='AE'), Model(inputs=input_img, outputs=encoded, name='encoder')
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x = np.concatenate((x_train, x_test))
y = np.concatenate((y_train, y_test))
x = x.reshape((x.shape[0], -1))
x = np.divide(x, 255.)
x_train.shape
x.shape
n_clusters = len(np.unique(y))
kmeans = KMeans(n_clusters=n_clusters, n_init=20, n_jobs=4)
y_pred_kmeans = kmeans.fit_predict(x)
y_pred_kmeans[:10]
dims = [x.shape[-1], 500, 500, 2000, 10]
init = VarianceScaling(scale=1. / 3., mode='fan_in',distribution='uniform')
pretrain_optimizer = SGD(lr=1, momentum=0.9)
autoencoder, encoder = autoencoder(dims, init=init)
autoencoder.compile(optimizer=pretrain_optimizer, loss='mse')
autoencoder.fit(x, x, batch_size=250, epochs=2) #, callbacks=cb)
autoencoder.save_weights( 'ae_weights.h5')
class ClusteringLayer(Layer):
def __init__(self, n_clusters, weights=None, alpha=1.0, **kwargs):
if 'input_shape' not in kwargs and 'input_dim' in kwargs:
kwargs['input_shape'] = (kwargs.pop('input_dim'),)
super(ClusteringLayer, self).__init__(**kwargs)
self.n_clusters = n_clusters
self.alpha = alpha
self.initial_weights = weights
self.input_spec = InputSpec(ndim=2)
def build(self, input_shape):
assert len(input_shape) == 2
input_dim = input_shape[1]
self.input_spec = InputSpec(dtype=K.floatx(), shape=(None, input_dim))
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
if self.initial_weights is not None:
self.set_weights(self.initial_weights)
del self.initial_weights
self.built = True
def call(self, inputs, **kwargs):
q = 1.0 / (1.0 + (K.sum(K.square(K.expand_dims(inputs, axis=1) - self.clusters), axis=2) / self.alpha))
q **= (self.alpha + 1.0) / 2.0
q = K.transpose(K.transpose(q) / K.sum(q, axis=1)) # Make sure each sample's 10 values add up to 1.
return q
def compute_output_shape(self, input_shape):
assert input_shape and len(input_shape) == 2
return input_shape[0], self.n_clusters
def get_config(self):
config = {'n_clusters': self.n_clusters}
base_config = super(ClusteringLayer, self).get_config()
return dict(list(base_config.items()) + list(config.items()))
clustering_layer = ClusteringLayer(n_clusters, name='clustering')(encoder.output)
model = Model(inputs=encoder.input, outputs=clustering_layer)
model.compile(optimizer=SGD(0.01, 0.9), loss='kld')
model.get_layer(name='clustering').set_weights([kmeans.cluster_centers_])
encoder.output
clustering_layer
784 image input -> 10 classification
The result of
from tensorflow.python import keras
print(keras.__version__)
is
2.2.4-tf
You can even download the docker with keras installation at repbioinfo/autoencoderforclustering
I have solved the error, just correct these line
self.clusters = self.add_weight(shape = (self.n_clusters, input_dim),
initializer='glorot_uniform', name='clusters')
instead of
self.clusters = self.add_weight((self.n_clusters, input_dim), initializer='glorot_uniform')
Related
I am trying to write a custom loss function as follows.
def vgg16_feature_model(flayers, weights='imagenet'):
"""
Feature exctraction VGG16 model.
# Arguments
flayers: list of strings with names of layers to get the features for.
The length of `flayers` should be > 1, otherwise the output shape
is one axis less.
weights: ether "imagenet" or path to the file with weights.
# Returns
features_model: keras.models.Model instance to extract the features.
# Raises
AssertionError: in case of `flayers` is not a list.
AssertionError: in case of length of 'flayers' < 2.
"""
assert isinstance(flayers,list), "First argument 'flayers' must be a list"
assert len(flayers) > 1, "Length of 'flayers' must be > 1."
base_model = VGG16(include_top=False, weights=weights)
vgg16_outputs = [base_model.get_layer(flayers[i]).output for i in range(len(flayers))]
features_model = Model(inputs=[base_model.input], outputs=vgg16_outputs, name='vgg16_features')
features_model.trainable = False
features_model.compile(loss='mse', optimizer='adam')
return features_model
# Losses:
# -------
def total_loss(mask, vgg16_weights='imagenet'):
"""
Total loss defined in Eq 7 of Liu et al 2018 with:
y_true = I_gt,
y_pred = I_out,
y_comp = I_comp.
"""
vgg16_lnames = ['block1_pool', 'block2_pool', 'block3_pool']
vgg_model = vgg16_feature_model(vgg16_lnames, weights=vgg16_weights)
def loss(y_true, y_pred):
mask_inv = 1 - mask
y_comp = mask * y_true + mask_inv * y_pred
print("y_pred", y_pred)
print(y_comp)
input()
vgg_out = vgg_model(y_pred)
vgg_gt = vgg_model(y_true)
print("abc-----------------------------------")
vgg_comp = vgg_model(y_comp)
print("abc")
l_valid = loss_per_pixel(y_true, y_pred, mask)
l_hole = loss_per_pixel(y_true, y_pred, mask_inv)
l_perc = loss_perc(vgg_out, vgg_gt, vgg_comp)
l_style = loss_style(vgg_out, vgg_gt, vgg_comp)
l_tv = loss_tv(y_comp, mask_inv)
return l_valid + 6.*l_hole + 0.05*l_perc + 120.*l_style + 0.1*l_tv
return loss
I am getting an error as
Traceback (most recent call last):
File "inpainter_main.py", line 46, in <module>
model = pconv_model(lr=LR_STAGE1, image_size=IMAGE_SIZE, vgg16_weights=VGG16_WEIGHTS)
File "/home/bitsy-chuck/Downloads/PConv2D-2ndimp/inpainter_utils/pconv2d_model.py", line 118, in pconv_model
model.compile(Adam(lr=lr), loss=total_loss(mask_input, vgg16_weights=vgg16_weights))
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_v1.py", line 446, in compile
self._compile_weights_loss_and_weighted_metrics()
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/training/tracking/base.py", line 456, in _method_wrapper
result = method(self, *args, **kwargs)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_v1.py", line 1515, in _compile_weights_loss_and_weighted_metrics
self.total_loss = self._prepare_total_loss(masks)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/training_v1.py", line 1575, in _prepare_total_loss
per_sample_losses = loss_fn.call(y_true, y_pred)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/losses.py", line 246, in call
return self.fn(y_true, y_pred, **self._fn_kwargs)
File "/home/bitsy-chuck/Downloads/PConv2D-2ndimp/inpainter_utils/pconv2d_loss.py", line 58, in loss
vgg_comp = vgg_model(y_comp)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_v1.py", line 737, in __call__
base_layer_utils.create_keras_history(inputs)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 186, in create_keras_history
_, created_layers = _create_keras_history_helper(tensors, set(), [])
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 249, in _create_keras_history_helper
layer_inputs, processed_ops, created_layers)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/engine/base_layer_utils.py", line 246, in _create_keras_history_helper
constants[i] = backend.function([], op_input)([])
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/backend.py", line 3632, in __call__
run_metadata=self.run_metadata)
File "/home/bitsy-chuck/anaconda3/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1472, in __call__
run_metadata_ptr)
tensorflow.python.framework.errors_impl.InvalidArgumentError: You must feed a value for placeholder tensor 'pconv2d_dec_16_target' with dtype float and shape [?,?,?,?]
[[{{node pconv2d_dec_16_target}}]]
I first thought that y_comp is not correct, but
y_pred ---> Tensor("pconv2d_dec_16/BiasAdd:0", shape=(None, 512, 512, 3), dtype=float32)
y_comp ---> Tensor("loss_1/pconv2d_dec_16_loss/add:0", shape=(None, 512, 512, 3), dtype=float32)
They both appear the same to me and it should work, according to me.
error is at line vgg_comp = vgg_model(y_comp)
Can anyone also explain why am I getting an error of placeholder?
Tf version 1.3
keras 2.2.4
placeholder errors are usually due to tensorflow versions. I had the exact same error and it was fixed when I installed keras first and then tensorflow first. Using anaconda might help as they cache all the files when you uninstall so it is easy to install again without having to download the entire thing again.
There might be some other fix, I believe, but this fixed mine.
I am trying to use tf.keras.utils.Sequence object as input to my keras model so,that I can apply augmentations that are not available in tensorflow using albumentations library. But I am getting error while doing so. (The image pre-processing operations mentioned here are just for clarity)
import albumentations as A
from tensorflow.keras.utils import Sequence
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Dense, Conv2D, Flatten, MaxPool2D, Dropout
from tensorflow.keras.models import Sequential
TRAIN_DIR = os.path.join('..', 'Data', 'PetImages')
def load_data():
list_of_fpaths = glob.glob('../Data/PetImages/Cat/*')
labels = [1] * len(list_of_fpaths)
temp = glob.glob('../Data/PetImages/Dog/*')
list_of_fpaths.extend(temp)
labels.extend([0] * len(temp))
return list_of_fpaths, labels
# Now list of fpaths contain the list of file paths and labels contain
# corresponding labels
class DataSequence(Sequence):
def __init__(self, x_set, y_set, batch_size, augmentations):
self.x, self.y = x_set, y_set
self.batch_size = batch_size
self.augment = augmentations
def __len__(self):
return int(np.ceil(len(self.x) / float(self.batch_size)))
def __getitem__(self, idx):
batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
a = np.array([
self.augment(image=plt.imread(file_name))["image"] for file_name in
batch_x
])
b = np.array(batch_y)
return a,b
def get_model(input_shape):
model = Sequential([
Conv2D(8, 3, activation='relu', input_shape=input_shape),
MaxPool2D(2),
Conv2D(16, 3, activation='relu'),
MaxPool2D(2),
Conv2D(32, 3, activation='relu'),
MaxPool2D(2),
Conv2D(32, 3, activation='relu'),
MaxPool2D(2),
Conv2D(32, 3, activation='relu'),
MaxPool2D(2),
Flatten(),
Dense(1024, activation='relu'),
Dropout(0.3),
Dense(1, activation='sigmoid')
])
model.compile(optimizer='adam',
loss='binary_crossentropy',
metrics=['accuracy']
)
return model
ALBUMENTATIONS_TRAIN = A.Compose([
A.Resize(256, 256),
# A.Resize(512, 512),
A.ToFloat(),
# A.RandomCrop(384, 384, p=0.5),
])
ALBUMENTATIONS_TEST = A.Compose([
A.ToFloat(),
A.Resize(256, 256)
])
X, Y = load_data()
train_gen = DataSequence(X, Y, 16, ALBUMENTATIONS_TRAIN)
model = get_model(input_shape=(256,256,3))
model.fit(train_gen,epochs=100)
The error that I am getting is
17/748 [..............................] - ETA: 1:06 - loss: 0.4304 - accuracy: 0.92282020-07-08 13:25:47.751964: W tensorflow/core/framework/op_kernel.cc:1741] Invalid argument: ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\ops\script_ops.py", line 243, in __call__
ret = func(*args)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 309, in wrapper
return func(*args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 785, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 801, in wrapped_generator
for data in generator_fn():
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 932, in generator_fn
yield x[i]
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 40, in __getitem__
a = np.array([
ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 89, in <module>
model.fit(train_gen,epochs=100)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py", line 66, in _method_wrapper
return method(self, *args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\training.py", line 848, in fit
tmp_logs = train_function(iterator)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\def_function.py", line 580, in __call__
result = self._call(*args, **kwds)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\def_function.py", line 611, in _call
return self._stateless_fn(*args, **kwds) # pylint: disable=not-callable
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 2420, in __call__
return graph_function._filtered_call(args, kwargs) # pylint: disable=protected-access
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 1661, in _filtered_call
return self._call_flat(
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 1745, in _call_flat
return self._build_call_outputs(self._inference_function.call(
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\function.py", line 593, in call
outputs = execute.execute(
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\eager\execute.py", line 59, in quick_execute
tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: 2 root error(s) found.
(0) Invalid argument: ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\ops\script_ops.py", line 243, in __call__
ret = func(*args)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 309, in wrapper
return func(*args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 785, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 801, in wrapped_generator
for data in generator_fn():
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 932, in generator_fn
yield x[i]
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 40, in __getitem__
a = np.array([
ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
[[{{node PyFunc}}]]
[[IteratorGetNext]]
[[IteratorGetNext/_4]]
(1) Invalid argument: ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
Traceback (most recent call last):
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\ops\script_ops.py", line 243, in __call__
ret = func(*args)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\autograph\impl\api.py", line 309, in wrapper
return func(*args, **kwargs)
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\data\ops\dataset_ops.py", line 785, in generator_py_func
values = next(generator_state.get_iterator(iterator_id))
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 801, in wrapped_generator
for data in generator_fn():
File "C:\Users\aksha\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\keras\engine\data_adapter.py", line 932, in generator_fn
yield x[i]
File "D:/ACAD/TENSORFLOW/Rough/data_aug_pipeline.py", line 40, in __getitem__
a = np.array([
ValueError: could not broadcast input array from shape (256,256,3) into shape (256,256)
[[{{node PyFunc}}]]
[[IteratorGetNext]]
0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_1195]
Function call stack:
train_function -> train_function
Process finished with exit code 1
Please help me to understand what mistake I am making.
Base on the error messages, there is at least one grayscale image in your dataset that was resize to 256x256 and thus cannot fit into your network.
I'm trying to run a classifier on Chainer, but failed due to the following error.
I have no idea about the error, because I confirmed that the iterator actually sent a batch to the trainer.
Is there a problem with the neural network model? Or, the way the data has been fed into the model is wrong?
Input.py
from chainer.datasets import split_dataset_random
from chainer.iterators import SerialIterator
from chainer.optimizers import Adam
from chainer.training import Trainer
from chainer.training.updaters import StandardUpdater
from chainer import functions as F, links as L
from chainer import Sequential
import numpy as np
batch_size = 3
X_train = np.ones((9957, 60, 80, 3), dtype=np.float32)
X_train, _ = split_dataset_random(X_train, 8000, seed=0)
train_iter = SerialIterator(X_train, batch_size)
model = Sequential(
L.Convolution2D(None, 64, 3, 2),
F.relu,
L.Convolution2D(64, 32, 3, 2),
F.relu,
L.Linear(None, 16),
F.dropout,
L.Linear(16, 4)
)
model_loss = L.Classifier(model)
optimizer = Adam()
optimizer.setup(model_loss)
updater = StandardUpdater(train_iter, optimizer)
trainer = Trainer(updater, (25, 'epoch'))
trainer.run()
Stacktrace.py
Exception in main training loop: forward() missing 1 required positional argument: 'x'
Traceback (most recent call last):
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 315, in run
update()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 165, in update
self.update_core()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 181, in update_core
optimizer.update(loss_func, in_arrays)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/optimizer.py", line 680, in update
loss = lossfun(*args, **kwds)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/links/model/classifier.py", line 143, in forward
self.y = self.predictor(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/sequential.py", line 210, in forward
x = layer(*x)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
Will finalize trainer extensions and updater before reraising the exception.
Traceback (most recent call last):
File "/home/user/deploy/aaa.py", line 33, in <module>
trainer.run()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 348, in run
six.reraise(*exc_info)
File "/home/user/miniconda3/lib/python3.7/site-packages/six.py", line 693, in reraise
raise value
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/trainer.py", line 315, in run
update()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 165, in update
self.update_core()
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/training/updaters/standard_updater.py", line 181, in update_core
optimizer.update(loss_func, in_arrays)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/optimizer.py", line 680, in update
loss = lossfun(*args, **kwds)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/links/model/classifier.py", line 143, in forward
self.y = self.predictor(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/sequential.py", line 210, in forward
x = layer(*x)
File "/home/user/miniconda3/lib/python3.7/site-packages/chainer/link.py", line 242, in __call__
out = forward(*args, **kwargs)
TypeError: forward() missing 1 required positional argument: 'x'
Is there a problem with the neural network model or the way the data has been fed into the model? Please let me know if you need to see the whole code
All you had to do is to give a tuple of ndarray and int to the model, because this is the specification of L.Classifier.
Is there a problem with the neural network model? Or, the way the data has been fed into the model is wrong?
Therefore, the absolute answer is "the way the data has been fed in to the model is wrong".
In the following code, I defined a class inheriting DatasetMixin to feed a tuple of ndarray and int. (This is a conventional way how Chainer goes)
It should be noted that the input argument of L.Convolution2D must be an ndarray whose shape is (batch, channel, width, height). So I transpose the array in the dataset.
Solution.py
from chainer.datasets import split_dataset_random
from chainer.iterators import SerialIterator
from chainer.optimizers import Adam
from chainer.training import Trainer
from chainer.training.updaters import StandardUpdater
from chainer import functions as F, links as L
from chainer import Sequential
from chainer.dataset import DatasetMixin
import numpy as np
class MyDataset(DatasetMixin):
def __init__(self, X, labels):
super(MyDataset, self).__init__()
self.X_ = X
self.labels_ = labels
self.size_ = X.shape[0]
def __len__(self):
return self.size_
def get_example(self, i):
return np.transpose(self.X_[i, ...], (2, 0, 1)), self.labels_[i]
batch_size = 3
X_train = np.ones((9957, 60, 80, 3), dtype=np.float32)
label_train = np.random.randint(0, 4, (9957,), dtype=np.int32)
dataset = MyDataset(X_train, label_train)
dataset_train, _ = split_dataset_random(dataset, 8000, seed=0)
train_iter = SerialIterator(dataset_train, batch_size)
model = Sequential(
L.Convolution2D(None, 64, 3, 2),
F.relu,
L.Convolution2D(64, 32, 3, 2),
F.relu,
L.Linear(None, 16),
F.dropout,
L.Linear(16, 4)
)
model_loss = L.Classifier(model)
optimizer = Adam()
optimizer.setup(model_loss)
updater = StandardUpdater(train_iter, optimizer)
trainer = Trainer(updater, (25, 'epoch'))
trainer.run()
I make a seq2seq model using tensorflow and meet a problem that my program throws an error when I use MultiRNNCell in tf.contrib.seq2seq.dynamic_decode.
The problem happens over here:
defw_rnn=tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
for _ in range(self.FLAGS.rnn_layer_size)])
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
sequence_length=self.decoder_targets_length,
time_major=False)
training_decoder = \
tf.contrib.seq2seq.BasicDecoder(
defw_rnn, training_helper,
encoder_final_state,
output_layer)
training_decoder_output, _, training_decoder_output_length = \
tf.contrib.seq2seq.dynamic_decode(
training_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
When I run this code,the console shows this Error message:
C:\Users\TopView\AppData\Local\Programs\Python\Python36\python.exe E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn.py:417: calling reverse_sequence (from tensorflow.python.ops.array_ops) with seq_dim is deprecated and will be removed in a future version.
Instructions for updating:
seq_dim is deprecated, use seq_axis instead
WARNING:tensorflow:From C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\util\deprecation.py:432: calling reverse_sequence (from tensorflow.python.ops.array_ops) with batch_dim is deprecated and will be removed in a future version.
Instructions for updating:
batch_dim is deprecated, use batch_axis instead
encoder_final_state shpe
LSTMStateTuple(c=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_5:0' shape=(?, 24) dtype=float32>, h=<tf.Tensor 'encoder/bidirectional_rnn/fw/fw/while/Exit_6:0' shape=(?, 24) dtype=float32>)
decoder_inputs shape before embedded
(128, 10)
decoder inputs shape after embedded
(128, 10, 5)
Traceback (most recent call last):
File "E:/PycharmProject/cikm_transport/CIKM/CIKM/translate_model/train.py", line 14, in <module>
len(embedding_matrix['embedding'][0]))
File "E:\PycharmProject\cikm_transport\CIKM\CIKM\translate_model\model.py", line 109, in __init__
maximum_iterations=self.FLAGS.max_len)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 323, in dynamic_decode
swap_memory=swap_memory)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3209, in while_loop
result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2941, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2878, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 3179, in <lambda>
body = lambda i, lv: (i + 1, orig_body(*lv))
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\decoder.py", line 266, in body
decoder_finished) = decoder.step(time, inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\contrib\seq2seq\python\ops\basic_decoder.py", line 137, in step
cell_outputs, cell_state = self._cell(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 232, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1325, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 339, in __call__
*args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 846, in call
(c_prev, m_prev) = state
File "C:\Users\TopView\AppData\Local\Programs\Python\Python36\lib\site-packages\tensorflow\python\framework\ops.py", line 436, in __iter__
"Tensor objects are not iterable when eager execution is not "
TypeError: Tensor objects are not iterable when eager execution is not enabled. To iterate over this tensor use tf.map_fn.
Process finished with exit code 1
But when I change the instance of defw_rnn, make it a single RNN instance like LSTMCell, the Error disappears:
defw_rnn=tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
And the code works well. However, I've found that most of the code about seq2seq model on the Internet using MultiRNNCell and they also use tensorflow, so it really confuse me that what is wrong with my program.
Here is the entire code:
import tensorflow as tf
import numpy as np
class Seq2SeqModel(object):
def bw_fw_rnn(self):
with tf.name_scope("forward_rnn"):
fw = tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer) for _ in
range(self.FLAGS.rnn_layer_size)])
fw = tf.nn.rnn_cell.DropoutWrapper(fw, output_keep_prob=self.FLAGS.keep_prob)
with tf.name_scope("backward_rnn"):
bw = tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer) for _ in
range(self.FLAGS.rnn_layer_size)])
bw = tf.nn.rnn_cell.DropoutWrapper(bw, output_keep_prob=self.FLAGS.keep_prob)
return (fw, bw)
def decode_inputs_preprocess(self, data, id_matrix):
ending=tf.strided_slice(data,[0,0],[self.batch_size,-1],[1,1])
decoder_input=tf.concat([tf.fill([self.batch_size,1],id_matrix.index('<go>')),ending],1)
return decoder_input
def __init__(self, FLAGS, english_id_matrix, spanish_id_matrix, english_vocab_size,spanish_vocab_size, embedding_size):
self.FLAGS = FLAGS
self.english_vocab_size = english_vocab_size
self.embedding_size = embedding_size
self.encoder_input = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='encoder_inputs')
self.decoder_targets = tf.placeholder(shape=[None, self.FLAGS.max_len], dtype=tf.int32, name='decoder_targets')
self.encoder_input_sequence_length = tf.placeholder(shape=[None], dtype=tf.int32, name='encoder_inputs_length')
self.decoder_targets_length = tf.placeholder(shape=[None], dtype=tf.int32, name='decoder_targets_length')
self.batch_size = self.FLAGS.batch_size
with tf.name_scope('embedding_look_up'):
spanish_embeddings = tf.Variable(
tf.random_uniform([english_vocab_size,
embedding_size], -1.0, 1.0),
dtype=tf.float32)
english_embeddings = tf.Variable(
tf.random_uniform([english_vocab_size,
embedding_size], -1.0, 1.0),
dtype=tf.float32)
self.spanish_embeddings_inputs = tf.placeholder(
dtype=tf.float32, shape=[english_vocab_size, embedding_size],
name='spanish_embeddings_inputs')
self.english_embeddings_inputs = tf.placeholder(
dtype=tf.float32, shape=[english_vocab_size, embedding_size],
name='spanish_embeddings_inputs')
self.spanish_embeddings_inputs_op = spanish_embeddings.assign(self.spanish_embeddings_inputs)
self.english_embeddings_inputs_op = english_embeddings.assign(self.english_embeddings_inputs)
encoder_inputs = tf.nn.embedding_lookup(spanish_embeddings, self.encoder_input)
with tf.name_scope('encoder'):
enfw_rnn, enbw_rnn = self.bw_fw_rnn()
encoder_outputs, encoder_final_state = \
tf.nn.bidirectional_dynamic_rnn(enfw_rnn, enbw_rnn, encoder_inputs
, sequence_length=self.encoder_input_sequence_length, dtype=tf.float32)
print("encoder_final_state shpe")
# final_state_c=tf.concat([encoder_final_state[0][-1].c,encoder_final_state[1][-1].c],1)
# final_state_h=tf.concat([encoder_final_state[0][-1].h,encoder_final_state[1][-1].h],1)
# encoder_final_state=tf.contrib.rnn.LSTMStateTuple(c=final_state_c,
# h=final_state_h)
encoder_final_state=encoder_final_state[0][-1]
print(encoder_final_state)
with tf.name_scope('dense_layer'):
output_layer = tf.layers.Dense(english_vocab_size,
kernel_initializer=tf.truncated_normal_initializer(
mean=0.0, stddev=0.1
))
# training decoder
with tf.name_scope('decoder'), tf.variable_scope('decode'):
decoder_inputs=self.decode_inputs_preprocess(self.decoder_targets,english_id_matrix)
print('decoder_inputs shape before embedded')
print(decoder_inputs.shape)
decoder_inputs = tf.nn.embedding_lookup(english_embeddings,decoder_inputs)
print('decoder inputs shape after embedded')
print(decoder_inputs.shape)
defw_rnn=tf.nn.rnn_cell.MultiRNNCell([
tf.nn.rnn_cell.LSTMCell(num_units=self.FLAGS.rnn_units,
initializer=tf.orthogonal_initializer)
for _ in range(self.FLAGS.rnn_layer_size)])
training_helper = tf.contrib.seq2seq.TrainingHelper(inputs=decoder_inputs,
sequence_length=self.decoder_targets_length,
time_major=False)
training_decoder = \
tf.contrib.seq2seq.BasicDecoder(
defw_rnn, training_helper,
encoder_final_state,
output_layer)
training_decoder_output, _, training_decoder_output_length = \
tf.contrib.seq2seq.dynamic_decode(
training_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
training_logits = tf.identity(training_decoder_output.rnn_output, 'logits')
print("training logits shape")
print(training_logits.shape)
# predicting decoder
with tf.variable_scope('decode', reuse=True):
start_tokens = tf.tile(tf.constant([english_id_matrix.index('<go>')], dtype=tf.int32),
[self.batch_size], name='start_tokens')
predicting_helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(english_embeddings,
start_tokens,
english_id_matrix.index('<eos>'))
predicting_decoder = tf.contrib.seq2seq.BasicDecoder(defw_rnn,
predicting_helper,
encoder_final_state,
output_layer)
predicting_decoder_output, _, predicting_decoder_output_length =\
tf.contrib.seq2seq.dynamic_decode(
predicting_decoder,
impute_finished=True,
maximum_iterations=self.FLAGS.max_len)
self.predicting_logits = tf.identity(predicting_decoder_output.sample_id, name='predictions')
print("predicting logits shape")
print(self.predicting_logits.shape)
masks = tf.sequence_mask(self.decoder_targets_length, self.FLAGS.max_len, dtype=tf.float32, name='masks')
with tf.variable_scope('optimization'), tf.name_scope('optimization'):
# Loss
self.cost = tf.contrib.seq2seq.sequence_loss(training_logits, self.decoder_targets, masks)
# Optimizer
optimizer = tf.train.AdamOptimizer(self.FLAGS.alpha)
# Gradient Clipping
gradients = optimizer.compute_gradients(self.cost)
capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) for grad, var in gradients if grad is not None]
self.train_op = optimizer.apply_gradients(capped_gradients)
Well……I've figured out.The problem happened because I only sent the final state of the encoder to a decoder.
Traceback (most recent call last):
File "train_rnn.py", line 92, in <module>
batch_size=FLAGS.batch_size)
File "/home/iit/sourab/conv_extractive/codes/cnn-text-classification-tf/rnn_code/text_rnn.py", line 65, in __init__
initial_state_bw=self.rnn_tuple_state_bw)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 375, in bidirectional_dynamic_rnn
time_major=time_major, scope=fw_scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 574, in dynamic_rnn
dtype=dtype)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 737, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2770, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2599, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2549, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 722, in _time_step
(output, new_state) = call_cell()
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn.py", line 708, in <lambda>
call_cell = lambda: cell(input_t, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 916, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 752, in __call__
output, new_state = self._cell(inputs, state, scope)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 441, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 383, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 1017, in _linear
initializer=kernel_initializer)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "/home/iit/.local/lib/python3.5/site-packages/tensorflow/python/ops/variable_scope.py", line 669, in _get_single_variable
found_var.get_shape()))
**ValueError: Trying to share variable bidirectional_rnn/fw/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (1024, 2048) and found shape (640, 2048).**
Below I pass parameters to the constructor of the TextRNN class.
rnn = TextRNN(
sequence_size=x.shape[1],
truncated_backprop_length=FLAGS.truncated_backprop_length,
state_size=FLAGS.state_size,
num_classes=y.shape[1],
vocab_size=len(vocab_processor.vocabulary_),
embedding_size=FLAGS.embedding_dim,
num_layers=FLAGS.num_layers,
batch_size=FLAGS.batch_size)
Here, is the TextRNN class:
from __future__ import print_function, division
import tensorflow as tf
import numpy as np
import os
import sys
class TextRNN(object):
"""
An RNN for text classification
Uses an embedding layer followed by multilayered Bi-Directional LSTMs followed by a softmax layer
"""
def __init__(
self, sequence_size, truncated_backprop_length, state_size, num_classes, vocab_size, embedding_size, num_layers, batch_size):
#placeholders for input, output and dropout probability
self.input_x = tf.placeholder(tf.int32, [batch_size, truncated_backprop_length, sequence_size], name="input_x")
self.input_y = tf.placeholder(tf.float32, [batch_size,truncated_backprop_length, num_classes], name="input_y")
self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
self.fw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
self.bw_init_state = tf.placeholder(tf.float32, [num_layers, 2, batch_size, state_size])
# Embedding layer
with tf.device('/cpu:0'), tf.name_scope("embedding"):
self.W = tf.Variable(
tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
name="W")
embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
#embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)
self.embedded_chars_expanded = tf.reduce_mean(embedded_chars, axis=2)
state_per_layer_list_fw = tf.unstack(self.fw_init_state, axis=0)
self.rnn_tuple_state_fw = tuple(
[tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_fw[idx][0,:,:], state_per_layer_list_fw[idx][1,:,:])
for idx in range(num_layers)]
)
state_per_layer_list_bw = tf.unstack(self.bw_init_state, axis=0)
self.rnn_tuple_state_bw = tuple(
[tf.contrib.rnn.LSTMStateTuple(state_per_layer_list_bw[idx][0,:,:], state_per_layer_list_bw[idx][1,:,:])
for idx in range(num_layers)]
)
W2 = tf.Variable(np.random.rand(2*state_size, num_classes), dtype=tf.float32)
b2 = tf.Variable(np.random.rand(1,num_classes), dtype=tf.float32)
with tf.name_scope('BiMultiLSTM'):
with tf.name_scope('forward_cell'):
cell_fw = tf.contrib.rnn.BasicLSTMCell( num_units=state_size, state_is_tuple=True)
cell_fw = tf.contrib.rnn.DropoutWrapper(cell_fw, output_keep_prob=self.dropout_keep_prob)
cell_fw = tf.contrib.rnn.MultiRNNCell([cell_fw] * num_layers, state_is_tuple=True)
with tf.name_scope('Backward_cell'):
cell_bw = tf.contrib.rnn.BasicLSTMCell( num_units=state_size, state_is_tuple=True)
cell_bw = tf.contrib.rnn.DropoutWrapper(cell_bw, output_keep_prob=self.dropout_keep_prob)
cell_bw = tf.contrib.rnn.MultiRNNCell([cell_bw] * num_layers, state_is_tuple=True)
self.output_hidden_states, self.current_states = tf.nn.bidirectional_dynamic_rnn(
cell_fw=cell_fw,
cell_bw=cell_bw,
inputs=self.embedded_chars_expanded,
initial_state_fw=self.rnn_tuple_state_fw,
initial_state_bw=self.rnn_tuple_state_bw)
self.outputs_concat = tf.concat(self.output_hidden_states, 2)
self.output_series = tf.reshape(self.outputs_concat, [-1, 2*state_size])
self._current_state_fw = self.current_states[0]
self._current_state_bw = self.current_states[1]
#output
with tf.name_scope("output"):
self.logits = tf.matmul(self.output_series, W2) + b2 #Broadcasted addition
self.labels = tf.reshape(self.input_y, [-1, num_classes])
self.logits_series = tf.unstack(tf.reshape(self.logits, [batch_size, truncated_backprop_length, num_classes]), axis=1)
self.predictions_series = [tf.nn.softmax(logit) for logit in self.logits_series]
self.labels_series = tf.unstack(tf.reshape(self.labels, [batch_size, truncated_backprop_length, num_classes]), axis=1)
#loss
with tf.name_scope("loss"):
self.losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.labels)
self.total_loss = tf.reduce_mean(self.losses)
#accuracy
self.correct_predictions = []
with tf.name_scope("accuracy"):
for predictions, labels in zip(self.predictions_series, self.labels_series):
self.correct_predictions.append(tf.equal(tf.argmax(predictions, axis=1), tf.argmax(labels, axis=1)))
self.sum_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, tf.float32))
self.accuracy = tf.reduce_mean(tf.cast(self.correct_predictions, tf.float32))
These are the parameters that I passed:
Parameters:
ALLOW_SOFT_PLACEMENT=True
BATCH_SIZE=50
CHECKPOINT_EVERY=100
DATA_FILE=./../data/cnn_train.txt
DEV_FILE=./../data/cnn_test.txt
DROPOUT_KEEP_PROB=1.0
EMBEDDING_DIM=128
EVALUATE_EVERY=100
LOG_DEVICE_PLACEMENT=False
NUM_CHECKPOINTS=5
NUM_CLASSES=2
NUM_EPOCHS=200
NUM_LAYERS=3
STATE_SIZE=512
TRUNCATED_BACKPROP_LENGTH=10
I searched the net but failed to solve the error. The program works if I give state_size same as embedding_size and gives the above error for all other cases where state_size is not equal to embedding_size.