bert model showing TypeError: Layer input_spec must be an instance of InputSpec. Got: InputSpec(shape=(None, 55, 768), ndim=3) - keras

I am trying to use a pretrained BERT model for intent classification. Here is my code, run in a Jupyter notebook.
class DataPreparation:
    """Tokenize train/test intent data frames into padded arrays of BERT token ids.

    After construction the instance exposes ``train_x``/``test_x`` (one padded
    row of token ids per example) and ``train_y``/``test_y`` (class indices),
    plus ``max_seq_len``, the sequence length actually used for padding.
    """

    # Column names read from every data-frame row.
    text_column = "text"
    label_column = "intent"

    def __init__(self, train, test, tokenizer: FullTokenizer, classes, max_seq_len=192):
        """Tokenize both splits, then pad them to a common length.

        Args:
            train: Data frame with ``text`` and ``intent`` columns.
            test: Data frame with the same columns as ``train``.
            tokenizer: Object providing ``tokenize`` and ``convert_tokens_to_ids``.
            classes: List of labels; a label's position is its class index.
            max_seq_len: Upper bound on the padded sequence length.
        """
        self.tokenizer = tokenizer
        # Running maximum of the tokenized lengths; updated by prepare_data.
        self.max_seq_len = 0
        self.classes = classes
        (self.train_x, self.train_y), (self.test_x, self.test_y) = map(
            self.prepare_data, [train, test]
        )
        print("max seq_len", self.max_seq_len)
        # Never pad beyond the caller's limit, nor beyond the longest sequence seen.
        self.max_seq_len = min(self.max_seq_len, max_seq_len)
        self.train_x, self.test_x = map(self.data_padding, [self.train_x, self.test_x])

    def prepare_data(self, df):
        """Convert every row of ``df`` into token ids and a class index.

        Returns:
            A pair ``(x, y)``: ``x`` is a 1-D object array holding one list of
            token ids per row, ``y`` is an array of class indices.

        Raises:
            ValueError: If a row's label is not present in ``self.classes``.
        """
        x, y = [], []
        for _, row in tqdm(df.iterrows()):
            text = row[DataPreparation.text_column]
            label = row[DataPreparation.label_column]
            tokens = ["[CLS]"] + self.tokenizer.tokenize(text) + ["[SEP]"]
            token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
            self.max_seq_len = max(self.max_seq_len, len(token_ids))
            x.append(token_ids)
            y.append(self.classes.index(label))
        # The id lists have different lengths. np.array(x) on such ragged input
        # raises ValueError on recent NumPy, so fill an object array explicitly.
        ragged = np.empty(len(x), dtype=object)
        for i, token_ids in enumerate(x):
            ragged[i] = token_ids
        return ragged, np.array(y)

    def data_padding(self, ids):
        """Truncate and zero-pad every id sequence to ``self.max_seq_len``.

        Args:
            ids: Iterable of token-id sequences (lists or array rows).

        Returns:
            An array with one padded row per input sequence.
        """
        # NOTE(review): truncating to max_seq_len - 2 also cuts the trailing
        # [SEP] of long sequences although [CLS]/[SEP] were already added in
        # prepare_data; kept as in the original, confirm the intent.
        cut = self.max_seq_len - 2
        x = []
        for input_ids in ids:
            # list() makes "+" mean concatenation even when a row is an ndarray.
            input_ids = list(input_ids)[:cut]
            input_ids = input_ids + [0] * (self.max_seq_len - len(input_ids))
            x.append(np.array(input_ids))
        return np.array(x)
# Tokenizer built from the vocab.txt file inside the BERT checkpoint directory.
tokenizer = FullTokenizer(vocab_file=os.path.join(bert_ckpt_dir, "vocab.txt"))
def model_defination(max_seq_len, bert_ckpt_file):
    """Build the intent classifier on top of a BertModelLayer.

    The BERT configuration is read from the module-level ``bert_config_file``
    and the size of the output layer from the module-level ``classes``.

    Args:
        max_seq_len: Length of the token-id sequences fed to the model.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights`` for the
            BERT layer.

    Returns:
        The built ``keras.Model`` mapping token ids to class probabilities.
    """
    # Translate the stock BERT JSON config into parameters for the layer.
    with tf.io.gfile.GFile(bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None
    bert_layer = BertModelLayer.from_params(params, name="bert")

    token_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids")
    sequence_output = bert_layer(token_ids)
    print("bert shape", sequence_output.shape)

    # Classification head: take position 0 of the sequence output, then
    # dropout -> tanh dense -> dropout -> softmax over the classes.
    head = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    head = keras.layers.Dropout(0.5)(head)
    head = keras.layers.Dense(units=768, activation="tanh")(head)
    head = keras.layers.Dropout(0.5)(head)
    probabilities = keras.layers.Dense(units=len(classes), activation="softmax")(head)

    classifier = keras.Model(inputs=token_ids, outputs=probabilities)
    classifier.build(input_shape=(None, max_seq_len))

    # Weights are loaded only after the model (and thus the layer) is built.
    load_stock_weights(bert_layer, bert_ckpt_file)
    return classifier
# Distinct intent labels; a label's position in this list is its class index.
classes = train.intent.unique().tolist()
# Tokenize and pad both splits; the padded length is capped at 128.
data = DataPreparation(train, test, tokenizer, classes, max_seq_len=128)
# Notebook inspection: shape of the padded training inputs and the first encoded label.
data.train_x.shape
data.train_y[0]
# Build the classifier for the sequence length that was actually used for padding.
model = model_defination(data.max_seq_len, bert_ckpt_file)
When I call the function, I get an error. The arguments are max_seq_len = 55 and bert_ckpt_file = the path to the BERT checkpoint file.
Creating the model raises the error below:
TypeError Traceback (most recent call last)
<ipython-input-17-af3e534b3882> in <module>
----> 1 model = model_defination(data.max_seq_len, bert_ckpt_file)
<ipython-input-16-a83a622dafe3> in model_defination(max_seq_len, bert_ckpt_file)
9 input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',name="input_ids")
10 #input_spec = tf.keras.layers.InputSpec(ndim=3)
---> 11 bert_output = bert(input_ids)
12
13 print("bert shape", bert_output.shape)
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
974 # >> model = tf.keras.Model(inputs, outputs)
975 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
--> 976 return self._functional_construction_call(inputs, args, kwargs,
977 input_list)
978
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1112 layer=self, inputs=inputs, build_graph=True, training=training_value):
1113 # Check input assumptions set after layer building, e.g. input shape.
-> 1114 outputs = self._keras_tensor_symbolic_call(
1115 inputs, input_masks, args, kwargs)
1116
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
846 return tf.nest.map_structure(keras_tensor.KerasTensor, output_signature)
847 else:
--> 848 return self._infer_output_signature(inputs, args, kwargs, input_masks)
849
850 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
~\Anaconda3\lib\site-packages\keras\engine\base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
886 self._maybe_build(inputs)
887 inputs = self._maybe_cast_inputs(inputs)
--> 888 outputs = call_fn(inputs, *args, **kwargs)
889
890 self._handle_activity_regularization(inputs, outputs)
~\Anaconda3\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
693 except Exception as e: # pylint:disable=broad-except
694 if hasattr(e, 'ag_error_metadata'):
--> 695 raise e.ag_error_metadata.to_exception(e)
696 else:
697 raise
TypeError: in user code:
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\bert\model.py:80 call *
output = self.encoders_layer(embedding_output, mask=mask, training=training)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:1030 __call__ **
self._maybe_build(inputs)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:2659 _maybe_build
self.build(input_shapes) # pylint:disable=not-callable
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\bert\transformer.py:209 build
self.input_spec = keras.layers.InputSpec(shape=input_shape)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:2777 __setattr__
super(tf.__internal__.tracking.AutoTrackable, self).__setattr__(name, value) # pylint: disable=bad-super-call
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\tensorflow\python\training\tracking\base.py:530 _method_wrapper
result = method(self, *args, **kwargs)
C:\Users\kamrul.moin\Anaconda3\lib\site-packages\keras\engine\base_layer.py:1296 input_spec
raise TypeError('Layer input_spec must be an instance of InputSpec. '
TypeError: Layer input_spec must be an instance of InputSpec. Got: InputSpec(shape=(None, 55, 768), ndim=3)

I have solved the error. It was caused by the shape of my training data: I had added the index as a column in the training data. After resetting the index of the training data, it works.
The below code solved the error:
# Give both frames a fresh default index; drop=True discards the old index
# instead of inserting it as a column (assumes train/test are pandas DataFrames).
train = train.reset_index(drop=True)
test = test.reset_index(drop=True)

Related

How to reconstruct the decoder from an LSTM-AE?

I have a trained LSTM-AE, of which the architecture is as follows:
In brief, I have an LSTM-AE of depth 3; the numbers of cells in the LSTM layers on the encoder side are [120, 80, 50] (and symmetric for the decoder). I built the model using the code shown on this page. For information, because I want to train the LSTM-AE directly on variable-length time series, I didn't specify the number of timesteps in the input layer, which means the model is trained on batches of size 1 (one time series per batch).
I can extract the encoder just fine, but I cannot do the same for the decoder :-(... My goal is to check, given a vector of 50 features (which are extracted by the encoder), whether the decoder can reconstruct the input series.
Here's my attempt so far:
# load the full autoencoder
model = load_model(path_to_model)
# reconstruct the decoder by re-applying the trained layers, picked by
# position counted from the end of the loaded model
in_layer = Input(shape=(None, 50))
time_dist = model.layers[-1]
dec_1 = model.layers[-2]
dec_2 = model.layers[-3]
dec_3 = model.layers[-4]
rep_vec = model.layers[-5]
# Layers are applied innermost-first: rep_vec, then dec_3, dec_2, dec_1, then time_dist.
# NOTE(review): if rep_vec is the 'repeat' Lambda of the build script, its
# function reads args[0] and args[1] (it was built on the list [encoded, inputs]);
# here it receives a single tensor. Confirm whether this is what triggers the
# StridedSlice error.
out_layer = time_dist(dec_1(dec_2(dec_3(rep_vec(in_layer)))))
decoder = Model(in_layer, out_layer, name='decoder')
res = decoder(input_feature) # input_feature has shape (50,)
I obtained this error:
InvalidArgumentError: slice index 1 of dimension 0 out of bounds. [Op:StridedSlice] name: decoder/repeat/strided_slice/
If you are interested in the full error log...
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
Input In [86], in <module>
13 out_layer = time_dist(dec_1(dec_2(dec_3(rep_vec(in_layer)))))
14 decoder = Model(in_layer, out_layer, name='decoder')
---> 15 res = decoder(input_feature)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1030, in Layer.__call__(self, *args, **kwargs)
1026 inputs = self._maybe_cast_inputs(inputs, input_list)
1028 with autocast_variable.enable_auto_cast_variables(
1029 self._compute_dtype_object):
-> 1030 outputs = call_fn(inputs, *args, **kwargs)
1032 if self._activity_regularizer:
1033 self._handle_activity_regularization(inputs, outputs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:420, in Functional.call(self, inputs, training, mask)
401 #doc_controls.do_not_doc_inheritable
402 def call(self, inputs, training=None, mask=None):
403 """Calls the model on new inputs.
404
405 In this case `call` just reapplies
(...)
418 a list of tensors if there are more than one outputs.
419 """
--> 420 return self._run_internal_graph(
421 inputs, training=training, mask=mask)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:556, in Functional._run_internal_graph(self, inputs, training, mask)
553 continue # Node is not computable, try skipping.
555 args, kwargs = node.map_arguments(tensor_dict)
--> 556 outputs = node.layer(*args, **kwargs)
558 # Update tensor_dict.
559 for x_id, y in zip(node.flat_output_ids, nest.flatten(outputs)):
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1030, in Layer.__call__(self, *args, **kwargs)
1026 inputs = self._maybe_cast_inputs(inputs, input_list)
1028 with autocast_variable.enable_auto_cast_variables(
1029 self._compute_dtype_object):
-> 1030 outputs = call_fn(inputs, *args, **kwargs)
1032 if self._activity_regularizer:
1033 self._handle_activity_regularization(inputs, outputs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/layers/core.py:919, in Lambda.call(self, inputs, mask, training)
915 return var
917 with backprop.GradientTape(watch_accessed_variables=True) as tape,\
918 variable_scope.variable_creator_scope(_variable_creator):
--> 919 result = self.function(inputs, **kwargs)
920 self._check_variables(created_variables, tape.watched_variables())
921 return result
File D:/PhD/Code/feature_learning/train_models/train_lstmae.py:30, in repeat_vector(args)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206, in add_dispatch_support.<locals>.wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
209 # TypeError, when given unexpected types. So we need to catch both.
210 result = dispatch(wrapper, args, kwargs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1040, in _slice_helper(tensor, slice_spec, var)
1038 var_empty = constant([], dtype=dtypes.int32)
1039 packed_begin = packed_end = packed_strides = var_empty
-> 1040 return strided_slice(
1041 tensor,
1042 packed_begin,
1043 packed_end,
1044 packed_strides,
1045 begin_mask=begin_mask,
1046 end_mask=end_mask,
1047 shrink_axis_mask=shrink_axis_mask,
1048 new_axis_mask=new_axis_mask,
1049 ellipsis_mask=ellipsis_mask,
1050 var=var,
1051 name=name)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206, in add_dispatch_support.<locals>.wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
209 # TypeError, when given unexpected types. So we need to catch both.
210 result = dispatch(wrapper, args, kwargs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1213, in strided_slice(input_, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, var, name)
1210 if strides is None:
1211 strides = ones_like(begin)
-> 1213 op = gen_array_ops.strided_slice(
1214 input=input_,
1215 begin=begin,
1216 end=end,
1217 strides=strides,
1218 name=name,
1219 begin_mask=begin_mask,
1220 end_mask=end_mask,
1221 ellipsis_mask=ellipsis_mask,
1222 new_axis_mask=new_axis_mask,
1223 shrink_axis_mask=shrink_axis_mask)
1225 parent_name = name
1227 if var is not None:
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/gen_array_ops.py:10505, in strided_slice(input, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, name)
10503 return _result
10504 except _core._NotOkStatusException as e:
> 10505 _ops.raise_from_not_ok_status(e, name)
10506 except _core._FallbackException:
10507 pass
File ~/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:6897, in raise_from_not_ok_status(e, name)
6895 message = e.message + (" name: " + name if name is not None else "")
6896 # pylint: disable=protected-access
-> 6897 six.raise_from(core._status_to_exception(e.code, message), None)
File <string>:3, in raise_from(value, from_value)
InvalidArgumentError: slice index 1 of dimension 0 out of bounds. [Op:StridedSlice] name: decoder/repeat/strided_slice/
I appreciate very much any advice you would give me!
Edit
Here is the code I used to build the model:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.backend import shape
def repeat_vector(args):
    """Repeat a vector once per time step of a reference series.

    Args:
        args: Indexable pair; ``args[0]`` is the tensor to repeat and
            ``args[1]`` is the tensor whose axis 1 gives the repeat count.

    Returns:
        The output of ``RepeatVector`` applied to ``args[0]``.
    """
    vector, reference_series = args[0], args[1]
    # The count is read from the reference tensor's shape on every call.
    n_timesteps = shape(reference_series)[1]
    return RepeatVector(n_timesteps)(vector)
# Hyper-parameters.
n_atts = 3  # each time step carries 3 measurements
n_units = [120, 80, 50]  # encoder widths, first layer first; the decoder mirrors them
n_layers = len(n_units)
init = GlorotUniform(seed=420)
reg = None
optimizer = Adam(learning_rate=0.0001)
activ = 'tanh'
loss_metric = 'mse'

# Options shared by every LSTM layer of the network.
lstm_options = dict(kernel_initializer=init, kernel_regularizer=reg, activation=activ)

inputs = Input(shape=(None, n_atts), name='input_layer')

# Encoder: every layer except the last returns the full sequence; the last
# one returns only its final output.
encoded = inputs
for i, width in enumerate(n_units):
    encoded = LSTM(width, name='encoder_{}'.format(i + 1),
                   return_sequences=(i != n_layers - 1), **lstm_options)(encoded)

# Bridge: repeat the encoding once per time step of the input series.
repeated = Lambda(repeat_vector, output_shape=(None, n_units[-1]), name='repeat')([encoded, inputs])

# Decoder: the encoder widths in reverse order, all returning sequences.
decoded = repeated
for i, width in enumerate(reversed(n_units)):
    decoded = LSTM(width, return_sequences=True, name='decoder_{}'.format(i + 1),
                   **lstm_options)(decoded)

# Output layer: one Dense(n_atts) applied at every time step.
tdist = TimeDistributed(Dense(n_atts))(decoded)

# Assemble and compile the autoencoder.
model = Model(inputs, tdist, name='lstm-ae')
model.compile(optimizer=optimizer, loss=loss_metric)
For information, I use tensorflow 2.5.
Because the number of units is read from a config file, I wrote the code this way to add the layers programmatically.

layers compatibility between attention layer and CONV1D in keras

I am building a model in BiLSTM-attention-Conv1D fashion (I want to use multiple Conv1D layers with different kernel sizes). I am facing a layer incompatibility issue between the attention layer and the Conv1D layer. I have tried the Reshape layer, but it is not working. Following is my code:
my model is as follows
# Token ids -> embeddings -> two stacked bidirectional LSTMs.
sequence_input = Input(shape=(maxlen,), dtype="int32")
embedded_sequences = Embedding(50000, output_dim=output_dim)(sequence_input)
lstm = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences = True), name="bi_lstm_0")(embedded_sequences)
# Second BiLSTM also returns its final hidden/cell states for both directions.
(lstm, forward_h, forward_c, backward_h, backward_c) = Bidirectional(LSTM(RNN_CELL_SIZE, return_sequences=True, return_state=True),
name="bi_lstm_1")(lstm)
# Concatenate forward and backward states; state_c is not used further below.
state_h = Concatenate()([forward_h, backward_h])
state_c = Concatenate()([forward_c, backward_c])
# Attention is called with (sequence output, state_h) and returns a pair;
# presumably a custom layer defined elsewhere, TODO confirm.
context_vector, attention_weights = Attention(10)(lstm, state_h)
# NOTE(review): the reported ValueError gives input_shape = [256] for
# context_vector, which cannot be reshaped to (maxlen, output_dim, 1).
x = Reshape((maxlen, output_dim, 1))(context_vector)
kernel_sizes = [1,2,3,4,5]
convs = []
# NOTE(review): the loop variable ranges over the indices 0..len(kernel_sizes)-1,
# not over the values stored in kernel_sizes.
for kernel_size in range(len(kernel_sizes)):
conv = Conv1D(128, kernel_size, activation='relu')(x)
convs.append(conv)
# NOTE(review): convs is a Python list of tensors and is passed as-is to both pooling layers.
avg_pool = GlobalAveragePooling1D()(convs)
max_pool = GlobalMaxPooling1D()(convs)
conc = concatenate([avg_pool, max_pool])
output = Dense(50, activation="sigmoid")(conc)
model = keras.Model(inputs=sequence_input, outputs=output)
print(model.summary())
my code gives me the following error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-114-8e5c0c75e84a> in <module>()
13 context_vector, attention_weights = Attention(10)(lstm, state_h)
14
---> 15 x = Reshape((maxlen, output_dim, 1))(context_vector)
16
17
6 frames
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in __call__(self, *args, **kwargs)
950 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
951 return self._functional_construction_call(inputs, args, kwargs,
--> 952 input_list)
953
954 # Maintains info about the `Layer.call` stack.
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1089 # Check input assumptions set after layer building, e.g. input shape.
1090 outputs = self._keras_tensor_symbolic_call(
-> 1091 inputs, input_masks, args, kwargs)
1092
1093 if outputs is None:
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _keras_tensor_symbolic_call(self, inputs, input_masks, args, kwargs)
820 return nest.map_structure(keras_tensor.KerasTensor, output_signature)
821 else:
--> 822 return self._infer_output_signature(inputs, args, kwargs, input_masks)
823
824 def _infer_output_signature(self, inputs, args, kwargs, input_masks):
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/base_layer.py in _infer_output_signature(self, inputs, args, kwargs, input_masks)
861 # TODO(kaftan): do we maybe_build here, or have we already done it?
862 self._maybe_build(inputs)
--> 863 outputs = call_fn(inputs, *args, **kwargs)
864
865 self._handle_activity_regularization(inputs, outputs)
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in call(self, inputs)
555 # Set the static shape for the result since it might lost during array_ops
556 # reshape, eg, some `None` dim in the result could be inferred.
--> 557 result.set_shape(self.compute_output_shape(inputs.shape))
558 return result
559
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in compute_output_shape(self, input_shape)
546 output_shape = [input_shape[0]]
547 output_shape += self._fix_unknown_dimension(input_shape[1:],
--> 548 self.target_shape)
549 return tensor_shape.TensorShape(output_shape)
550
/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/layers/core.py in _fix_unknown_dimension(self, input_shape, output_shape)
534 output_shape[unknown] = original // known
535 elif original != known:
--> 536 raise ValueError(msg)
537 return output_shape
538
ValueError: total size of new array must be unchanged, input_shape = [256], output_shape = [2500, 100, 1]
kindly help me

A simple linear regression model with a DenseVariational layer in Tensorflow-Probability returns: TypeError: 'NoneType' object is not callable

This is an attempt to use TensorFlow Probability, and more specifically the DenseVariational layer, but it fails for some reason. How can I correct the code?
# 100 evenly spaced inputs on [-1, 1], stored as a column of shape (100, 1).
x_train = np.linspace(-1, 1, 100).reshape(-1, 1)
# Targets are the inputs plus Gaussian noise scaled by 0.3, same shape.
noise = 0.3 * np.random.randn(100).reshape(-1, 1)
y_train = x_train + noise
def prior(kernel_size, bias_size, dtype = None):
# Builds a Sequential model whose single DistributionLambda layer yields a
# MultivariateNormalDiag with zero mean and unit scale over the
# n = kernel_size + bias_size layer parameters. `dtype` is accepted but unused.
n = kernel_size + bias_size
prior_model = Sequential([
tfpl.DistributionLambda(
lambda t: tfd.MultivariateNormalDiag(loc = tf.zeros(n) , scale_diag = tf.ones(n)
))
])
# NOTE(review): there is no return statement here, so prior(...) evaluates to None.
def posterior(kernel_size, bias_size, dtype=None):
    """Build the trainable variational posterior over a layer's weights.

    The model consists of a ``VariableLayer`` holding as many trainable values
    as ``MultivariateNormalTriL.params_size(n)`` reports for an event of size
    ``n = kernel_size + bias_size``, followed by a ``MultivariateNormalTriL``
    layer of event size ``n`` that turns those values into a distribution.

    Args:
        kernel_size: Number of kernel weights of the variational layer.
        bias_size: Number of bias weights of the variational layer.
        dtype: Data type forwarded to the ``VariableLayer``.

    Returns:
        The ``Sequential`` posterior model.
    """
    n_weights = kernel_size + bias_size
    # Number of free parameters the distribution layer expects as its input.
    n_params = tfpl.MultivariateNormalTriL.params_size(n_weights)
    layers = [
        tfpl.VariableLayer(n_params, dtype=dtype),
        tfpl.MultivariateNormalTriL(n_weights),
    ]
    return Sequential(layers)
# Linear regression expressed as a single DenseVariational layer.
model = Sequential([
tfpl.DenseVariational(
input_shape = (1, ), # The input has dimensionality 1
units = 1, # A linear regression is represented by a Dense layer with a single unit
make_prior_fn = prior, # Function defined earlier that should return the prior distribution on the weights
make_posterior_fn = posterior, # Function defined earlier that returns the variational approximation of the posterior on the weights
kl_weight = 1/ x_train.shape[0], # Weight of the KL divergence term in the loss: one over the number of training points,
# so the KL term is scaled consistently with the mini-batch likelihood loss.
kl_use_exact = True # Use the closed-form KL divergence; this fails if none is registered for the two distributions.
# With False the KL divergence is approximated by sampling instead.
)
])
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-14-e7cf0bfd5902> in <module>
17 # same for the DL divergence loss. Here we instruct it to do the necessary scaling.
18
---> 19 kl_use_exact = True # Unless there is a closed form equation for the KL divergence in the library of Tensorflow setting True will return error. By setting False instead
20 # the KL Divergence will be approxiated using Sampling
21
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in __init__(self, layers, name)
140 layers = [layers]
141 for layer in layers:
--> 142 self.add(layer)
143
144 #property
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\training\tracking\base.py in _method_wrapper(self, *args, **kwargs)
455 self._self_setattr_tracking = False # pylint: disable=protected-access
456 try:
--> 457 result = method(self, *args, **kwargs)
458 finally:
459 self._self_setattr_tracking = previous_value # pylint: disable=protected-access
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\sequential.py in add(self, layer)
204 # and create the node connecting the current layer
205 # to the input layer we just created.
--> 206 layer(x)
207 set_inputs = True
208
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in __call__(self, *args, **kwargs)
924 if _in_functional_construction_mode(self, inputs, args, kwargs, input_list):
925 return self._functional_construction_call(inputs, args, kwargs,
--> 926 input_list)
927
928 # Maintains info about the `Layer.call` stack.
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\keras\engine\base_layer.py in _functional_construction_call(self, inputs, args, kwargs, input_list)
1115 try:
1116 with ops.enable_auto_cast_variables(self._compute_dtype_object):
-> 1117 outputs = call_fn(cast_inputs, *args, **kwargs)
1118
1119 except errors.OperatorNotAllowedInGraphError as e:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in wrapper(*args, **kwargs)
253 try:
254 with conversion_ctx:
--> 255 return converted_call(f, args, kwargs, options=options)
256 except Exception as e: # pylint:disable=broad-except
257 if hasattr(e, 'ag_error_metadata'):
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in converted_call(f, args, kwargs, caller_fn_scope, options)
455 if conversion.is_in_whitelist_cache(f, options):
456 logging.log(2, 'Whitelisted %s: from cache', f)
--> 457 return _call_unconverted(f, args, kwargs, options, False)
458
459 if ag_ctx.control_status_ctx().status == ag_ctx.Status.DISABLED:
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow\python\autograph\impl\api.py in _call_unconverted(f, args, kwargs, options, update_cache)
337
338 if kwargs is not None:
--> 339 return f(*args, **kwargs)
340 return f(*args)
341
~\Anaconda3\envs\tf2\lib\site-packages\tensorflow_probability\python\layers\dense_variational_v2.py in call(self, inputs)
120
121 q = self._posterior(inputs)
--> 122 r = self._prior(inputs)
123 self.add_loss(self._kl_divergence_fn(q, r))
124
TypeError: 'NoneType' object is not callable
Did you forget to return the model? `prior` needs a `return` statement:
def prior(kernel_size, bias_size, dtype=None):
    """Return a fixed prior over the layer's kernel and bias weights.

    Args:
        kernel_size: Number of kernel weights of the variational layer.
        bias_size: Number of bias weights of the variational layer.
        dtype: Accepted for interface compatibility; not used.

    Returns:
        A ``Sequential`` model whose single layer yields a
        ``MultivariateNormalDiag`` with zero mean and unit scale over all
        ``kernel_size + bias_size`` weights.
    """
    n_weights = kernel_size + bias_size

    def standard_normal(_):
        # The layer input is ignored: the prior has no trainable parameters.
        return tfd.MultivariateNormalDiag(
            loc=tf.zeros(n_weights), scale_diag=tf.ones(n_weights)
        )

    return tf.keras.Sequential([tfp.layers.DistributionLambda(standard_normal)])

Pytorch RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got CUDAType instead

I am trying to re-run a GitHub project for embedding-based recommendation on my computer. The goal is to first embed the users and items present in the MovieLens dataset, and then use the inner product to predict a rating. After integrating all the components, I got an error during training.
Code:
from lightfm.datasets import fetch_movielens
# Fetch the MovieLens data; the result is indexed by 'train' and 'test' below.
movielens = fetch_movielens()
ratings_train, ratings_test = movielens['train'], movielens['test']
def _binarize(dataset):
dataset = dataset.copy()
dataset.data = (dataset.data >= 0.0).astype(np.float32)
dataset = dataset.tocsr()
dataset.eliminate_zeros()
return dataset.tocoo()
# Binarized versions of the two splits, produced by _binarize.
train, test = _binarize(movielens['train']), _binarize(movielens['test'])
class ScaledEmbedding(nn.Embedding):
    """Embedding initialised from a normal with mean 0 and std 1/embedding_dim."""

    def reset_parameters(self):
        """Re-draw the weights and zero the padding row, if one is configured."""
        std = 1.0 / self.embedding_dim
        self.weight.data.normal_(0, std)
        if self.padding_idx is None:
            return
        self.weight.data[self.padding_idx].fill_(0)
class ZeroEmbedding(nn.Embedding):
    """Embedding whose weights all start at zero."""

    def reset_parameters(self):
        """Set every weight to zero, then zero the padding row if configured."""
        weights = self.weight.data
        weights.zero_()
        if self.padding_idx is None:
            return
        weights[self.padding_idx].fill_(0)
class BilinearNet(nn.Module):
    """Score (user, item) pairs as an embedding dot product plus two biases."""

    def __init__(self, num_users, num_items, embedding_dim, sparse=False):
        """Create the embedding and bias tables.

        Args:
            num_users: Number of rows in the user tables.
            num_items: Number of rows in the item tables.
            embedding_dim: Width of the user and item embeddings.
            sparse: Forwarded to every embedding table.
        """
        super().__init__()
        self.embedding_dim = embedding_dim
        self.user_embeddings = ScaledEmbedding(num_users, embedding_dim,
                                               sparse=sparse)
        self.item_embeddings = ScaledEmbedding(num_items, embedding_dim,
                                               sparse=sparse)
        self.user_biases = ZeroEmbedding(num_users, 1, sparse=sparse)
        self.item_biases = ZeroEmbedding(num_items, 1, sparse=sparse)

    def forward(self, user_ids, item_ids):
        """Return one score per (user, item) pair as a tensor of shape (N, 1).

        Args:
            user_ids: Integer index tensor of user ids.
            item_ids: Integer index tensor of item ids, paired with ``user_ids``.
        """
        user_embedding = self.user_embeddings(user_ids).view(-1, self.embedding_dim)
        item_embedding = self.item_embeddings(item_ids).view(-1, self.embedding_dim)

        user_bias = self.user_biases(user_ids).view(-1, 1)
        item_bias = self.item_biases(item_ids).view(-1, 1)

        # keepdim=True keeps the dot product at (N, 1) like the biases. Without
        # it the (N,) product broadcasts against the (N, 1) biases and the
        # result silently becomes an (N, N) matrix.
        dot = (user_embedding * item_embedding).sum(1, keepdim=True)
        return dot + user_bias + item_bias
def pointwise_loss(net,users, items, ratings, num_items):
# Pointwise loss: mean over (1 - sigmoid(score)) for the given (user, item)
# pairs and sigmoid(score) for the same users paired with randomly drawn items.
# `ratings` is accepted but not used in the body.
# One random item id in [0, num_items) per user, moved to the GPU with .cuda().
# NOTE(review): the integer dtype returned by np.random.randint is
# platform-dependent, while embedding lookups expect int64 (Long) indices;
# confirm the dtype of `negatives` here.
negatives = Variable(
torch.from_numpy(np.random.randint(0,
num_items,
len(users))).cuda()
)
positives_loss = (1.0 - torch.sigmoid(net(users, items)))
negatives_loss = torch.sigmoid(net(users, negatives))
return torch.cat([positives_loss, negatives_loss]).mean()
# Hyper-parameters.
embedding_dim = 128
minibatch_size = 1024
n_iter = 10
l2 = 0.0
sparse = True

# The interaction matrix has one row per user and one column per item.
num_users, num_items = train.shape
net = BilinearNet(num_users,
                  num_items,
                  embedding_dim,
                  sparse=sparse).cuda()
optimizer = optim.Adagrad(net.parameters(),
                          weight_decay=l2)

for epoch_num in range(n_iter):
    # Presumably shuffle returns aligned arrays of user ids, item ids and
    # ratings (it is defined outside this snippet).
    users, items, ratings = shuffle(train)
    user_ids_tensor = torch.from_numpy(users).cuda()
    item_ids_tensor = torch.from_numpy(items).cuda()
    ratings_tensor = torch.from_numpy(ratings).cuda()
    epoch_loss = 0.0
    # Walk the three tensors in lockstep, one minibatch at a time.
    batches = zip(_minibatch(user_ids_tensor, minibatch_size),
                  _minibatch(item_ids_tensor, minibatch_size),
                  _minibatch(ratings_tensor, minibatch_size))
    for batch_user, batch_item, batch_ratings in batches:
        user_var = Variable(batch_user)
        item_var = Variable(batch_item)
        ratings_var = Variable(batch_ratings)
        optimizer.zero_grad()
        loss = pointwise_loss(net, user_var, item_var, ratings_var, num_items)
        # The loss is a 0-dim tensor. Indexing it with [0] fails on current
        # PyTorch, so read the Python number with .item() instead.
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()
    print('Epoch {}: loss {}'.format(epoch_num, epoch_loss))
Error:
RuntimeError Traceback (most recent call last) <ipython-input-87-dcd04440363f> in <module>()
22 ratings_var = Variable(batch_ratings)
23 optimizer.zero_grad()
---> 24 loss = pointwise_loss(net,user_var, item_var, ratings_var, num_items)
25 epoch_loss += loss.data[0]
26 loss.backward()
<ipython-input-86-679e10f637a5> in pointwise_loss(net, users, items, ratings, num_items)
8
9 positives_loss = (1.0 - torch.sigmoid(net(users, items)))
---> 10 negatives_loss = torch.sigmoid(net(users, negatives))
11
12 return torch.cat([positives_loss, negatives_loss]).mean()
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in
__call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
<ipython-input-58-3946abf81d81> in forward(self, user_ids, item_ids)
16
17 user_embedding = self.user_embeddings(user_ids)
---> 18 item_embedding = self.item_embeddings(item_ids)
19
20 user_embedding = user_embedding.view(-1, self.embedding_dim)
~\Anaconda3\lib\site-packages\torch\nn\modules\module.py in
__call__(self, *input, **kwargs)
491 result = self._slow_forward(*input, **kwargs)
492 else:
--> 493 result = self.forward(*input, **kwargs)
494 for hook in self._forward_hooks.values():
495 hook_result = hook(self, input, result)
~\Anaconda3\lib\site-packages\torch\nn\modules\sparse.py in forward(self, input)
115 return F.embedding(
116 input, self.weight, self.padding_idx, self.max_norm,
--> 117 self.norm_type, self.scale_grad_by_freq, self.sparse)
118
119 def extra_repr(self):
~\Anaconda3\lib\site-packages\torch\nn\functional.py in embedding(input, weight, padding_idx, max_norm, norm_type, scale_grad_by_freq, sparse) 1504 # remove once script supports set_grad_enabled 1505
_no_grad_embedding_renorm_(weight, input, max_norm, norm_type)
-> 1506 return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse) 1507 1508
RuntimeError: Expected tensor for argument #1 'indices' to have scalar type Long; but got CUDAType instead (while checking arguments for embedding)
can anyone help me please ?
I would suggest checking the dtype of the input.
I had the same issue, which I solved by converting the input from int32 to int64 (running on Windows 10).
ex:
x = torch.tensor(train).to(torch.int64)
For Error like:
Runtime Error: Expected tensor for argument #1 'indices' to have scalar type Long; but got CUDAType instead (while checking arguments for embedding)
b_input_ids = torch.tensor(b_input_ids).to(device).long()
Above conversion works flawlessly across Oses
For me, the error was caused by type_as.
Changing .type_as(z) to .to(self.device) solved the issue.

Tensorflow seq2seq Decoder problems?

I am trying to write a seq2seq decoder with the TensorFlow tf.contrib.seq2seq package.
I am wondering whether my code is correct and whether there is a better way to write it. The documentation is not easy to read.
Put differently: how can I easily debug this kind of code? How can I inspect intermediate results in TensorFlow?
class Decoder:
    """Attention-based GRU decoder built on ``tf.contrib.seq2seq`` (TF 1.x).

    The decoder wraps a single ``GRUCell`` in a Luong ``AttentionWrapper`` and
    projects the decoded sequence onto the target vocabulary with ``linear``.

    Args:
        embedding: Embedding matrix passed to ``tf.nn.embedding_lookup`` and,
            in "infer" mode, to ``GreedyEmbeddingHelper``.
        hidden_size: Number of GRU units; also used as the attention depth and
            as the attention layer size of the wrapper.
        num_layers: Stored on the instance; the cell built here is a single
            ``GRUCell`` regardless of this value.
        max_length: Upper bound on the number of decoding steps in "infer"
            mode.
    """

    def __init__(self, embedding, hidden_size, num_layers=1, max_length=15):
        self.embedding = embedding
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Kept so that greedy inference can be bounded; without a limit,
        # dynamic_decode runs until every sequence emits the end token.
        self.max_length = max_length
        self.cell = tf.nn.rnn_cell.GRUCell(hidden_size)
        # Output projection from the decoder output to vocabulary scores.
        # `cn_total_words` is a module-level name defined outside this class.
        self.linear = tf.Variable(tf.random_normal(shape=(self.hidden_size, cn_total_words))*0.1)

    def __call__(self, inputs, state, encoder_outputs, encoder_state, decoder_length, mode="train"):
        """Build the decoding graph and return per-step vocabulary scores.

        Args:
            inputs: Decoder input token ids; embedded with ``self.embedding``.
            state: Returned unchanged as the second element of the result.
            encoder_outputs: Memory attended over by ``LuongAttention``.
            encoder_state: Accepted for interface compatibility; not used,
                because the attention cell starts from its zero state.
            decoder_length: Sequence lengths given to ``TrainingHelper``.
            mode: "train" (teacher forcing) or "infer" (greedy decoding).

        Returns:
            A ``(outputs, state)`` tuple where ``outputs`` is the decoder's
            ``rnn_output`` projected through ``self.linear`` and ``state`` is
            the ``state`` argument as passed in.

        Raises:
            ValueError: If ``mode`` is neither "train" nor "infer".
        """
        if mode not in ("train", "infer"):
            # Previously an unknown mode left `helper` unbound and failed later
            # with an unrelated-looking error.
            raise ValueError("mode must be 'train' or 'infer', got %r" % (mode,))

        with tf.variable_scope("decoder"):
            inputs = tf.nn.embedding_lookup(self.embedding, inputs)
            batch_size = tf.shape(inputs)[0]

            attention_mechanism = tf.contrib.seq2seq.LuongAttention(self.hidden_size, encoder_outputs)
            attn_cell = tf.contrib.seq2seq.AttentionWrapper(self.cell, attention_mechanism, self.hidden_size)

            if mode == "train":
                helper = tf.contrib.seq2seq.TrainingHelper(inputs=inputs, sequence_length=decoder_length)
                max_steps = None
            else:
                # `en_dict` is a module-level vocabulary defined outside this class.
                helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(
                    embedding=self.embedding,
                    start_tokens=tf.tile([en_dict["BOS"]], [batch_size]),
                    end_token=en_dict["EOS"])
                max_steps = self.max_length

            decoder = tf.contrib.seq2seq.BasicDecoder(
                cell=attn_cell, helper=helper,
                initial_state=attn_cell.zero_state(batch_size, tf.float32))
            outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
                decoder=decoder, maximum_iterations=max_steps)

            # dynamic_decode returns a BasicDecoderOutput namedtuple
            # (rnn_output, sample_id). Iterating over it and concatenating the
            # pieces mixes float32 outputs with int32 sample ids, which raises
            # "Tensors in list passed to 'values' of 'ConcatV2' Op have types
            # [float32, int32] that don't all match". Use rnn_output directly.
            outputs = tf.tensordot(outputs.rnn_output, self.linear, axes=[[2], [0]])
        return outputs, state
I got the following error when running the code
--------------------------------------------------------------------------- ValueError Traceback (most recent call
last)
~/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py
in apply_op(self, op_type_name, name, **keywords)
434 preferred_dtype=default_dtype,
--> 435 as_ref=input_arg.is_ref)
436 if input_arg.number_attr and len(
~/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py
in internal_convert_n_to_tensor(values, dtype, name, as_ref,
preferred_dtype)
736 as_ref=as_ref,
--> 737 preferred_dtype=preferred_dtype))
738 return ret
~/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py
in internal_convert_to_tensor(value, dtype, name, as_ref,
preferred_dtype)
675 if ret is None:
--> 676 ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)
677
~/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/ops.py
in _TensorTensorConversionFunction(t, dtype, name, as_ref)
548 "Tensor conversion requested dtype %s for Tensor with dtype %s: %r"
--> 549 % (dtype.name, t.dtype.name, str(t)))
550 return t
ValueError: Tensor conversion requested dtype float32 for Tensor with
dtype int32: 'Tensor("seq2seq-train/decoder/ExpandDims_2:0", shape=(?,
1, ?), dtype=int32)'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call
last) in ()
4 emb_en = np.random.uniform(low=-0.1, high=0.1, size=(en_total_words, hidden_size))
5 emb_cn = np.random.uniform(low=-0.1, high=0.1, size=(cn_total_words, hidden_size))
----> 6 model = Seq2Seq(hidden_size, num_layers, emb_en, emb_cn)
7 sess = tf.Session()
8 init = tf.global_variables_initializer()
in init(self, hidden_size,
num_layers, embed_words_en, embed_words_cn)
81 encoder_outputs, encoder_state = self.encoder(self.encoder_inputs, self.encoder_length)
82 decoder_length = tf.cast(tf.reduce_sum(self.decoder_mask, 1), tf.int32)
---> 83 decoder_outputs, decoder_state = self.decoder(self.decoder_inputs, encoder_state, encoder_outputs,
encoder_state, decoder_length)
84
85 # decoder_outputs.append(decoder_out)
in call(self, inputs, state,
encoder_outputs, encoder_state, decoder_length, mode)
50
51 outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(decoder=decoder)
---> 52 outputs = tf.concat([tf.expand_dims(out, 1) for out in outputs], 1)
53
54 outputs = tf.tensordot(outputs, self.linear, axes=[[2], [0]])
~/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/array_ops.py
in concat(values, axis, name) 1064 return
gen_array_ops._concat_v2(values=values, 1065
axis=axis,
-> 1066 name=name) 1067 1068
~/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py
in _concat_v2(values, axis, name)
491 """
492 result = _op_def_lib.apply_op("ConcatV2", values=values, axis=axis,
--> 493 name=name)
494 return result
495
~/anaconda3/envs/py36/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py
in apply_op(self, op_type_name, name, **keywords)
461 (prefix, dtype.name))
462 else:
--> 463 raise TypeError("%s that don't all match." % prefix)
464 else:
465 raise TypeError("%s that are invalid." % prefix)
TypeError: Tensors in list passed to 'values' of 'ConcatV2' Op have
types [float32, int32] that don't all match.

Resources