Model not calculating loss during training returning ValueError (Huggingface/BERT) - nlp

I'm unable to properly pass my encoded data (with hidden states) through Trainer via Huggingface. Below is the call to Trainer with arguments and the full traceback. I'm not really sure where to begin with this error as I believe I've satisfied all requirements to pass the encoded data forward unless the inputs passed should include the labels.
from sklearn.metrics import accuracy_score, f1_score
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        pred: Evaluation output exposing ``label_ids`` (the true labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is used as the predicted class).

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    labels = pred.label_ids
    # Use one name for the predicted classes. The original rebound ``pred``
    # and then read an undefined ``preds``, which raised NameError as soon
    # as the first evaluation ran.
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}
from transformers import Trainer, TrainingArguments
# One batch size is shared by training and evaluation.
batch_size = 10
# Number of full batches in the training split; used as the logging interval.
logging_steps = len(transcripts_encoded["train"]) // batch_size
model_name = f"{model_checkpoint}-finetuned-transcripts"
training_args = TrainingArguments(
    output_dir=model_name,
    num_train_epochs=2,
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    disable_tqdm=False,
    logging_steps=logging_steps,
    push_to_hub=False,
    log_level="error",
)
from transformers import Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=transcripts_encoded["train"],
    eval_dataset=transcripts_encoded["valid"],
    tokenizer=tokenizer,
)
# The trailing semicolon is kept: in a notebook it suppresses the cell output.
trainer.train();
Here is the full traceback:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-124-76d295da3120> in <module>
24 tokenizer=tokenizer)
25
---> 26 trainer.train();
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1503 resume_from_checkpoint=resume_from_checkpoint,
1504 trial=trial,
-> 1505 ignore_keys_for_eval=ignore_keys_for_eval,
1506 )
1507
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in _inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1747 tr_loss_step = self.training_step(model, inputs)
1748 else:
-> 1749 tr_loss_step = self.training_step(model, inputs)
1750
1751 if (
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in training_step(self, model, inputs)
2506
2507 with self.compute_loss_context_manager():
-> 2508 loss = self.compute_loss(model, inputs)
2509
2510 if self.args.n_gpu > 1:
/opt/conda/lib/python3.7/site-packages/transformers/trainer.py in compute_loss(self, model, inputs, return_outputs)
2552 if isinstance(outputs, dict) and "loss" not in outputs:
2553 raise ValueError(
-> 2554 "The model did not return a loss from the inputs, only the following keys: "
2555 f"{','.join(outputs.keys())}. For reference, the inputs it received are {','.join(inputs.keys())}."
2556 )
ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.
I was expecting it to report the training details (F1, loss, accuracy, etc.). My assumption is that my encoded data with the hidden states is not properly structured for the model to train with the arguments set.
UPDATED MODEL CODE:
here's where I'm loading and splitting
# Load the CSV; the "csv" loader exposes the file under the 'train' split.
category_data = load_dataset("csv", data_files="testdatafinal.csv")
# Drop the columns that are not used for classification.
category_data = category_data.remove_columns(["someid", "someid", "somedimension"])
category_data = category_data['train']
# First split: 70% train, 30% held out.
train_testvalid = category_data.train_test_split(test_size=0.3)
# Second split: halve the held-out 30% into two 15% parts.
test_valid = train_testvalid['test'].train_test_split(test_size=0.5)
from datasets.dataset_dict import DatasetDict
# Reassemble the three parts under the names used by the rest of the code.
cd = DatasetDict({
'train': train_testvalid['train'],
'test': test_valid['test'],
'valid': test_valid['train']})
print(cd)
DatasetDict({
train: Dataset({
features: ['Transcript', 'Primary Label'],
num_rows: 646
})
test: Dataset({
features: ['Transcript', 'Primary Label'],
num_rows: 139
})
valid: Dataset({
features: ['Transcript', 'Primary Label'],
num_rows: 139
})
})
Here's where I'm grabbing the model checkpoint
model_checkpoint = 'distilbert-base-uncased'
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained(model_checkpoint).to(device)
Here's where I'm mapping the encoded text
# ``set_format`` modifies the dataset in place and returns ``None``, so its
# result must not be assigned. Format in place (later code that maps over
# ``transcripts_encoded`` still sees torch tensors) and then bind the second
# name to the same, now formatted, dataset.
transcripts_encoded.set_format(
    "torch",
    columns=["input_ids", "attention_mask", "Primary Label"],
)
transcripts_encoded_one = transcripts_encoded
Here's where i'm extracting hidden states and then mapping as well
def extract_hidden_states(batch):
    """Return the first-token ([CLS]) hidden state for every row of ``batch``.

    Only the entries of ``batch`` named in ``tokenizer.model_input_names`` are
    moved to ``device`` and fed to ``model``; gradients are disabled for the
    forward pass.

    Returns:
        dict with a single key ``"hidden_state"`` holding a NumPy array.
    """
    accepted_names = tokenizer.model_input_names
    model_inputs = {}
    for name, tensor in batch.items():
        if name in accepted_names:
            # Place the model inputs on the GPU/CPU.
            model_inputs[name] = tensor.to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        encoded = model(**model_inputs)

    # Position 0 of the last hidden state is the vector for the [CLS] token.
    cls_vectors = encoded.last_hidden_state[:, 0]
    return {"hidden_state": cls_vectors.cpu().numpy()}
transcripts_hidden = transcripts_encoded.map(extract_hidden_states, batched=True)
Calling AutoModel
from transformers import AutoModelForSequenceClassification
num_labels = 10
model =(AutoModelForSequenceClassification
.from_pretrained(model_checkpoint, num_labels=num_labels)
.to(device))
Accuracy Metrics
from sklearn.metrics import accuracy_score, f1_score
def compute_metrics(pred):
    """Compute accuracy and weighted F1 for one evaluation pass.

    Args:
        pred: Evaluation output exposing ``label_ids`` (the true labels) and
            ``predictions`` (per-class scores; the arg-max over the last
            axis is used as the predicted class).

    Returns:
        dict with the keys ``"accuracy"`` and ``"f1"``.
    """
    labels = pred.label_ids
    # Use one name for the predicted classes. The original rebound ``pred``
    # and then read an undefined ``preds``, which raised NameError as soon
    # as the first evaluation ran.
    preds = pred.predictions.argmax(-1)
    f1 = f1_score(labels, preds, average="weighted")
    acc = accuracy_score(labels, preds)
    return {"accuracy": acc, "f1": f1}
Trainer
from transformers import Trainer, TrainingArguments
# One batch size is shared by training and evaluation.
batch_size = 10
# Number of full batches in the training split; used as the logging interval.
logging_steps = len(transcripts_encoded_one["train"]) // batch_size
model_name = f"{model_checkpoint}-finetuned-transcripts"
training_args = TrainingArguments(
    output_dir=model_name,
    num_train_epochs=2,
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    disable_tqdm=False,
    logging_steps=logging_steps,
    push_to_hub=False,
    log_level="error",
)
from transformers import Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    compute_metrics=compute_metrics,
    train_dataset=transcripts_encoded_one["train"],
    eval_dataset=transcripts_encoded_one["valid"],
    tokenizer=tokenizer,
)
# The trailing semicolon is kept: in a notebook it suppresses the cell output.
trainer.train();
I've tried passing both "transcripts_encoded" (without hidden states) and "transcripts_hidden" (with hidden states) as the train and validation splits, and both produce the same error.
trainer.train_dataset[0]
{'Primary Label': 'cancel',
'input_ids': tensor([ 101, 2047, 3446, 2003, 2205, 6450, 2005, 1996, 2051, 1045,
2064, 5247, 3752, 4790, 1012, 2009, 2001, 2026, 5165, 2000,
6509, 2017, 2651, 999, 4067, 2017, 2005, 3967, 2075, 1996,
2047, 2259, 2335, 999, 2031, 1037, 6919, 2717, 1997, 1996,
2154, 999, 2994, 3647, 1998, 7965, 999, 2065, 2045, 2003,
2505, 2842, 2057, 2089, 2022, 2583, 2000, 6509, 2017, 2007,
3531, 2514, 2489, 2000, 3967, 2149, 2153, 1012, 1045, 2001,
2074, 2667, 2000, 17542, 2026, 15002, 1012, 2038, 2009, 2042,
13261, 1029, 7632, 1010, 2045, 999, 1045, 3246, 2017, 1005,
2128, 2725, 2092, 2651, 1012, 4067, 2017, 2005, 3967, 2075,
102]),
'attention_mask': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1])}

If possible, can you add your model code? According to your indicators and description, you should use BartForSequenceClassification. If you are using BartForSequenceClassification, I think the biggest possibility is that your training dataset has no labels.
loss = None
if labels is not None:
...
if not return_dict:
output = (logits,) + outputs[1:]
return ((loss,) + output) if loss is not None else output
return Seq2SeqSequenceClassifierOutput(
loss=loss,
logits=logits,
past_key_values=outputs.past_key_values,
decoder_hidden_states=outputs.decoder_hidden_states,
decoder_attentions=outputs.decoder_attentions,
cross_attentions=outputs.cross_attentions,
encoder_last_hidden_state=outputs.encoder_last_hidden_state,
encoder_hidden_states=outputs.encoder_hidden_states,
encoder_attentions=outputs.encoder_attentions,
)
The model output classes in transformers (modeling_outputs) drop any key whose value is None, so when no labels reach the model the output has no loss key, and the ValueError you describe is raised.
UPDATE
Thanks for such detailed code. I found the problem. You should set TrainingArguments.label_names to ["Primary Label"], or rename the Primary Label column to a name that contains the lowercase string "label", such as Primary label. For more details, see transformers.utils.generic.find_labels. Otherwise the default label name is used instead of Primary Label. Furthermore, you must map the labels to consecutive integers rather than strings such as cancel!

Related

pretrained roberta relation extraction attribute error

I am trying to get the following pretrained huggingface model to work: https://huggingface.co/mmoradi/Robust-Biomed-RoBERTa-RelationClassification
I use the following code:
from transformers import AutoTokenizer, AutoModel
tokenizer = AutoTokenizer.from_pretrained("mmoradi/Robust-Biomed-RoBERTa-RelationClassification")
model = AutoModel.from_pretrained("mmoradi/Robust-Biomed-RoBERTa-RelationClassification")
inputs = tokenizer("""The colorectal cancer was caused by mutations in angina""")
outputs = model(**inputs)
For some reason, I get the following error when trying to produce outputs, so in the last line of my code:
--> 796 input_shape = input_ids.size()
797 elif inputs_embeds is not None:
798 input_shape = inputs_embeds.size()[:-1]
AttributeError: 'list' object has no attribute 'size'
The inputs look like this:
{'input_ids': [0, 133, 11311, 1688, 3894, 337, 1668, 21, 1726, 30, 28513, 11, 1480, 347, 2], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]}
I have no idea how to go about debugging this, so any help or hints are welcomed!
You have to specify the type of tensor that you want in return for tokenizer. If you don't, it will return a dictionary with two lists (input_ids and attention_mask):
inputs = tokenizer("""The colorectal cancer was caused by mutations in angina""", return_tensors="pt")

Confusion matrix as the metric for the optimization in a machine learning regression problem

I am training a model to segment an image to predict the degree of damage (ranging from 0: no damage, to 5: severe damage) for each pixel of an image. I have approached it this way:
def simple_loss(pred, mask):  # regression case
    """Mean squared error between ``sigmoid(pred)`` and ``mask``.

    Args:
        pred: Raw (pre-sigmoid) network output.
        mask: Target tensor with the same shape as ``pred``.
            NOTE(review): the sigmoid bounds predictions to (0, 1), so the
            targets are presumably scaled to that range; confirm.

    Returns:
        Scalar tensor holding the mean squared error.
    """
    pred = torch.sigmoid(pred)
    # ``reduce`` is the deprecated boolean flag; the string 'none' is truthy
    # and was silently treated as a mean reduction (the trailing ``.mean()``
    # was then a no-op). Request that reduction explicitly instead.
    return F.mse_loss(pred, mask, reduction='mean')
def structure_loss(pred, mask):  # binary case: damaged vs undamaged
    """Boundary-weighted BCE plus weighted IoU loss for binary segmentation.

    Args:
        pred: Logits; reduced over dims 2 and 3, so a 4-D tensor is expected.
        mask: Float target tensor with the same shape as ``pred``.

    Returns:
        Scalar tensor: the mean over the remaining dims of
        (weighted BCE + weighted IoU loss).
    """
    # A pixel whose label differs from the average label of its 31x31
    # neighbourhood gets a weight between 1 and 6.
    weit = 1 + 5 * torch.abs(F.avg_pool2d(mask, kernel_size=31, stride=1, padding=15) - mask)
    # Keep the per-pixel loss so the weights can act on it. The deprecated
    # ``reduce='none'`` is a truthy string that was treated as a mean
    # reduction, which collapsed the loss to one scalar and turned the
    # weighting below into a no-op.
    wbce = F.binary_cross_entropy_with_logits(pred, mask, reduction='none')
    wbce = (weit * wbce).sum(dim=(2, 3)) / weit.sum(dim=(2, 3))

    pred = torch.sigmoid(pred)
    inter = ((pred * mask) * weit).sum(dim=(2, 3))
    union = ((pred + mask) * weit).sum(dim=(2, 3))
    # The +1 terms smooth the ratio and avoid division by zero.
    wiou = 1 - (inter + 1) / (union - inter + 1)
    return (wbce + wiou).mean()
Binary case yields IoU > 0.6, but the regression model is inaccurate. My dataset is imbalanced (100:1), with the majority of the pixels belonging to the undamaged class. Hence, the optimization is driven towards accurate prediction of undamaged pixels.
The confusion matrix in the (1..5) region shows no correlation between the label and the predicted value.
I cannot balance the set because the undamaged region next to the damaged area is informative to humans, trained to examine the damage.
How can I modify the loss function to assign higher cost to regression errors regarding the degree of damage?
We can encode irrelevant pixels with -1. Then modify the loss function to ignore irrelevant classes this way:
from keras import backend as K
def masked_mse(mask_value):
    """Build a loss that averages squared error over non-masked targets.

    Args:
        mask_value: Target value marking entries to ignore.

    Returns:
        A function ``f(y_true, y_pred)`` that reduces over the last axis,
        counting only positions where ``y_true != mask_value``.
    """
    def f(y_true, y_pred):
        # 1.0 where the target is a real value, 0.0 where it is masked.
        keep = K.cast(K.not_equal(y_true, mask_value), K.floatx())
        residual = keep * (y_true - y_pred)
        summed_sq_err = K.sum(K.square(residual), axis=-1)
        kept_count = K.sum(keep, axis=-1)
        # NOTE: a row whose targets are all masked divides 0 by 0 here.
        return summed_sq_err / kept_count

    f.__name__ = 'Masked MSE (mask_value={})'.format(mask_value)
    return f
# Demo: predictions are wrong only in the last column (3 instead of 1), and
# each successive target row masks one more entry with -1.
y_pred = K.constant([
    [1, 1, 1, 1],
    [1, 1, 1, 3],
    [1, 1, 1, 3],
    [1, 1, 1, 3],
    [1, 1, 1, 3],
    [1, 1, 1, 3],
])
y_true = K.constant([
    [1, 1, 1, 1],
    [1, 1, 1, 1],
    [-1, 1, 1, 1],
    [-1, -1, 1, 1],
    [-1, -1, -1, 1],
    [-1, -1, -1, -1],
])
true = K.eval(y_true)
pred = K.eval(y_pred)
loss = K.eval(masked_mse(-1)(y_true, y_pred))
# Print every row. The original loop ignored its index and printed row 3 on
# each iteration.
for true_row, pred_row, row_loss in zip(true, pred, loss):
    print(true_row, pred_row, row_loss, sep='\t')
# Row 3, for example, prints:
# [-1. -1. 1. 1.] [ 1. 1. 1. 3.] 2.0
# (squared error 4 spread over the 2 unmasked entries; in the last row
# every entry is masked, so the loss there is 0 / 0.)

How to change parameters of pre-trained longformer model from huggingface

I am using the Hugging Face pre-trained LongformerModel to extract embeddings for a sentence. I want to change the token length (the maximum sentence length parameter), but I am not able to do so. Here is the code.
model = LongformerModel.from_pretrained('allenai/longformer-base-4096',output_hidden_states = True)
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
model.eval()
text=[" I like to play cricket"]
input_ids = torch.tensor(tokenizer.encode(text,max_length=20,padding=True,add_special_tokens=True)).unsqueeze(0)
print(tokenizer.encode(text,max_length=20,padding=True,add_special_tokens=True))
# [0, 38, 101, 7, 310, 5630, 2]
I expected encoder to give me list of size 20 with padding as I have passed a parameter max_length=20. But it returned list of size 7 only?
attention_mask = torch.ones(input_ids.shape, dtype=torch.long, device=input_ids.device)
attention_mask[:, [0,-1]] = 2
outputs = model(input_ids, attention_mask=attention_mask, return_dict=True)
hidden_states = outputs[2]
print ("Number of layers:", len(hidden_states), " (initial embeddings + 12 BERT layers)")
layer_i = 0
print ("Number of batches:", len(hidden_states[layer_i]))
batch_i = 0
print ("Number of tokens:", len(hidden_states[layer_i][batch_i]))
token_i = 0
print ("Number of hidden units:", len(hidden_states[layer_i][batch_i][token_i]))
Output:
Number of layers: 13 (initial embeddings + 12 BERT layers)
Number of batches: 1
Number of tokens: 512 # How can I change this parameter to pick up my sentence length during run-time
Number of hidden units: 768
How can I reduce number of tokens to sentence length instead of 512 ? Every-time I input a new sentence, it should pick up that length.
Question regarding padding
padding=True pads your input to the longest sequence in the batch. padding='max_length' pads your input to the specified max_length (documentation):
from transformers import LongformerTokenizer
tokenizer = LongformerTokenizer.from_pretrained('allenai/longformer-base-4096')
text=[" I like to play cricket"]
print(tokenizer.encode(text[0],max_length=20,padding='max_length',add_special_tokens=True))
Output:
[0, 38, 101, 7, 310, 5630, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Question regarding the number of tokens of the hidden states
The Longformer implementation applies padding to your sequence to match the attention window sizes. You can see the size of the attention windows in your model config:
model.config.attention_window
Output:
[512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512, 512]
This is the corresponding code line: link.

CNN Autoencoder with Embedding(300D GloveVec) layer for 10-15 word sentence not working problem due to padding

I am using pretrained GloVe vectors from Stanford to get a meaningful representation of each word, but I want a representation for a whole sentence of 5-15 words, so that I can use cosine similarity to find a match when I receive a new sentence. I fix each sentence at 15 words and apply an embedding layer, so the new input shape is 15 x 300 dimensions. (If a sentence has fewer than 15 words, it is padded to 15 words with a padding value whose embedding is a single 300-D vector drawn from a random uniform distribution.)
Below are my network shapes
[None, 15] -- Raw inputs embedding and padded(1) ID's
[None, 15, 300, 1], --input
[None, 8, 150, 128], -- conv 1
[None, 4, 75, 64], -- conv 2
[None, 2, 38, 32], -- conv 3
[None, 1, 19, 16], -- conv 4
[None, 1, 10, 4] -- conv 5
[None, 50] ---------Latent shape (new meaningful representati)------
[None, 1, 10, 4] -- encoded input for de-conv
[None, 1, 19, 16], -- conv_trans 5
[None, 2, 38, 32], -- conv_trans 4
[None, 4, 75, 64], -- conv_trans 3
[None, 8, 150, 128], -- conv_trans 2
[None, 15, 300, 1] -- conv_trans 1 -- for loss funtion with input
I have tried the CNN model with embedding layer in tensorflow
self._inputs = tf.placeholder(dtype=tf.int64, shape=[None, self.sent_len], name='input_x') #(?,15)
losses = []
# lookup layer
with tf.variable_scope('embedding') as scope:
self._W_emb = _variable_on_cpu(name='embedding', shape=[self.vocab_size, self.emb_size], initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0))
# assigned pretrained embedding here, so initializer would be overrided
sent_batch = tf.nn.embedding_lookup(params=self._W_emb, ids=self._inputs)
sent_batch = tf.expand_dims(sent_batch, -1)
self._x = sent_batch
encoder = []
shapes = []
current_input = sent_batch
shapes.append(current_input.get_shape().as_list())
for layer_i, n_output in enumerate(n_filters[1:]):
with tf.variable_scope('Encode_conv-%d' % layer_i) as scope:
n_input = current_input.get_shape().as_list()[3]
W, wd = _variable_with_weight_decay('W-%d' % layer_i, shape=[filter_size,filter_size,n_input,n_output],
initializer=tf.random_uniform_initializer(minval=-1.0, maxval=1.0), wd=self.l2_reg)
losses.append(wd)
biases = _variable_on_cpu('bias-%d' % layer_i, shape=[n_output], initializer=tf.constant_initializer(0.00))
encoder.append(W)
output = tf.nn.relu(tf.add(tf.nn.conv2d(current_input, W, strides=[1, 2, 2, 1], padding='SAME'), biases), name=scope.name)
current_input = output
shapes.append(output.get_shape().as_list())
#z = current_input
original_shape = current_input.get_shape().as_list()
flatsize = original_shape[1]*original_shape[2]*original_shape[3]
height,width,channel = original_shape[1]*1,original_shape[2]*1,original_shape[3]*1
current_input = tf.reshape(current_input,[-1,flatsize])
with tf.variable_scope('Encode_Z-%d' % layer_i) as scope:
W_en, wd_en = _variable_with_weight_decay('W', shape=[current_input.get_shape().as_list()[1], outsize],
initializer=tf.truncated_normal_initializer(stddev=0.05),
wd=self.l2_reg)
losses.append(wd_en)
biases_en = _variable_on_cpu('bias', shape=[outsize],initializer=tf.constant_initializer(0.00))
self._z = tf.nn.relu(tf.nn.bias_add(tf.matmul(current_input, W_en), biases_en)) # Compressed representation (?,50)
with tf.variable_scope('Decode_Z-%d' % layer_i) as scope:
W_dc, wd_dc = _variable_with_weight_decay('W', shape=[self._z.get_shape().as_list()[1], current_input.get_shape().as_list()[1]],
initializer=tf.truncated_normal_initializer(stddev=0.05), wd=self.l2_reg)
losses.append(wd_dc)
biases_dc = _variable_on_cpu('bias', shape=[current_input.get_shape().as_list()[1]],initializer=tf.constant_initializer(0.00))
current_input = tf.nn.relu(tf.nn.bias_add(tf.matmul(self._z, W_dc), biases_dc))
current_input = tf.reshape(current_input,[-1,height,width,channel])
encoder.reverse()
shapes.reverse()
for layer_i, shape in enumerate(shapes[1:]):
with tf.variable_scope('Decode_conv-%d' % layer_i) as scope:
W = encoder[layer_i]
b = _variable_on_cpu('bias-%d' % layer_i, shape=[W.get_shape().as_list()[2]], initializer=tf.constant_initializer(0.00))
hh,ww,cc = shape[1], shape[2], shape[3]
output = tf.nn.relu(tf.add( tf.nn.conv2d_transpose(current_input, W, [tf.shape(sent_batch)[0],hh,ww,cc],strides=[1, 2, 2, 1],padding='SAME'), b),name=scope.name)
current_input = output
self._y = current_input
# loss
with tf.variable_scope('loss') as scope:
cross_entropy_loss = tf.reduce_mean(tf.square(current_input - sent_batch))
losses.append(cross_entropy_loss)
self._total_loss = tf.add_n(losses, name='total_loss')
opt = tf.train.AdamOptimizer(0.0001)
grads = opt.compute_gradients(self._total_loss)
self._train_op = opt.apply_gradients(grads)
But the results are not good: the cosine similarity of the two sentences below is 0.9895 when computed on the latent compressed representation from the above model.
Functional disorders of polymorphonuclear neutrophils'
Unspecified fracture of skull, sequela'
And if I take sentences with 2-5 words, the similarity goes up to 0.9999. (I suspect the issue is caused by the larger number of padding positions, which all share the same uniformly initialized embedding from the lookup.)
Below information may be helpful,
Total of 10,000 training samples with 10 epochs
Used Relu activations
MSE loss function
Adam optimizers
Below is the words distributions of over all sentence [
Finally, can anyone suggest what is going wrong? Or is the approach itself not a good one to proceed with?

keras Concatenate mulitple layers cause AttributeError: 'NoneType' object has no attribute '_inbound_nodes'

I am trying to add some fixed kernels to my CNN; please see my code below.
This is how I create my kernels:
# Kernels
def create_kernel(x):
    """Turn the nine values in ``x`` into a constant [3, 3, 1, 1] float32 tensor.

    The two conversion steps are handed to ``pipe`` in order: build a
    float32 constant, then reshape it.
    """
    def as_float_constant(values):
        return tf.constant(values, dtype=tf.float32)

    def as_conv_filter(tensor):
        return tf.reshape(tensor, [3, 3, 1, 1])

    return pipe(x, as_float_constant, as_conv_filter)
k_edge1 = create_kernel([1, 0, -1, 0, 0, 0, -1, 0, 1])
k_edge2 = create_kernel([0, 1, 0, 1, -4, 1, 0, 1, 0])
k_edge3 = create_kernel([-1, -1, -1, -1, 8, -1, -1, -1, -1])
and my convolution network is like:
# Convolution network: one learned Conv2D branch plus three fixed-kernel
# branches, concatenated along the channel axis.
# Input layer: flattened 28x28 images.
l_input = Input(shape=(28**2, ))
# Reshape layer: restore the 2-D image with a single channel.
l_reshape = Reshape(target_shape=(28, 28, 1))(l_input)
# Convolution layers
l_conv1 = Conv2D(filters=20, kernel_size=(3, 3), padding='valid')(l_reshape)
# Fixed edge kernels applied with raw TensorFlow ops rather than Keras layers.
l_edge1 = tf.nn.conv2d(l_reshape, k_edge1, strides=[1, 1, 1, 1], padding='VALID')
l_edge2 = tf.nn.conv2d(l_reshape, k_edge2, strides=[1, 1, 1, 1], padding='VALID')
l_edge3 = tf.nn.conv2d(l_reshape, k_edge3, strides=[1, 1, 1, 1], padding='VALID')
l_conv1a = Concatenate(axis=3)([l_conv1, l_edge1, l_edge2, l_edge3]) # <- The error should be caused by this line.
l_conv2 = Conv2D(filters=20, kernel_size=(3, 3), padding='valid')(l_conv1a)
# NOTE(review): `border_mode` looks like the Keras 1 spelling of `padding`; confirm against the installed Keras version.
l_pool1 = MaxPooling2D(pool_size=(2, 2), border_mode='valid')(l_conv2)
# Flatten layer
l_flat = Flatten()(l_pool1)
# Fully connected layers: 50 hidden units with PReLU, then 10 output units.
l_fc1 = Dense(50, kernel_initializer='he_normal')(l_flat)
l_act1 = PReLU()(l_fc1)
l_fc3 = Dense(10, kernel_initializer='he_normal')(l_act1)
# Apply softmax to the 10-unit layer. The original fed `l_fc1` here, which
# left `l_act1` and `l_fc3` disconnected from the model output.
l_output = Activation('softmax')(l_fc3)
# Model
cnn_model = Model(l_input, l_output)
However, I got the following error:
Traceback (most recent call last):
File "<stdin>", line 2, in <module>
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\engine\network.py", line 93, in __init__
self._init_graph_network(*args, **kwargs)
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\engine\network.py", line 237, in _init_graph_network
self.inputs, self.outputs)
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\engine\network.py", line 1353, in _map_graph_network
tensor_index=tensor_index)
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\engine\network.py", line 1340, in build_map
node_index, tensor_index)
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\engine\network.py", line 1340, in build_map
node_index, tensor_index)
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\engine\network.py", line 1340, in build_map
node_index, tensor_index)
[Previous line repeated 2 more times]
File "C:\Users\Perry Cheng\AppData\Local\conda\conda\envs\ml_py_3_6\lib\site-packages\keras\engine\network.py", line 1312, in build_map
node = layer._inbound_nodes[node_index]
AttributeError: 'NoneType' object has no attribute '_inbound_nodes'
After some testing, I think the error comes from:
l_conv1a = Concatenate(axis=3)([l_conv1, l_edge1, l_edge2, l_edge3])
Is there any way to solve it?
Keras layers accept Keras tensors, not plain TensorFlow tensors, as their input. So if you would like to use tf.nn.conv2d instead of Conv2D layers in Keras, you need to wrap the calls inside a Lambda layer:
l_edge1 = Lambda(lambda x: tf.nn.conv2d(x, k_edge1, strides=[1, 1, 1, 1], padding='VALID'))(l_reshape)
l_edge2 = Lambda(lambda x: tf.nn.conv2d(x, k_edge2, strides=[1, 1, 1, 1], padding='VALID'))(l_reshape)
l_edge3 = Lambda(lambda x: tf.nn.conv2d(x, k_edge3, strides=[1, 1, 1, 1], padding='VALID'))(l_reshape)
You cannot use TF functions directly on Keras tensors as you are doing here:
l_edge1 = tf.nn.conv2d(l_reshape, k_edge1, strides=[1, 1, 1, 1], padding='VALID')
l_edge2 = tf.nn.conv2d(l_reshape, k_edge2, strides=[1, 1, 1, 1], padding='VALID')
l_edge3 = tf.nn.conv2d(l_reshape, k_edge3, strides=[1, 1, 1, 1], padding='VALID')
What you should do is to just use the Conv2D layer and then set the weights manually using layer.set_weights(array). To keep weights non-trainable, just set layer.trainable = False, like:
# `kernel_size` must be passed as a keyword (`kernel_size=(3, 3)`); the
# original `kernel_size(3, 3)` was a syntax error.
conv = Conv2D(filters=1, kernel_size=(3, 3), padding='valid')
# Call the layer first: its weights are only created when it is applied to
# an input, so `set_weights` has nothing to overwrite before this point.
l_edge1 = conv(l_reshape)
# NOTE: `your_weight_array` must be a list matching `conv.get_weights()`,
# i.e. a kernel of shape (3, 3, 1, 1) plus a bias of shape (1,) unless the
# layer is created with `use_bias=False`.
conv.set_weights(your_weight_array)
conv.trainable = False
And similarly for the other two Conv2D layers.

Resources