keras-rl DQNAgent fails with a PQC model when cloning it for the target network - keras

I have some issues using keras-rl2 with tensorflow_quantum and a VQC (using the same architecture as https://www.tensorflow.org/quantum/tutorials/quantum_reinforcement_learning ).
After creating the model and the DQNAgent, the failure occurs in dqn.compile:
############################################################
def generate_model_Qlearning(qubits, n_layers, n_actions, observables, target):
qubits = cirq.GridQubit.rect(1, n_qubits)
ops = [cirq.Z(q) for q in qubits]
observables = [ops[0]*ops[1], ops[2]*ops[3]] # Z_0*Z_1 for
action 0 and Z_2*Z_3 for action 1
input_tensor = tf.keras.Input(shape=(len(qubits), ),
dtype=tf.dtypes.float32, name='input')
re_uploading_pqc = ReUploadingPQC(qubits, n_layers,
observables, activation='tanh')([input_tensor])
process = tf.keras.Sequential(
[Rescaling(len(observables))],
name=target*"Target"+"Q-values"
)
Q_values = process(re_uploading_pqc)
model = tf.keras.Model(inputs=[input_tensor],
outputs=Q_values)
return model
############################################################
model = generate_model_Qlearning(qubits, n_layers, n_actions,
observables, False)
model_target = generate_model_Qlearning(qubits, n_layers,
n_actions, observables, True)
model_target.set_weights(model.get_weights())
dqn = DQNAgent(model=model, enable_double_dqn = True,
nb_actions=num_actions,
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
history = dqn.fit(env, nb_steps=50000, visualize=False,
verbose=2)
The following exception appears:
---------------------------------------------------------------------------
NotImplementedError Traceback (most recent call last)
Input In [119], in <module>
----> 1 dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
3 history = dqn.fit(env, nb_steps=50000,
4 visualize=False,
5 verbose=2)
File ~.local/lib/python3.8/site-packages/rl/agents/dqn.py:167, in DQNAgent.compile(self, optimizer, metrics)
164 metrics += [mean_q] # register default metrics
166 # We never train the target model, hence we can set the optimizer and loss arbitrarily.
--> 167 **self.target_model = clone_model(self.model, self.custom_model_objects)**
168 self.target_model.compile(optimizer='sgd', loss='mse')
169 self.model.compile(optimizer='sgd', loss='mse')
File ~.local/lib/python3.8/site-packages/rl/util.py:13, in clone_model(model, custom_objects)
9 def clone_model(model, custom_objects={}):
10 # Requires Keras 1.0.7 since get_config has breaking changes.
11 config = {
12 'class_name': model.__class__.__name__,
---> 13 **'config': model.get_config(),**
14 }
15 clone = model_from_config(config, custom_objects=custom_objects)
16 clone.set_weights(model.get_weights())
File ~.local/lib/python3.8/site-packages/keras/engine/functional.py:685, in Functional.get_config(self)
684 def get_config(self):
--> 685 return copy.deepcopy(get_network_config(self))
File ~.local/lib/python3.8/site-packages/keras/engine/functional.py:1410, in get_network_config(network, serialize_layer_fn)
1407 node_data = node.serialize(_make_node_key, node_conversion_map)
1408 filtered_inbound_nodes.append(node_data)
-> 1410 layer_config = serialize_layer_fn(layer)
1411 layer_config['name'] = layer.name
1412 layer_config['inbound_nodes'] = filtered_inbound_nodes
File ~.local/lib/python3.8/site-packages/keras/utils/generic_utils.py:510, in serialize_keras_object(instance)
507 if _SKIP_FAILED_SERIALIZATION:
508 return serialize_keras_class_and_config(
509 name, {_LAYER_UNDEFINED_CONFIG_KEY: True})
--> 510 raise e
511 serialization_config = {}
512 for key, item in config.items():
File ~.local/lib/python3.8/site-packages/keras/utils/generic_utils.py:505, in serialize_keras_object(instance)
503 name = get_registered_name(instance.__class__)
504 try:
--> 505 config = instance.get_config()
506 except NotImplementedError as e:
507 if _SKIP_FAILED_SERIALIZATION:
File ~.local/lib/python3.8/site-packages/keras/engine/base_layer_v1.py:497, in Layer.get_config(self)
494 # Check that either the only argument in the `__init__` is `self`,
495 # or that `get_config` has been overridden:
496 if len(extra_args) > 1 and hasattr(self.get_config, '_is_default'):
--> 497 raise NotImplementedError('Layers with arguments in `__init__` must '
498 'override `get_config`.')
499 return config
NotImplementedError: Layers with arguments in `__init__` must override `get_config`.
the topology:
It would be great if this library let us specify the target model (dqn_target) ourselves instead of cloning the model.
When working with hybrid neural networks that have a parameterized circuit inside a layer, the layer is difficult to serialize. So, when the library runs the line model.get_config(), it fails.
Any ideas on how to solve this?
Thanks!

Related

ValueError: Unable to create tensor, you should probably activate padding with 'padding=True'

I am trying to evaluate the facebook/hubert-base-ls960 Huggingface pre-trained model after fine-tuning it on a private dataset.
I am using the facebook/hubert-base-ls960 pre-trained model and the Wav2Vec2 feature extractor, with the pooling mode set to mean.
Here's the evaluation code:
test_dataset = load_dataset("csv", data_files={"test": "/content/drive/MyDrive/freelancing/test.csv"}, delimiter="\t")["test"]
def speech_file_to_array_fn(batch):
speech_array, sampling_rate = torchaudio.load(batch["path"])
resampler = torchaudio.transforms.Resample(sampling_rate, target_sampling_rate)
speech = resampler(speech_array).squeeze().numpy()
batch["speech"] = speech_array
return batch
def predict(batch):
features = feature_extractor(batch["speech"], sampling_rate=feature_extractor.sampling_rate, return_tensors="pt", padding=True)
input_values = features.input_values.to(device)
with torch.no_grad():
logits = model(input_values).logits
pred_ids = torch.argmax(logits, dim=-1).detach().cpu().numpy()
batch["predicted"] = pred_ids
return batch
test_dataset = test_dataset.map(speech_file_to_array_fn)
result = test_dataset.map(predict, batched=True, batch_size=2)
On the last line of code, I encounter the following error block:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
168 if not is_tensor(value):
--> 169 tensor = as_tensor(value)
170
ValueError: could not broadcast input array from shape (2,220683) into shape (2,)
During handling of the above exception, another exception occurred:
ValueError Traceback (most recent call last)
12 frames
<ipython-input-73-7bd88adad349> in <module>()
----> 1 result = test_dataset.map(predict, batched=True, batch_size=2)
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in map(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, num_proc, suffix_template, new_fingerprint, desc)
1970 new_fingerprint=new_fingerprint,
1971 disable_tqdm=disable_tqdm,
-> 1972 desc=desc,
1973 )
1974 else:
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
517 self: "Dataset" = kwargs.pop("self")
518 # apply actual function
--> 519 out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
520 datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
521 for dataset in datasets:
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in wrapper(*args, **kwargs)
484 }
485 # apply actual function
--> 486 out: Union["Dataset", "DatasetDict"] = func(self, *args, **kwargs)
487 datasets: List["Dataset"] = list(out.values()) if isinstance(out, dict) else [out]
488 # re-apply format to the output
/usr/local/lib/python3.7/dist-packages/datasets/fingerprint.py in wrapper(*args, **kwargs)
456 # Call actual function
457
--> 458 out = func(self, *args, **kwargs)
459
460 # Update fingerprint of in-place transforms + update in-place history of transforms
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in _map_single(self, function, with_indices, with_rank, input_columns, batched, batch_size, drop_last_batch, remove_columns, keep_in_memory, load_from_cache_file, cache_file_name, writer_batch_size, features, disable_nullable, fn_kwargs, new_fingerprint, rank, offset, disable_tqdm, desc, cache_only)
2340 indices,
2341 check_same_num_examples=len(input_dataset.list_indexes()) > 0,
-> 2342 offset=offset,
2343 )
2344 except NumExamplesMismatchError:
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in apply_function_on_filtered_inputs(inputs, indices, check_same_num_examples, offset)
2217 if with_rank:
2218 additional_args += (rank,)
-> 2219 processed_inputs = function(*fn_args, *additional_args, **fn_kwargs)
2220 if update_data is None:
2221 # Check if the function returns updated examples
/usr/local/lib/python3.7/dist-packages/datasets/arrow_dataset.py in decorated(item, *args, **kwargs)
1912 )
1913 # Use the LazyDict internally, while mapping the function
-> 1914 result = f(decorated_item, *args, **kwargs)
1915 # Return a standard dict
1916 return result.data if isinstance(result, LazyDict) else result
<ipython-input-71-6f845da29c00> in predict(batch)
11
12 def predict(batch):
---> 13 features = feature_extractor(batch["speech"], sampling_rate=feature_extractor.sampling_rate, return_tensors="pt", padding=True)
14
15 input_values = features.input_values.to(device)
/usr/local/lib/python3.7/dist-packages/transformers/models/wav2vec2/feature_extraction_wav2vec2.py in __call__(self, raw_speech, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors, sampling_rate, **kwargs)
200 truncation=truncation,
201 pad_to_multiple_of=pad_to_multiple_of,
--> 202 return_attention_mask=return_attention_mask,
203 )
204
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_sequence_utils.py in pad(self, processed_features, padding, max_length, truncation, pad_to_multiple_of, return_attention_mask, return_tensors)
230 batch_outputs[key].append(value)
231
--> 232 return BatchFeature(batch_outputs, tensor_type=return_tensors)
233
234 def _pad(
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in __init__(self, data, tensor_type)
78 def __init__(self, data: Optional[Dict[str, Any]] = None, tensor_type: Union[None, str, TensorType] = None):
79 super().__init__(data)
---> 80 self.convert_to_tensors(tensor_type=tensor_type)
81
82 def __getitem__(self, item: str) -> Union[Any]:
/usr/local/lib/python3.7/dist-packages/transformers/feature_extraction_utils.py in convert_to_tensors(self, tensor_type)
174 raise ValueError("Unable to create tensor returning overflowing values of different lengths. ")
175 raise ValueError(
--> 176 "Unable to create tensor, you should probably activate padding "
177 "with 'padding=True' to have batched tensors with the same length."
178 )
ValueError: Unable to create tensor, you should probably activate padding with 'padding=True' to have batched tensors with the same length.
I am working on Google Colab. These are the environment variables:
%env LC_ALL=C.UTF-8
%env LANG=C.UTF-8
%env TRANSFORMERS_CACHE=/content/cache
%env HF_DATASETS_CACHE=/content/cache
%env CUDA_LAUNCH_BLOCKING=1
The padding is already activated in the predict function.
Can you please help me fix it?

How to reconstruct the decoder from an LSTM-AE?

I have a trained LSTM-AE, of which the architecture is as follows:
In brief, I have an LSTM-AE of depth 3; the numbers of cells in the LSTM layers on the encoder side are [120, 80, 50] (and symmetric for the decoder). I built the model using the code shown on this page. For information, because I want to train the LSTM-AE directly on variable-length time series, I didn't specify the number of time steps in the input layer, which means the model is trained on batches of size 1 (one time series per batch).
I can extract the encoder just fine, but I cannot do the same for the decoder :-(... My goal is to check, given a vector of 50 features (which are extracted by the encoder), whether the decoder can reconstruct the input series.
Here's my attempt so far:
# load the full autoencoder
model = load_model(path_to_model)
# reconstruct the decoder
in_layer = Input(shape=(None, 50))
time_dist = model.layers[-1]
dec_1 = model.layers[-2]
dec_2 = model.layers[-3]
dec_3 = model.layers[-4]
rep_vec = model.layers[-5]
out_layer = time_dist(dec_1(dec_2(dec_3(rep_vec(in_layer)))))
decoder = Model(in_layer, out_layer, name='decoder')
res = decoder(input_feature) # input_feature has shape (50,)
I obtained this error:
InvalidArgumentError: slice index 1 of dimension 0 out of bounds. [Op:StridedSlice] name: decoder/repeat/strided_slice/
If you are interested in the full error log...
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
Input In [86], in <module>
13 out_layer = time_dist(dec_1(dec_2(dec_3(rep_vec(in_layer)))))
14 decoder = Model(in_layer, out_layer, name='decoder')
---> 15 res = decoder(input_feature)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1030, in Layer.__call__(self, *args, **kwargs)
1026 inputs = self._maybe_cast_inputs(inputs, input_list)
1028 with autocast_variable.enable_auto_cast_variables(
1029 self._compute_dtype_object):
-> 1030 outputs = call_fn(inputs, *args, **kwargs)
1032 if self._activity_regularizer:
1033 self._handle_activity_regularization(inputs, outputs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:420, in Functional.call(self, inputs, training, mask)
401 #doc_controls.do_not_doc_inheritable
402 def call(self, inputs, training=None, mask=None):
403 """Calls the model on new inputs.
404
405 In this case `call` just reapplies
(...)
418 a list of tensors if there are more than one outputs.
419 """
--> 420 return self._run_internal_graph(
421 inputs, training=training, mask=mask)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/functional.py:556, in Functional._run_internal_graph(self, inputs, training, mask)
553 continue # Node is not computable, try skipping.
555 args, kwargs = node.map_arguments(tensor_dict)
--> 556 outputs = node.layer(*args, **kwargs)
558 # Update tensor_dict.
559 for x_id, y in zip(node.flat_output_ids, nest.flatten(outputs)):
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:1030, in Layer.__call__(self, *args, **kwargs)
1026 inputs = self._maybe_cast_inputs(inputs, input_list)
1028 with autocast_variable.enable_auto_cast_variables(
1029 self._compute_dtype_object):
-> 1030 outputs = call_fn(inputs, *args, **kwargs)
1032 if self._activity_regularizer:
1033 self._handle_activity_regularization(inputs, outputs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/keras/layers/core.py:919, in Lambda.call(self, inputs, mask, training)
915 return var
917 with backprop.GradientTape(watch_accessed_variables=True) as tape,\
918 variable_scope.variable_creator_scope(_variable_creator):
--> 919 result = self.function(inputs, **kwargs)
920 self._check_variables(created_variables, tape.watched_variables())
921 return result
File D:/PhD/Code/feature_learning/train_models/train_lstmae.py:30, in repeat_vector(args)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206, in add_dispatch_support.<locals>.wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
209 # TypeError, when given unexpected types. So we need to catch both.
210 result = dispatch(wrapper, args, kwargs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1040, in _slice_helper(tensor, slice_spec, var)
1038 var_empty = constant([], dtype=dtypes.int32)
1039 packed_begin = packed_end = packed_strides = var_empty
-> 1040 return strided_slice(
1041 tensor,
1042 packed_begin,
1043 packed_end,
1044 packed_strides,
1045 begin_mask=begin_mask,
1046 end_mask=end_mask,
1047 shrink_axis_mask=shrink_axis_mask,
1048 new_axis_mask=new_axis_mask,
1049 ellipsis_mask=ellipsis_mask,
1050 var=var,
1051 name=name)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/util/dispatch.py:206, in add_dispatch_support.<locals>.wrapper(*args, **kwargs)
204 """Call target, and fall back on dispatchers if there is a TypeError."""
205 try:
--> 206 return target(*args, **kwargs)
207 except (TypeError, ValueError):
208 # Note: convert_to_eager_tensor currently raises a ValueError, not a
209 # TypeError, when given unexpected types. So we need to catch both.
210 result = dispatch(wrapper, args, kwargs)
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/array_ops.py:1213, in strided_slice(input_, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, var, name)
1210 if strides is None:
1211 strides = ones_like(begin)
-> 1213 op = gen_array_ops.strided_slice(
1214 input=input_,
1215 begin=begin,
1216 end=end,
1217 strides=strides,
1218 name=name,
1219 begin_mask=begin_mask,
1220 end_mask=end_mask,
1221 ellipsis_mask=ellipsis_mask,
1222 new_axis_mask=new_axis_mask,
1223 shrink_axis_mask=shrink_axis_mask)
1225 parent_name = name
1227 if var is not None:
File ~/venv/lib/python3.8/site-packages/tensorflow/python/ops/gen_array_ops.py:10505, in strided_slice(input, begin, end, strides, begin_mask, end_mask, ellipsis_mask, new_axis_mask, shrink_axis_mask, name)
10503 return _result
10504 except _core._NotOkStatusException as e:
> 10505 _ops.raise_from_not_ok_status(e, name)
10506 except _core._FallbackException:
10507 pass
File ~/venv/lib/python3.8/site-packages/tensorflow/python/framework/ops.py:6897, in raise_from_not_ok_status(e, name)
6895 message = e.message + (" name: " + name if name is not None else "")
6896 # pylint: disable=protected-access
-> 6897 six.raise_from(core._status_to_exception(e.code, message), None)
File <string>:3, in raise_from(value, from_value)
InvalidArgumentError: slice index 1 of dimension 0 out of bounds. [Op:StridedSlice] name: decoder/repeat/strided_slice/
I appreciate very much any advice you would give me!
Edit
Here is the code I used to build the model:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.initializers import GlorotUniform
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.backend import shape
def repeat_vector(args):
"""Builds the repeat vector layer dynamically by the size of the input series"""
layer_to_repeat = args[0]
sequence_layer = args[1]
return RepeatVector(shape(sequence_layer)[1])(layer_to_repeat)
n_atts = 3 # time series of 3 measurements
n_units = [120, 80, 50] # encoder - 1st layer: 120, 2nd layer: 80, 3rd layer: 50 (and symmetric for decoder)
n_layers = len(n_units)
init = GlorotUniform(seed=420)
reg = None
optimizer = Adam(learning_rate=0.0001)
activ = 'tanh'
loss_metric = 'mse'
inputs = Input(shape=(None, n_atts), name='input_layer')
# the encoder
encoded = LSTM(n_units[0], name='encoder_1', return_sequences=(n_layers != 1), kernel_initializer=init,
kernel_regularizer=reg, activation=activ)(inputs)
for i in range(1, n_layers):
if i != n_layers - 1:
encoded = LSTM(n_units[i], name='encoder_{}'.format(i + 1), return_sequences=(n_layers != 1),
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(encoded)
else:
encoded = LSTM(n_units[i], name='encoder_{}'.format(i + 1), return_sequences=False,
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(encoded)
# repeat the vector (plug the encoder to the decoder)
repeated = Lambda(repeat_vector, output_shape=(None, n_units[-1]), name='repeat')([encoded, inputs])
# the decoder
decoded = LSTM(n_units[n_layers - 1], return_sequences=True, name='decoder_1',
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(repeated) # first layer
for i in range(1, n_layers):
decoded = LSTM(n_units[n_layers - 1 - i], return_sequences=True, name='decoder_{}'.format(i + 1),
kernel_initializer=init, kernel_regularizer=reg, activation=activ)(decoded)
# last layer
tdist = TimeDistributed(Dense(n_atts))(decoded)
# compile the model
model = Model(inputs, tdist, name='lstm-ae')
model.compile(optimizer=optimizer, loss=loss_metric)
For information, I use tensorflow 2.5.
Because the number of units is read from a config file, I wrote the code this way to add the layers programmatically.

Removing last 2 layers from a BERT classifier results in " 'tuple' object has no attribute 'dim' " error. Why?

I fine tuned a huggingface transformer using Keras (with ktrain) and then reloaded the model in Pytorch.
I want to access the third to last layer (pre_classifier), so I removed the two last layers:
BERT2 = torch.nn.Sequential(*(list(BERT.children())[:-2]))
Running an encoded sentence through this yields the following error message:
AttributeError Traceback (most recent call last)
<ipython-input-38-640702475573> in <module>
----> 1 ans2=BERT2(torch.tensor([e1]))
2 print (ans2)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\container.py in forward(self, input)
90 def forward(self, input):
91 for module in self._modules.values():
---> 92 input = module(input)
93 return input
94
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\module.py in __call__(self, *input, **kwargs)
539 result = self._slow_forward(*input, **kwargs)
540 else:
--> 541 result = self.forward(*input, **kwargs)
542 for hook in self._forward_hooks.values():
543 hook_result = hook(self, input, result)
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\modules\linear.py in forward(self, input)
85
86 def forward(self, input):
---> 87 return F.linear(input, self.weight, self.bias)
88
89 def extra_repr(self):
C:\ProgramData\Anaconda3\lib\site-packages\torch\nn\functional.py in linear(input, weight, bias)
1366 - Output: :math:`(N, *, out\_features)`
1367 """
-> 1368 if input.dim() == 2 and bias is not None:
1369 # fused op is marginally faster
1370 ret = torch.addmm(bias, input, weight.t())
AttributeError: 'tuple' object has no attribute 'dim'
Meanwhile deleting the classifier entirely (all three layers)
BERT3 = torch.nn.Sequential(*(list(BERT.children())[:-3]))
Yields the expected tensor (within a size 1 tuple) with the expected shape ([sentence_num,token_num,768]).
Why does the removal of two (but not three) layers break the model?
And how can I access the pre_classifier results?
It is not accessible by setting config with output_hidden_states=True, as this flag returns the hidden values of the BERT transformer stack, not those of the classifier layers downstream of it.
--
PS
The code used to initialize the BERT model:
def collect_data_for_FT():
from sklearn.datasets import fetch_20newsgroups
train_data = fetch_20newsgroups(subset='train', shuffle=True, random_state=42)
test_data = fetch_20newsgroups(subset='test', shuffle=True, random_state=42)
print('size of training set: %s' % (len(train_b['data'])))
print('size of validation set: %s' % (len(test_b['data'])))
print('classes: %s' % (train_b.target_names))
x_train = train_data.data
y_train = train_data.target
x_test = test_data.data
y_test = test_data.target
return(x_train,y_train,x_test,y_test)
bert_name = 'distilbert-base-uncased'
from transformers import DistilBertForSequenceClassification,AutoConfig,AutoTokenizer
import os
dir_path = os.getcwd()
dir_path=os.path.join(dir_path,'models')
config = AutoConfig.from_pretrained(bert_name,num_labels=20) # change model configuration to access hidden values.
try:
BERT = DistilBertForSequenceClassification.from_pretrained(dir_path,config=config)
print ("Finetuned predictor loaded")
except:
import tensorflow.keras as keras
print ("No finetuned predictor found.\nTraining.")
(x_train,y_train,x_test,y_test)=collect_data_for_FT()
####
# prework:
import ktrain
from ktrain import text
t = text.Transformer(bert_name, maxlen=500, classes=train_b.target_names)
trn = t.preprocess_train(x_train, y_train)
val = t.preprocess_test(x_test, y_test)
pre_trained_model = t.get_classifier()
learner = ktrain.get_learner(pre_trained_model, train_data=trn, val_data=val, batch_size=6)
####
####
# Find best learning rate
learner.lr_find()
learner.lr_plot()
####
learner.fit_onecycle(2e-4, 4) # choosen based on the learning rate/loss plot.
####
# prepare and save:
predictor = ktrain.get_predictor(learner.model, preproc=t)
predictor.save('my_distilbertbase_predictor')
predictor.model.save_pretrained(dir_path)
####
BERT = DistilBertForSequenceClassification.from_pretrained(os.path.join(dir_path), from_tf=True,config=config) # re-load tensorflow to pytorch
BERT.save_pretrained(dir_path) # save as a "full blooded" pytorch model
BERT = DistilBertForSequenceClassification.from_pretrained(dir_path,config=config) # re-load
from tensorflow.keras import backend as K
K.clear_session() # loading from tensorflow takes up space and the GPU. This releases it/

How to use SHAP with a linear SVC model from sklearn using Pipeline?

I am doing text classification using a linear SVC model from sklearn. Now I want to visualize which words/tokens have the highest impact on the classification decision by using SHAP (https://github.com/slundberg/shap).
Right now this does not work because I am getting an error that seems to originate from the vectorizer step in the pipeline I have defined - what's wrong here?
Is my general approach on how to use SHAP in this case correct?
x_Train, x_Test, y_Train, y_Test = train_test_split(df_all['PDFText'], df_all['class'], test_size = 0.2, random_state = 1234)
pipeline = Pipeline([
(
'tfidv',
TfidfVectorizer(
ngram_range=(1,3),
analyzer='word',
strip_accents = ascii,
use_idf = True,
sublinear_tf=True,
max_features=6000,
min_df=2,
max_df=1.0
)
),
(
'lin_svc',
svm.SVC(
C=1.0,
probability=True,
kernel='linear'
)
)
])
pipeline.fit(x_Train, y_Train)
shap.initjs()
explainer = shap.KernelExplainer(pipeline.predict_proba, x_Train)
shap_values = explainer.shap_values(x_Test, nsamples=100)
shap.force_plot(explainer.expected_value[0], shap_values[0][0,:], x_Test.iloc[0,:])
This is the error message I get:
Provided model function fails when applied to the provided data set.
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-81-4bca63616b3b> in <module>
3
4 # use Kernel SHAP to explain test set predictions
----> 5 explainer = shap.KernelExplainer(pipeline.predict_proba, x_Train)
6 shap_values = explainer.shap_values(x_Test, nsamples=100)
7
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\shap\explainers\kernel.py in __init__(self, model, data, link, **kwargs)
95 self.keep_index_ordered = kwargs.get("keep_index_ordered", False)
96 self.data = convert_to_data(data, keep_index=self.keep_index)
---> 97 model_null = match_model_to_data(self.model, self.data)
98
99 # enforce our current input type limitations
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\shap\common.py in match_model_to_data(model, data)
80 out_val = model.f(data.convert_to_df())
81 else:
---> 82 out_val = model.f(data.data)
83 except:
84 print("Provided model function fails when applied to the provided data set.")
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\utils\metaestimators.py in <lambda>(*args, **kwargs)
116
117 # lambda, but not partial, allows help() to work with update_wrapper
--> 118 out = lambda *args, **kwargs: self.fn(obj, *args, **kwargs)
119 # update the docstring of the returned function
120 update_wrapper(out, self.fn)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\pipeline.py in predict_proba(self, X)
379 for name, transform in self.steps[:-1]:
380 if transform is not None:
--> 381 Xt = transform.transform(Xt)
382 return self.steps[-1][-1].predict_proba(Xt)
383
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in transform(self, raw_documents, copy)
1631 check_is_fitted(self, '_tfidf', 'The tfidf vector is not fitted')
1632
-> 1633 X = super(TfidfVectorizer, self).transform(raw_documents)
1634 return self._tfidf.transform(X, copy=False)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in transform(self, raw_documents)
1084
1085 # use the same matrix-building strategy as fit_transform
-> 1086 _, X = self._count_vocab(raw_documents, fixed_vocab=True)
1087 if self.binary:
1088 X.data.fill(1)
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in _count_vocab(self, raw_documents, fixed_vocab)
940 for doc in raw_documents:
941 feature_counter = {}
--> 942 for feature in analyze(doc):
943 try:
944 feature_idx = vocabulary[feature]
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(doc)
326 tokenize)
327 return lambda doc: self._word_ngrams(
--> 328 tokenize(preprocess(self.decode(doc))), stop_words)
329
330 else:
c:\users\s.p\appdata\local\programs\python\python37\lib\site-packages\sklearn\feature_extraction\text.py in <lambda>(x)
254
255 if self.lowercase:
--> 256 return lambda x: strip_accents(x.lower())
257 else:
258 return strip_accents
AttributeError: 'numpy.ndarray' object has no attribute 'lower'
KernelExplainer expects to receive a classification model as the first argument. Please check the use of Pipeline with Shap following the link.
In your case, you can use the Pipeline as follows:
x_Train = pipeline.named_steps['tfidv'].fit_transform(x_Train)
explainer = shap.KernelExplainer(pipeline.named_steps['lin_svc'].predict_proba, x_Train)

TypeError: ('Not JSON Serializable:', Dimension(2048))

The model takes an input of shape 2048: the transfer values taken from the Inception model.
What I wanted to achieve is to port this code https://github.com/Hvass-Labs/TensorFlow-Tutorials/blob/master/08_Transfer_Learning.ipynb to the Keras API.
Everything went smooth till I tried to save it.
When I try to save it, it raises TypeError: ('Not JSON Serializable:', Dimension(2048))
I am able to save other models without a problem.
I don't understand why this one doesn't work.
I tried to save it on Windows 10 with python_ver = 3.6, tensorflow_ver = 1.6rcu and Ubuntu 16.04 with python_ver = 3.6, tensorflow_ver = 1.3.
I created the model with the code below.
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import InputLayer
from tensorflow.python.keras.layers import Dense
# Declare variables for model.
transfer_len = 2048
num_classes = 3
# Model creation.
model = Sequential()
# Input layer of shape 2048.
model.add(InputLayer(input_shape = (transfer_len,)))
# Fully connected 1024.
model.add(Dense(1024, activation='relu'))
# Output layer.
model.add(Dense(num_classes, activation='softmax'))
from tensorflow.python.keras.optimizers import Adam
optimizer = Adam(lr=1e-3)
model.compile(optimizer = optimizer,
loss = 'categorical_crossentropy',
metrics=['accuracy'])
model.fit(x = transfer_values_train,
y = labels_train,
epochs = 20, batch_size = 100, verbose=0)
#
output_path = "model.keras"
model.save(output_path)
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-22-6a252d3d7102> in <module>()
----> 1 model.save(output_path)
~\Anaconda3\envs\gpu\lib\site-packages\tensorflow\python\keras\_impl\keras\engine\topology.py in save(self, filepath, overwrite, include_optimizer)
1044 """
1045 from tensorflow.python.keras._impl.keras.models import save_model # pylint: disable=g-import-not-at-top
-> 1046 save_model(self, filepath, overwrite, include_optimizer)
1047
1048 def save_weights(self, filepath, overwrite=True):
~\Anaconda3\envs\gpu\lib\site-packages\tensorflow\python\keras\_impl\keras\models.py in save_model(model, filepath, overwrite, include_optimizer)
131 'config': model.get_config()
132 },
--> 133 default=get_json_type).encode('utf8')
134
135 model_weights_group = f.create_group('model_weights')
~\Anaconda3\envs\gpu\lib\json\__init__.py in dumps(obj, skipkeys, ensure_ascii, check_circular, allow_nan, cls, indent, separators, default, sort_keys, **kw)
236 check_circular=check_circular, allow_nan=allow_nan, indent=indent,
237 separators=separators, default=default, sort_keys=sort_keys,
--> 238 **kw).encode(obj)
239
240
~\Anaconda3\envs\gpu\lib\json\encoder.py in encode(self, o)
197 # exceptions aren't as detailed. The list call should be roughly
198 # equivalent to the PySequence_Fast that ''.join() would do.
--> 199 chunks = self.iterencode(o, _one_shot=True)
200 if not isinstance(chunks, (list, tuple)):
201 chunks = list(chunks)
~\Anaconda3\envs\gpu\lib\json\encoder.py in iterencode(self, o, _one_shot)
255 self.key_separator, self.item_separator, self.sort_keys,
256 self.skipkeys, _one_shot)
--> 257 return _iterencode(o, 0)
258
259 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
~\Anaconda3\envs\gpu\lib\site-packages\tensorflow\python\keras\_impl\keras\models.py in get_json_type(obj)
113 return obj.__name__
114
--> 115 raise TypeError('Not JSON Serializable:', obj)
116
117 from tensorflow.python.keras._impl.keras import __version__ as keras_version # pylint: disable=g-import-not-at-top
TypeError: ('Not JSON Serializable:', Dimension(2048))
Okay, so the transfer_len variable was of type 'tensorflow.python.framework.tensor_shape.Dimension'.
I changed it to int and the model now saves normally.

Resources