Cannot interpret SVM model using Shapash

Currently, I'm exploring machine learning interpretability tools for one of my projects. I found Shapash, a fairly new tool, and many people suggest using it to create a few easily interpretable charts for an ML model. It worked fine with RandomForestClassifier and generated a webpage full of different charts, but I can't achieve the same with SVM (I'm just exploring this library, not focusing on the perfect ML model for the problem).
Note: I'm using Shapash (link here).
# Fit blackbox model
from sklearn import svm
from sklearn.metrics import f1_score, accuracy_score

svc = svm.SVC()
svc.fit(X_train_smote, y_train_smote)
y_pred = svc.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

from shapash import SmartExplainer
xpl = SmartExplainer(model=svc)
The error I'm getting:
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
/tmp/ipykernel_13648/1233939729.py in <module>
----> 1 xpl = SmartExplainer(model=svc)
~/Python_AI/ai_env/lib/python3.8/site-packages/shapash/explainer/smart_explainer.py in __init__(self, model, backend, preprocessing, postprocessing, features_groups, features_dict, label_dict, title_story, palette_name, colors_dict, **kwargs)
194 if isinstance(backend, str):
195 backend_cls = get_backend_cls_from_name(backend)
--> 196 self.backend = backend_cls(
197 model=self.model, preprocessing=preprocessing, **kwargs)
198 elif isinstance(backend, BaseBackend):
~/Python_AI/ai_env/lib/python3.8/site-packages/shapash/backend/shap_backend.py in __init__(self, model, preprocessing, explainer_args, explainer_compute_args)
16 self.explainer_args = explainer_args if explainer_args else {}
17 self.explainer_compute_args = explainer_compute_args if explainer_compute_args else {}
---> 18 self.explainer = shap.Explainer(model=model, **self.explainer_args)
19
20 def run_explainer(self, x: pd.DataFrame) -> dict:
~/Python_AI/ai_env/lib/python3.8/site-packages/shap/explainers/_explainer.py in __init__(self, model, masker, link, algorithm, output_names, feature_names, **kwargs)
166 # if we get here then we don't know how to handle what was given to us
167 else:
--> 168 raise Exception("The passed model is not callable and cannot be analyzed directly with the given masker! Model: " + str(model))
169
170 # build the right subclass
Exception: The passed model is not callable and cannot be analyzed directly with the given masker! Model: SVC()
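No fix is quoted above, but the exception points at the cause: Shapash's default shap backend passes the model straight to shap.Explainer, which neither recognizes SVC nor can treat it as a callable. As one possible workaround (a sketch of my own, not Shapash's documented API), you could compute SHAP values with shap's KernelExplainer on a callable such as predict_proba, which sidesteps SmartExplainer entirely:

import shap
from sklearn import svm

# Assumption: fall back to plain shap rather than Shapash.
svc = svm.SVC(probability=True)  # probability=True makes predict_proba available
svc.fit(X_train_smote, y_train_smote)

# KernelExplainer accepts any callable; a small background sample keeps it tractable.
background = shap.sample(X_train_smote, 100)
explainer = shap.KernelExplainer(svc.predict_proba, background)
shap_values = explainer.shap_values(X_test[:50])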

Related

How to solve YellowbrickTypeError: The supplied model is not a clustering estimator; try a classifier or regression score visualizer instead?

I am trying to visualize the silhouette score for the fuzzy c-means clustering method using yellowbrick's silhouette visualizer. The same code works perfectly fine for k-means clustering; however, it raises the following error for the fuzzy c-means clustering method.
import matplotlib.pyplot as plt
from fcmeans import FCM
from yellowbrick.cluster import SilhouetteVisualizer

model = FCM(n_clusters=5, random_state=0)
visualizer = SilhouetteVisualizer(model, colors='yellowbrick')
visualizer.fit(X)  # X is the feature matrix used for clustering
visualizer.show()
Error:
YellowbrickTypeError Traceback (most recent call last)
Input In [24], in <cell line: 6>()
3 from yellowbrick.cluster import SilhouetteVisualizer
5 model = FCM(n_clusters=5, random_state=0)
----> 6 visualizer = SilhouetteVisualizer(model, colors='yellowbrick')
8 visualizer.fit(X)
9 visualizer.show()
File C:\ProgramData\Anaconda3\lib\site-packages\yellowbrick\cluster\silhouette.py:118, in SilhouetteVisualizer.__init__(self, estimator, ax, colors, is_fitted, **kwargs)
115 def __init__(self, estimator, ax=None, colors=None, is_fitted="auto", **kwargs):
116
117 # Initialize the visualizer bases
--> 118 super(SilhouetteVisualizer, self).__init__(estimator, ax=ax, **kwargs)
120 # Visual Properties
121 # Use colors if it is given, otherwise attempt to use colormap which
122 # which will override colors. If neither is found, default to None.
123 # The colormap may yet still be found in resolve_colors
124 self.colors = colors
File C:\ProgramData\Anaconda3\lib\site-packages\yellowbrick\cluster\base.py:45, in
ClusteringScoreVisualizer.__init__(self, estimator, ax, fig, force_model, **kwargs)
43 def __init__(self, estimator, ax=None, fig=None, force_model=False, **kwargs):
44 if not force_model and not isclusterer(estimator):
---> 45 raise YellowbrickTypeError(
46 "The supplied model is not a clustering estimator; try a "
47 "classifier or regression score visualizer instead!"
48 )
49 self.force_model = force_model
50 super(ClusteringScoreVisualizer, self).__init__(
51 estimator, ax=ax, fig=fig, **kwargs
52 )
YellowbrickTypeError: The supplied model is not a clustering estimator; try a classifier or
regression score visualizer instead!
Kindly help me in solving this issue.
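No answer is quoted here, but the traceback itself shows an escape hatch: ClusteringScoreVisualizer only raises when force_model is falsy, and SilhouetteVisualizer forwards **kwargs to it. A minimal sketch of two possible workarounds (the first assumes FCM behaves enough like an sklearn clusterer for the visualizer to run once the type check is bypassed):

from fcmeans import FCM
from sklearn.metrics import silhouette_score
from yellowbrick.cluster import SilhouetteVisualizer

model = FCM(n_clusters=5, random_state=0)

# Option 1: bypass Yellowbrick's isclusterer() check via force_model
# (visible in base.py in the traceback above).
visualizer = SilhouetteVisualizer(model, colors='yellowbrick', force_model=True)
visualizer.fit(X)
visualizer.show()

# Option 2: skip the visualizer and compute the silhouette score directly.
model.fit(X)
labels = model.predict(X)
print(silhouette_score(X, labels))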

Can't find table(s) lexeme_norm for language 'en' in spacy-lookups-data

I want to train new NER entities with the following code:
import random
import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding

def train_spacy_model(data, model='en_core_web_trf', n_iter=50):
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)

    TRAIN_DATA = data
    ner = nlp.get_pipe("ner")

    examples = []
    for text, annotations in TRAIN_DATA:
        examples.append(Example.from_dict(nlp.make_doc(text), annotations))
    nlp.initialize(lambda: examples)

    pipe_exceptions = ["ner"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    with nlp.disable_pipes(*other_pipes):  # only train NER
        for itn in range(n_iter):
            random.shuffle(examples)
            losses = {}
            batches = minibatch(examples, size=compounding(4.0, 64.0, 1.2))
            for batch in batches:
                nlp.update(
                    batch,
                    drop=0.20,
                    losses=losses
                )
            print("Losses", losses)
    return nlp

nlp = train_spacy_model(data=dataset, n_iter=30)
I keep getting this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[296], line 40
36 print("Losses", losses)
38 return nlp
---> 40 nlp = train_spacy_model(data=no_verlaps_dataset, n_iter=30)
42 # save model to output directory
43 output_dir = '_data/models/actor_ner'
Cell In[296], line 16, in train_spacy_model(data, model, n_iter)
14 for text, annotations in TRAIN_DATA:
15 examples.append(Example.from_dict(nlp.make_doc(text), annotations))
---> 16 nlp.initialize(lambda: examples)
17 # for ent in annotations.get('entities'):
18 # ner.add_label(ent[2])
20 pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/language.py:1290, in Language.initialize(self, get_examples, sgd)
1288 config = self.config.interpolate()
1289 # These are the settings provided in the [initialize] block in the config
-> 1290 I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
1291 before_init = I["before_init"]
1292 if before_init is not None:
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/config.py:746, in registry.resolve(cls, config, schema, overrides, validate)
737 #classmethod
738 def resolve(
739 cls,
(...)
744 validate: bool = True,
745 ) -> Dict[str, Any]:
--> 746 resolved, _ = cls._make(
747 config, schema=schema, overrides=overrides, validate=validate, resolve=True
748 )
749 return resolved
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/config.py:795, in registry._make(cls, config, schema, overrides, resolve, validate)
793 if not is_interpolated:
794 config = Config(orig_config).interpolate()
--> 795 filled, _, resolved = cls._fill(
796 config, schema, validate=validate, overrides=overrides, resolve=resolve
797 )
798 filled = Config(filled, section_order=section_order)
799 # Check that overrides didn't include invalid properties not in config
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/config.py:867, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
864 getter = cls.get(reg_name, func_name)
865 # We don't want to try/except this and raise our own error
866 # here, because we want the traceback if the function fails.
--> 867 getter_result = getter(*args, **kwargs)
868 else:
869 # We're not resolving and calling the function, so replace
870 # the getter_result with a Promise class
871 getter_result = Promise(
872 registry=reg_name, name=func_name, args=args, kwargs=kwargs
873 )
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/language.py:108, in load_lookups_data(lang, tables)
105 #registry.misc("spacy.LookupsDataLoader.v1")
106 def load_lookups_data(lang, tables):
107 util.logger.debug(f"Loading lookups from spacy-lookups-data: {tables}")
--> 108 lookups = load_lookups(lang=lang, tables=tables)
109 return lookups
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/lookups.py:30, in load_lookups(lang, tables, strict)
28 if lang not in registry.lookups:
29 if strict and len(tables) > 0:
---> 30 raise ValueError(Errors.E955.format(table=", ".join(tables), lang=lang))
31 return lookups
32 data = registry.lookups.get(lang)
ValueError: [E955] Can't find table(s) lexeme_norm for language 'en' in spacy-lookups-data. Make sure you have the package installed or provide your own lookup tables if no default lookups are available for your language.
I have installed the package:
pip install spacy-lookups-data
Collecting spacy-lookups-data
Downloading spacy_lookups_data-1.0.3-py2.py3-none-any.whl (98.5 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.5/98.5 MB 25.9 MB/s eta 0:00:00
But it still persists.
How can I fix this error so I can proceed with updating the model to detect new entities for NER tasks?
EDIT
It got fixed when I restarted the kernel of the Jupyter notebook this code ran in.
To answer the narrow question: you probably need to restart your runtime in order for the tables in spacy-lookups-data to be registered.
To answer the question you didn't ask: the quoted script looks like it was only partially updated from v2 and I wouldn't recommend using it, in particular not for en_core_web_trf. One recommended way to update ner components in spacy v3 pipelines is shown in this demo project:
https://github.com/explosion/projects/tree/v3/pipelines/ner_demo_update
It handles a lot of the pipeline/config/training details for you in order to update ner without affecting the performance of the other components in the pipeline. A walkthrough of how to run a project is shown in the v2->v3 examples README.
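If you prefer a quick in-script experiment over the project workflow, here is a minimal sketch of a spaCy v3-style update loop (my own assumptions: a non-transformer pipeline such as en_core_web_sm, and TRAIN_DATA in the usual (text, annotations) format; resume_training keeps the existing weights instead of re-initializing them the way nlp.initialize does):

import random
import spacy
from spacy.training import Example
from spacy.util import minibatch

nlp = spacy.load("en_core_web_sm")  # assumption: a non-transformer pipeline
ner = nlp.get_pipe("ner")

examples = [Example.from_dict(nlp.make_doc(text), ann) for text, ann in TRAIN_DATA]
for example in examples:
    for ent in example.reference.ents:
        ner.add_label(ent.label_)  # register any new entity labels

optimizer = nlp.resume_training()  # keep existing weights, unlike nlp.initialize()
with nlp.select_pipes(enable="ner"):  # only update the ner component
    for _ in range(30):
        random.shuffle(examples)
        losses = {}
        for batch in minibatch(examples, size=8):
            nlp.update(batch, drop=0.2, losses=losses, sgd=optimizer)
        print("Losses", losses)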
I've been running this code in a Jupyter Notebook, and the error persisted until I restarted the kernel. So the answer is to restart the notebook kernel.

Unable to load a PPO model

Hello, I've trained a PPO model from stable_baselines3 on Colab and saved it:
model.save("model")
But when I tried loading it:
m = PPO.load("model", env=env)
I got the following error:
AttributeError Traceback (most recent call last)
/tmp/ipykernel_25649/121834194.py in <module>
2 env = e.MinitaurBulletEnv(render=False)
3 env.reset()
----> 4 m2 = PPO.load("model", env=env)
5 for episode in range(1, 6):
6 obs = env.reset()
~/anaconda3/lib/python3.8/site-packages/stable_baselines3/common/base_class.py in load(cls, path, env, device, custom_objects, **kwargs)
668 env = cls._wrap_env(env, data["verbose"])
669 # Check if given env is valid
--> 670 check_for_correct_spaces(env, data["observation_space"], data["action_space"])
671 else:
672 # Use stored env, if one exists. If not, continue as is (can be used for predict)
~/anaconda3/lib/python3.8/site-packages/stable_baselines3/common/utils.py in check_for_correct_spaces(env, observation_space, action_space)
217 :param action_space: Action space to check against
218 """
--> 219 if observation_space != env.observation_space:
220 raise ValueError(f"Observation spaces do not match: {observation_space} != {env.observation_space}")
221 if action_space != env.action_space:
~/anaconda3/lib/python3.8/site-packages/gym/spaces/box.py in __eq__(self, other)
138
139 def __eq__(self, other):
--> 140 return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
AttributeError: 'Box' object has no attribute 'shape'
For context, the env is a Box-space env from pybullet:
import pybullet_envs.bullet.minitaur_gym_env as e
import gym
env = e.MinitaurBulletEnv(render=False)
env.reset()
Additional info: the model loaded perfectly in Colab.
From your question, I can't tell whether you are working on Google Colab, but if you are, I think you should include the whole path to the saved model when you load it. You may need to do this even outside Colab.
What I mean is that your line of code should probably look something like this when you're loading the model:
m = PPO.load("./model.zip", env=env)
I hope this helps!
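For reference, a small sketch of the save/load round trip with explicit paths (assuming Colab's /content working directory for illustration; stable_baselines3 appends .zip on save and also accepts the path without the extension on load):

from stable_baselines3 import PPO
import pybullet_envs.bullet.minitaur_gym_env as e

env = e.MinitaurBulletEnv(render=False)
env.reset()

model = PPO("MlpPolicy", env)          # assumption: an MlpPolicy agent for illustration
model.save("/content/model")           # writes /content/model.zip
m = PPO.load("/content/model.zip", env=env)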

huggingface's ReformerForMaskedLM configuration issue

I'm trying to pass all of huggingface's ...ForMaskedLM models to the FitBert model for a fill-in-the-blank task and see which pretrained weights yield the best result on the data I've prepared. But in the Reformer module I get an error saying that I need to set 'config.is_decoder=False', and I don't really understand what that means (this is my first time using huggingface). I tried to pass ReformerConfig(is_decoder=False) to the model but I still get the same error. How can I fix this?
My code:
pretrained_weights = ['google/reformer-crime-and-punishment',
                      'google/reformer-enwik8']
configurations = ReformerConfig(is_decoder=False)

for weight in pretrained_weights:
    print(weight)
    model = ReformerForMaskedLM(configurations).from_pretrained(weight)
    tokenizer = ReformerTokenizer.from_pretrained(weight)
    fb = FitBert(model=model, tokenizer=tokenizer)
    predicts = []
    for _, row in df.iterrows():
        predicts.append(fb.rank(row['question'], options=[row['1'], row['2'], row['3'], row['4']])[0])
    print(weight, ':', np.sum(df.anwser == predicts) / df.shape[0])
Error:
AssertionError Traceback (most recent call last)
<ipython-input-5-a6016e0015ba> in <module>()
4 for weight in pretrained_weights:
5 print(weight)
----> 6 model = ReformerForMaskedLM(configurations).from_pretrained(weight)
7 tokenizer = ReformerTokenizer.from_pretrained(weight)
8 fb = FitBert(model=model, tokenizer=tokenizer)
/usr/local/lib/python3.7/dist-packages/transformers/modeling_utils.py in from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs)
1032
1033 # Instantiate model.
-> 1034 model = cls(config, *model_args, **model_kwargs)
1035
1036 if state_dict is None and not from_tf:
/usr/local/lib/python3.7/dist-packages/transformers/models/reformer/modeling_reformer.py in __init__(self, config)
2304 assert (
2305 not config.is_decoder
-> 2306 ), "If you want to use `ReformerForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention."
2307 self.reformer = ReformerModel(config)
2308 self.lm_head = ReformerOnlyLMHead(config)
AssertionError: If you want to use `ReformerForMaskedLM` make sure `config.is_decoder=False` for bi-directional self-attention.
You can override certain model configurations by loading the model config separately and providing it as a parameter to the from_pretrained() method. This ensures that you are using the proper model configuration with the changes you have made:
from transformers import ReformerConfig, ReformerForMaskedLM

config = ReformerConfig.from_pretrained('google/reformer-crime-and-punishment')
print(config.is_decoder)
config.is_decoder = False
print(config.is_decoder)
model = ReformerForMaskedLM.from_pretrained('google/reformer-crime-and-punishment', config=config)
Output:
True
False
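This also explains why the question's ReformerForMaskedLM(configurations).from_pretrained(weight) fails: from_pretrained() is a classmethod, so it discards configurations and reloads the saved config (where is_decoder=True, as the traceback's cls(config, ...) call shows). Applied to the loop from the question, a hypothetical adaptation might look like this:

for weight in pretrained_weights:
    config = ReformerConfig.from_pretrained(weight)
    config.is_decoder = False  # required by ReformerForMaskedLM for bi-directional self-attention
    model = ReformerForMaskedLM.from_pretrained(weight, config=config)
    tokenizer = ReformerTokenizer.from_pretrained(weight)
    fb = FitBert(model=model, tokenizer=tokenizer)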

TuneError: ('Trials did not complete')

I wrote a program using Keras that distinguishes real texts from fake ones (I used 5,000 training samples and 10,000 test samples), using a Transformer with the 'distilbert-base-uncased' model for detection. Now I've decided to tune the hyperparameters using grid search, and I encountered the following error:
TuneError Traceback (most recent call last)
<ipython-input-15-c4a44a2180d8> in <module>()
156 tune_iris,
157 verbose=1,
--> 158 config=hyperparameter_space,
159 )
160
/usr/local/lib/python3.6/dist-packages/ray/tune/tune.py in run(run_or_experiment, name, stop, config, resources_per_trial, num_samples, local_dir, upload_dir, trial_name_creator, loggers, sync_to_cloud, sync_to_driver, checkpoint_freq, checkpoint_at_end, sync_on_checkpoint, keep_checkpoints_num, checkpoint_score_attr, global_checkpoint_period, export_formats, max_failures, fail_fast, restore, search_alg, scheduler, with_server, server_port, verbose, progress_reporter, resume, queue_trials, reuse_actors, trial_executor, raise_on_failed_trial, return_trials, ray_auto_init)
354 if incomplete_trials:
355 if raise_on_failed_trial:
--> 356 raise TuneError("Trials did not complete", incomplete_trials)
357 else:
358 logger.error("Trials did not complete: %s", incomplete_trials)
TuneError: ('Trials did not complete', [tune_iris_83131_00000, tune_iris_83131_00001, tune_iris_83131_00002, tune_iris_83131_00003, tune_iris_83131_00004, tune_iris_83131_00005, tune_iris_83131_00006, tune_iris_83131_00007, tune_iris_83131_00008, tune_iris_83131_00009, tune_iris_83131_00010, tune_iris_83131_00011, tune_iris_83131_00012, tune_iris_83131_00013, tune_iris_83131_00014, tune_iris_83131_00015, tune_iris_83131_00016, tune_iris_83131_00017])
The program I wrote is as follows:
data = pd.concat([train_webtext, train_gen, valid_webtext, valid_gen])
sentences = data['text']
labels = labels1 + labels2
len(sentences), len(labels)

DistilBertTokenizer = DistilBertTokenizer.from_pretrained("distilbert-base-cased", do_lower_case=False)
input_ids = []
attention_masks = []
for sent in sentences:
    bert_inp = DistilBertTokenizer.encode_plus(sent, add_special_tokens=True, max_length=64, pad_to_max_length=True, return_attention_mask=True)
    input_ids.append(bert_inp['input_ids'])
    attention_masks.append(bert_inp['attention_mask'])
input_ids = np.asarray(input_ids)
attention_masks = np.array(attention_masks)
labels = np.array(labels)

class TuneReporterCallback(keras.callbacks.Callback):
    """Tune Callback for Keras. The callback is invoked every epoch."""

    def __init__(self, logs={}):
        self.iteration = 0
        super(TuneReporterCallback, self).__init__()

    def on_epoch_end(self, batch, logs={}):
        self.iteration += 1
        tune.report(keras_info=logs, mean_accuracy=logs.get("accuracy"), mean_loss=logs.get("loss"))

def tune_gpt(config):
    train_inp, val_inp, train_label, val_label, train_mask, val_mask = train_test_split(input_ids, labels, attention_masks, test_size=0.6666666666666666)
    DistilBert_model = TFDistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)
    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
    optimizer = tf.keras.optimizers.Adam(learning_rate=config["learning_rate"], epsilon=1e-08)
    DistilBert_model.compile(loss=loss, optimizer=optimizer, metrics=[metric])
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint("DistilBert_model.h5", monitor='val_loss', mode='min', save_best_only=True)
    callbacks = [checkpoint_callback, TuneReporterCallback()]
    history = DistilBert_model.fit([train_inp, train_mask], train_label, batch_size=config["batch_size"], epochs=config["epochs"], validation_data=([val_inp, val_mask], val_label), callbacks=callbacks)

assert len(inspect.getargspec(tune_gpt).args) == 1, "The `tune_gpt` function needs to take in the arg `config`."

hyperparameter_space = {
    "batch_size": tune.grid_search([16, 32]),
    "learning_rate": tune.grid_search([2e-5, 3e-5, 5e-5]),
    "epochs": tune.grid_search([2, 3, 4])
}

analysis = tune.run(
    tune_gpt,
    verbose=1,
    config=hyperparameter_space,
)
It seems your code has some errors, but the detailed error messages do not appear because of the verbose option.
Please change the verbose option
verbose=1
to
verbose=3
to see the detailed error.
(Verbosity mode: 0 = silent, 1 = only status updates, 2 = status and brief trial results, 3 = status and detailed trial results. Defaults to 3.)
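In other words, the call at the end of the question becomes:

analysis = tune.run(
    tune_gpt,
    verbose=3,  # 3 = status and detailed trial results, so trial errors are printed
    config=hyperparameter_space,
)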
