Hello, I've trained a PPO model from stable_baselines3 on Google Colab and saved it with:
model.save("model")
but when I tried loading it I got the following error:
m = PPO.load("model", env=env)
AttributeError Traceback (most recent call last)
/tmp/ipykernel_25649/121834194.py in <module>
2 env = e.MinitaurBulletEnv(render=False)
3 env.reset()
----> 4 m2 = PPO.load("model", env=env)
5 for episode in range(1, 6):
6 obs = env.reset()
~/anaconda3/lib/python3.8/site-packages/stable_baselines3/common/base_class.py in load(cls, path, env, device, custom_objects, **kwargs)
668 env = cls._wrap_env(env, data["verbose"])
669 # Check if given env is valid
--> 670 check_for_correct_spaces(env, data["observation_space"], data["action_space"])
671 else:
672 # Use stored env, if one exists. If not, continue as is (can be used for predict)
~/anaconda3/lib/python3.8/site-packages/stable_baselines3/common/utils.py in check_for_correct_spaces(env, observation_space, action_space)
217 :param action_space: Action space to check against
218 """
--> 219 if observation_space != env.observation_space:
220 raise ValueError(f"Observation spaces do not match: {observation_space} != {env.observation_space}")
221 if action_space != env.action_space:
~/anaconda3/lib/python3.8/site-packages/gym/spaces/box.py in __eq__(self, other)
138
139 def __eq__(self, other):
--> 140 return isinstance(other, Box) and (self.shape == other.shape) and np.allclose(self.low, other.low) and np.allclose(self.high, other.high)
AttributeError: 'Box' object has no attribute 'shape'
Note that the env is a Box-space env from pybullet:
import pybullet_envs.bullet.minitaur_gym_env as e
import gym
env = e.MinitaurBulletEnv(render=False)
env.reset()
Additional info: the model loads perfectly in Colab.
From your question I can't tell whether you're working on Google Colab, but if you are, you should definitely include the full path to the saved model when you load it. You may need to do this even outside Colab.
What I mean is that your line of code should probably look something like this when you're loading the model:
m = PPO.load("./model.zip", env=env)
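For example, on Colab the saved file typically ends up under /content, so a quick sanity check before loading could look like this (a sketch only; adjust the path to wherever model.save() actually wrote the zip):
import os
from stable_baselines3 import PPO
path = "/content/model.zip"  # assumed location: Colab's default working directory is /content
assert os.path.exists(path), f"no checkpoint found at {path}"
m = PPO.load(path, env=env)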
I hope this helps!
Currently, I'm exploring machine learning interpretability tools for one of my projects. Shapash is a fairly new tool, and many people suggest using it to create a few easily interpretable charts for an ML model. When I tried it with RandomForestClassifier it worked fine and generated a web page full of different charts, but I can't achieve the same with an SVM (I'm just exploring this library, not aiming for the perfect ML model for the problem).
Note: I'm using Shapash (link here).
from sklearn import svm
from sklearn.metrics import accuracy_score, f1_score
from shapash import SmartExplainer

# Fit black-box model
svc = svm.SVC()
svc.fit(X_train_smote, y_train_smote)
y_pred = svc.predict(X_test)
print(f"F1 Score {f1_score(y_test, y_pred, average='macro')}")
print(f"Accuracy {accuracy_score(y_test, y_pred)}")

# Build the Shapash explainer (this is the line that raises the exception)
xpl = SmartExplainer(model=svc)
The error I'm getting:
---------------------------------------------------------------------------
Exception Traceback (most recent call last)
/tmp/ipykernel_13648/1233939729.py in <module>
----> 1 xpl = SmartExplainer(model=svc)
~/Python_AI/ai_env/lib/python3.8/site-packages/shapash/explainer/smart_explainer.py in __init__(self, model, backend, preprocessing, postprocessing, features_groups, features_dict, label_dict, title_story, palette_name, colors_dict, **kwargs)
194 if isinstance(backend, str):
195 backend_cls = get_backend_cls_from_name(backend)
--> 196 self.backend = backend_cls(
197 model=self.model, preprocessing=preprocessing, **kwargs)
198 elif isinstance(backend, BaseBackend):
~/Python_AI/ai_env/lib/python3.8/site-packages/shapash/backend/shap_backend.py in __init__(self, model, preprocessing, explainer_args, explainer_compute_args)
16 self.explainer_args = explainer_args if explainer_args else {}
17 self.explainer_compute_args = explainer_compute_args if explainer_compute_args else {}
---> 18 self.explainer = shap.Explainer(model=model, **self.explainer_args)
19
20 def run_explainer(self, x: pd.DataFrame) -> dict:
~/Python_AI/ai_env/lib/python3.8/site-packages/shap/explainers/_explainer.py in __init__(self, model, masker, link, algorithm, output_names, feature_names, **kwargs)
166 # if we get here then we don't know how to handle what was given to us
167 else:
--> 168 raise Exception("The passed model is not callable and cannot be analyzed directly with the given masker! Model: " + str(model))
169
170 # build the right subclass
Exception: The passed model is not callable and cannot be analyzed directly with the given masker! Model: SVC()
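For context on what the exception means: shap's generic Explainer accepts either a model type it recognizes or a callable plus a masker (background data), and a bare SVC() instance is neither. As an illustration of the underlying shap API only (not a Shapash-level fix, and assuming X_train_smote and X_test are pandas DataFrames), something like this gives shap a callable to work with:
import shap
# Pass a prediction function plus background data (the masker) to shap's generic Explainer.
explainer = shap.Explainer(svc.decision_function, X_train_smote)
shap_values = explainer(X_test)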
Is anyone else facing an issue while running the Prophet model after the latest v1.1.2 release?
I get the following error when executing model.fit():
RuntimeError Traceback (most recent call last)
<ipython-input-86-1e4ae74985f6> in <module>
----> 1 model_training(top_5_aro, X_trainARO, X_validARO, df_ARO, predict_index)
<ipython-input-85-9ee9b229fcde> in model_training(list_accounts, df_train, df_validation, df_original, predict_index)
98 model = Prophet()
99 # fit the model
--> 100 model.fit(train_data3)
101
102 # use the model to make a forecast
/opt/app-root/lib64/python3.8/site-packages/prophet/forecaster.py in fit(self, df, **kwargs)
1179 self.params = self.stan_backend.sampling(stan_init, dat, self.mcmc_samples, **kwargs)
1180 else:
-> 1181 self.params = self.stan_backend.fit(stan_init, dat, **kwargs)
1182
1183 self.stan_fit = self.stan_backend.stan_fit
/opt/app-root/lib64/python3.8/site-packages/prophet/models.py in fit(self, stan_init, stan_data, **kwargs)
98 # Fall back on Newton
99 if not self.newton_fallback or args['algorithm'] == 'Newton':
--> 100 raise e
101 logger.warning('Optimization terminated abnormally. Falling back to Newton.')
102 args['algorithm'] = 'Newton'
/opt/app-root/lib64/python3.8/site-packages/prophet/models.py in fit(self, stan_init, stan_data, **kwargs)
94
95 try:
---> 96 self.stan_fit = self.model.optimize(**args)
97 except RuntimeError as e:
98 # Fall back on Newton
/opt/app-root/lib64/python3.8/site-packages/cmdstanpy/model.py in optimize(self, data, seed, inits, output_dir, sig_figs, save_profile, algorithm, init_alpha, tol_obj, tol_rel_obj, tol_grad, tol_rel_grad, tol_param, history_size, iter, save_iterations, require_converged, show_console, refresh, time_fmt, timeout)
736 get_logger().warning(msg)
737 else:
--> 738 raise RuntimeError(msg)
739 mle = CmdStanMLE(runset)
740 return mle
RuntimeError: Error during optimization! Command '/opt/app-root/lib/python3.8/site-packages/prophet/stan_model/prophet_model.bin random seed=97108 data file=/tmp/tmplilnrs7m/fk4f5y6m.json init=/tmp/tmplilnrs7m/q_6vjmch.json output file=/tmp/tmplilnrs7m/prophet_model1ny0mums/prophet_model-20230125181922.csv method=optimize algorithm=newton iter=10000' failed
I did not have this issue in the previous version. Any help appreciated!
Python version: 3.8.3
Prophet version: 1.1.2
With the earlier version I was able to train the model and view its predictions; after upgrading to prophet v1.1.2, training fails with this error.
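One way to narrow it down is to fit Prophet on a tiny synthetic series: if this also fails, the problem is the installation/cmdstanpy backend rather than train_data3 (a sketch; 'ds' and 'y' are the column names Prophet expects):
import pandas as pd
from prophet import Prophet

# Minimal synthetic series in the ds/y format Prophet expects.
df = pd.DataFrame({
    "ds": pd.date_range("2022-01-01", periods=60, freq="D"),
    "y": range(60),
})
m = Prophet()
m.fit(df)  # if this raises the same optimization error, the backend/install is at fault
print(m.predict(m.make_future_dataframe(periods=7)).tail())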
I want to train new NER entities with the following code:
import random

import spacy
from spacy.training import Example
from spacy.util import minibatch, compounding

def train_spacy_model(data, model='en_core_web_trf', n_iter=50):
    if model is not None:
        nlp = spacy.load(model)  # load existing spaCy model
        print("Loaded model '%s'" % model)
    TRAIN_DATA = data
    ner = nlp.get_pipe("ner")
    examples = []
    for text, annotations in TRAIN_DATA:
        examples.append(Example.from_dict(nlp.make_doc(text), annotations))
    nlp.initialize(lambda: examples)
    pipe_exceptions = ["ner"]
    other_pipes = [pipe for pipe in nlp.pipe_names if pipe not in pipe_exceptions]
    with nlp.disable_pipes(*other_pipes):  # only train NER
        for itn in range(n_iter):
            random.shuffle(examples)
            losses = {}
            batches = minibatch(examples, size=compounding(4.0, 64.0, 1.2))
            for batch in batches:
                texts, annotations = zip(*batch)  # leftover from the spaCy v2 API; not used below
                nlp.update(
                    batch,
                    drop=0.20,
                    losses=losses
                )
            print("Losses", losses)
    return nlp

nlp = train_spacy_model(data=dataset, n_iter=30)
I keep getting this error:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[296], line 40
36 print("Losses", losses)
38 return nlp
---> 40 nlp = train_spacy_model(data=no_verlaps_dataset, n_iter=30)
42 # save model to output directory
43 output_dir = '_data/models/actor_ner'
Cell In[296], line 16, in train_spacy_model(data, model, n_iter)
14 for text, annotations in TRAIN_DATA:
15 examples.append(Example.from_dict(nlp.make_doc(text), annotations))
---> 16 nlp.initialize(lambda: examples)
17 # for ent in annotations.get('entities'):
18 # ner.add_label(ent[2])
20 pipe_exceptions = ["ner", "trf_wordpiecer", "trf_tok2vec"]
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/language.py:1290, in Language.initialize(self, get_examples, sgd)
1288 config = self.config.interpolate()
1289 # These are the settings provided in the [initialize] block in the config
-> 1290 I = registry.resolve(config["initialize"], schema=ConfigSchemaInit)
1291 before_init = I["before_init"]
1292 if before_init is not None:
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/config.py:746, in registry.resolve(cls, config, schema, overrides, validate)
737 #classmethod
738 def resolve(
739 cls,
(...)
744 validate: bool = True,
745 ) -> Dict[str, Any]:
--> 746 resolved, _ = cls._make(
747 config, schema=schema, overrides=overrides, validate=validate, resolve=True
748 )
749 return resolved
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/config.py:795, in registry._make(cls, config, schema, overrides, resolve, validate)
793 if not is_interpolated:
794 config = Config(orig_config).interpolate()
--> 795 filled, _, resolved = cls._fill(
796 config, schema, validate=validate, overrides=overrides, resolve=resolve
797 )
798 filled = Config(filled, section_order=section_order)
799 # Check that overrides didn't include invalid properties not in config
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/thinc/config.py:867, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
864 getter = cls.get(reg_name, func_name)
865 # We don't want to try/except this and raise our own error
866 # here, because we want the traceback if the function fails.
--> 867 getter_result = getter(*args, **kwargs)
868 else:
869 # We're not resolving and calling the function, so replace
870 # the getter_result with a Promise class
871 getter_result = Promise(
872 registry=reg_name, name=func_name, args=args, kwargs=kwargs
873 )
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/language.py:108, in load_lookups_data(lang, tables)
105 #registry.misc("spacy.LookupsDataLoader.v1")
106 def load_lookups_data(lang, tables):
107 util.logger.debug(f"Loading lookups from spacy-lookups-data: {tables}")
--> 108 lookups = load_lookups(lang=lang, tables=tables)
109 return lookups
File ~/miniconda3/envs/tvman_ENV/lib/python3.9/site-packages/spacy/lookups.py:30, in load_lookups(lang, tables, strict)
28 if lang not in registry.lookups:
29 if strict and len(tables) > 0:
---> 30 raise ValueError(Errors.E955.format(table=", ".join(tables), lang=lang))
31 return lookups
32 data = registry.lookups.get(lang)
ValueError: [E955] Can't find table(s) lexeme_norm for language 'en' in spacy-lookups-data. Make sure you have the package installed or provide your own lookup tables if no default lookups are available for your language.
I have installed the package:
pip install spacy-lookups-data
Collecting spacy-lookups-data
Downloading spacy_lookups_data-1.0.3-py2.py3-none-any.whl (98.5 MB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 98.5/98.5 MB 25.9 MB/s eta 0:00:00
But the error still persists.
How can I fix this error so I can get on with updating the model to detect new entities for NER tasks?
EDIT
It got fixed when I restarted the kernel of the Jupyter notebook this code was running in.
To answer the narrow question: you probably need to restart your runtime in order for the tables in spacy-lookups-data to be registered.
To answer the question you didn't ask: the quoted script looks like it was only partially updated from v2 and I wouldn't recommend using it, in particular not for en_core_web_trf. One recommended way to update ner components in spacy v3 pipelines is shown in this demo project:
https://github.com/explosion/projects/tree/v3/pipelines/ner_demo_update
It handles a lot of the pipeline/config/training details for you in order to update ner without affecting the performance of the other components in the pipeline. A walkthrough of how to run a project is shown in the v2->v3 examples README.
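If a quick in-script update is all that's needed, a rough sketch of the v3-style loop is below (assuming a non-transformer pipeline such as en_core_web_sm and TRAIN_DATA in the usual (text, {"entities": [...]}) format); for anything beyond a quick experiment, the linked project template is the better route:
import random
import spacy
from spacy.training import Example
from spacy.util import minibatch

nlp = spacy.load("en_core_web_sm")  # assumption: a non-transformer pipeline
ner = nlp.get_pipe("ner")
for _text, annotations in TRAIN_DATA:  # register any new labels first
    for _start, _end, label in annotations["entities"]:
        ner.add_label(label)

optimizer = nlp.resume_training()      # keep existing weights instead of re-initializing
with nlp.select_pipes(enable="ner"):   # only update the ner component
    for itn in range(10):
        random.shuffle(TRAIN_DATA)
        losses = {}
        for batch in minibatch(TRAIN_DATA, size=8):
            examples = [Example.from_dict(nlp.make_doc(t), a) for t, a in batch]
            nlp.update(examples, drop=0.2, sgd=optimizer, losses=losses)
        print("Losses", losses)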
I've been running this code in Jupyter Notebook and the error persisted until I restarted the kernel. So the answer is to restart the notebook kernel.
I am using JupyterLab on AWS SageMaker. Kernel: conda_pytorch_latest_p36.
I have successfully performed training.
Now, I attempt to set up the model for predictions, i.e. testing.
I suspect the last.ckpt file is corrupt, as it fails on this line:
model = OntologyTaggerModel.load_from_checkpoint('last.ckpt.2cCC2f52', map_location=torch.device(device), from_checkpoint=True)
Where does the last.ckpt file come from - the BERT download or my own model definition?
How do I regenerate it?
Update: I was able to re-generate it: last.ckpt.E342d53e.
Run model load with last.ckpt.E342d53e:
RuntimeError: [enforce fail at inline_container.cc:145] . PytorchStreamReader failed reading zip archive: failed finding central directory
Run model load with last.ckpt (without unique string in filename):
FileNotFoundError: [Errno 2] No such file or directory: '/home/ec2-user/SageMaker/last.ckpt'
I launched a new AWS SageMaker instance without luck.
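One quick way to check whether the checkpoint is even a valid archive (the "failed finding central directory" message means torch could not read it as a zip file) is a small diagnostic like this - a sketch, assuming the file sits in the notebook's working directory:
import os
import zipfile
ckpt = "last.ckpt.2cCC2f52"                     # the file name from the failing call
print("size (bytes):", os.path.getsize(ckpt))   # a tiny or zero size points to a truncated copy/upload
print("valid zip:", zipfile.is_zipfile(ckpt))   # PyTorch >= 1.6 checkpoints are zip archives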
Suspect code (second-to-last line):
import torch
from transformers import AutoTokenizer
# OntologyTaggerModel and LabelMapper are project-specific classes (not shown here).

def get_device():
    device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
    return device

def preprocess(input, preprocessor):
    result = [torch.tensor(preprocessor.tokenise(i)).unsqueeze(dim=0) for i in input]
    result = torch.cat(result)
    return result

def predict_fn(input, model_artifacts):
    preprocessor, model, label_mapper = model_artifacts
    # Pre-process
    input_tensor = preprocess(input, preprocessor)
    # Copy input to gpu if available
    device = get_device()
    input_tensor = input_tensor.to(device=device)
    # Invoke
    model.eval()
    classes = []
    probs = []
    with torch.no_grad():
        output_tensors = model(input_tensor)[1]
        # Convert to probabilities
        softmax = torch.nn.Softmax()
        for class_index, output_tensor in enumerate(output_tensors):
            output_tensor = softmax(output_tensor)
            prob, predictions = torch.max(output_tensor, dim=1)
            classes.append(label_mapper.reverse_map(predictions, class_index))
            probs.append(prob)
    classes = [c for c in zip(*classes)]
    probs = [c for c in zip(*probs)]
    return classes, probs

device = get_device()
tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
label_mapper = LabelMapper('classes.txt')
model = OntologyTaggerModel.load_from_checkpoint('last.ckpt.2cCC2f52', map_location=torch.device(device), from_checkpoint=True)  # CRASH !
model = model.to(device)
Traceback:
---------------------------------------------------------------------------
RuntimeError Traceback (most recent call last)
<ipython-input-8-ba98e0974205> in <module>
36 tokenizer = AutoTokenizer.from_pretrained('bert-base-cased')
37 label_mapper = LabelMapper('classes.txt')
---> 38 model = OntologyTaggerModel.load_from_checkpoint('last.ckpt.2cCC2f52', map_location=torch.device(device), from_checkpoint=True)
39 model = model.to(device)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/core/saving.py in load_from_checkpoint(cls, checkpoint_path, map_location, hparams_file, strict, **kwargs)
131 """
132 if map_location is not None:
--> 133 checkpoint = pl_load(checkpoint_path, map_location=map_location)
134 else:
135 checkpoint = pl_load(checkpoint_path, map_location=lambda storage, loc: storage)
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/pytorch_lightning/utilities/cloud_io.py in load(path_or_url, map_location)
44 fs = get_filesystem(path_or_url)
45 with fs.open(path_or_url, "rb") as f:
---> 46 return torch.load(f, map_location=map_location)
47
48
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/serialization.py in load(f, map_location, pickle_module, **pickle_load_args)
585 # reset back to the original position.
586 orig_position = opened_file.tell()
--> 587 with _open_zipfile_reader(opened_file) as opened_zipfile:
588 if _is_torchscript_zip(opened_zipfile):
589 warnings.warn("'torch.load' received a zip file that looks like a TorchScript archive"
~/anaconda3/envs/pytorch_latest_p36/lib/python3.6/site-packages/torch/serialization.py in __init__(self, name_or_buffer)
240 class _open_zipfile_reader(_opener):
241 def __init__(self, name_or_buffer) -> None:
--> 242 super(_open_zipfile_reader, self).__init__(torch._C.PyTorchFileReader(name_or_buffer))
243
244
RuntimeError: [enforce fail at inline_container.cc:145] . PytorchStreamReader failed reading zip archive: failed finding central directory
Please let me know if I should add anything else.
After messing with this for quite a while, I am finally able to save a pure Python custom transformer in Spark 2.3. But I get an error when loading the transformer back.
I checked the content of what was saved and found all the relevant variables in the file on HDFS. It would be great if someone could spot what I am missing in this simple transformer.
from pyspark.sql import functions as F
from pyspark.ml import Transformer
from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
from pyspark.ml.param.shared import Param, Params, TypeConverters

class AggregateTransformer(Transformer, DefaultParamsWritable, DefaultParamsReadable):
    aggCols = Param(Params._dummy(), "aggCols", "", TypeConverters.toListString)
    valCols = Param(Params._dummy(), "valCols", "", TypeConverters.toListString)

    def __init__(self, aggCols, valCols):
        super(AggregateTransformer, self).__init__()
        self._setDefault(aggCols=[''])
        self._set(aggCols=aggCols)
        self._setDefault(valCols=[''])
        self._set(valCols=valCols)

    def getAggCols(self):
        return self.getOrDefault(self.aggCols)

    def setAggCols(self, aggCols):
        self._set(aggCols=aggCols)

    def getValCols(self):
        return self.getOrDefault(self.valCols)

    def setValCols(self, valCols):
        self._set(valCols=valCols)

    def _transform(self, dataset):
        aggFuncs = []
        for valCol in self.getValCols():
            aggFuncs.append(F.sum(valCol).alias("sum_" + valCol))
            aggFuncs.append(F.min(valCol).alias("min_" + valCol))
            aggFuncs.append(F.max(valCol).alias("max_" + valCol))
            aggFuncs.append(F.count(valCol).alias("cnt_" + valCol))
            aggFuncs.append(F.avg(valCol).alias("avg_" + valCol))
            aggFuncs.append(F.stddev(valCol).alias("stddev_" + valCol))
        dataset = dataset.groupBy(self.getAggCols()).agg(*aggFuncs)
        return dataset
I get this error when I load an instance of this transformer after saving it.
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-172-44e20f7e3842> in <module>()
----> 1 x = agg.load("/tmp/test")
/usr/hdp/current/spark2.3-client/python/pyspark/ml/util.py in load(cls, path)
309 def load(cls, path):
310 """Reads an ML instance from the input path, a shortcut of `read().load(path)`."""
--> 311 return cls.read().load(path)
312
313
/usr/hdp/current/spark2.3-client/python/pyspark/ml/util.py in load(self, path)
482 metadata = DefaultParamsReader.loadMetadata(path, self.sc)
483 py_type = DefaultParamsReader.__get_class(metadata['class'])
--> 484 instance = py_type()
485 instance._resetUid(metadata['uid'])
486 DefaultParamsReader.getAndSetParams(instance, metadata)
TypeError: __init__() missing 2 required positional arguments: 'aggCols' and 'valCols'
Figured out the answer!
The problem was that a new Transformer instance was being initialized by the reader, but the __init__ function of my AggregateTransformer didn't have default values for its arguments.
So changing the following line of code fixed the issue!
def __init__(self,aggCols=[],valCols=[]):
I'm going to leave this question and answer here, since it was incredibly difficult for me to find a working example of a pure Python transformer that could be saved and read back. It might help someone looking for this.
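For anyone copying this pattern, here is a condensed sketch of the corrected class plus a save/load round trip (it uses None defaults rather than mutable list defaults, the column names key/value are placeholders, and the aggregation logic in _transform is stubbed out; the real logic is in the question above):
from pyspark.ml import Transformer
from pyspark.ml.util import DefaultParamsReadable, DefaultParamsWritable
from pyspark.ml.param.shared import Param, Params, TypeConverters

class AggregateTransformer(Transformer, DefaultParamsWritable, DefaultParamsReadable):
    aggCols = Param(Params._dummy(), "aggCols", "", TypeConverters.toListString)
    valCols = Param(Params._dummy(), "valCols", "", TypeConverters.toListString)

    # No-argument construction must work, because DefaultParamsReader calls py_type()
    # with no arguments and only then restores the params from the saved metadata.
    def __init__(self, aggCols=None, valCols=None):
        super(AggregateTransformer, self).__init__()
        self._setDefault(aggCols=[''], valCols=[''])
        if aggCols is not None:
            self._set(aggCols=aggCols)
        if valCols is not None:
            self._set(valCols=valCols)

    def _transform(self, dataset):
        # placeholder: put the groupBy/agg logic from the question here
        return dataset

agg = AggregateTransformer(aggCols=["key"], valCols=["value"])
agg.write().overwrite().save("/tmp/test")
restored = AggregateTransformer.load("/tmp/test")  # works now: __init__() can be called with no args
print(restored.getOrDefault(restored.valCols))     # ['value']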