Create a registered model in the "Staging" stage - mlflow

Currently, when I add a model to the MLflow registry, its default stage is "None". I wonder if it is possible to create the model directly in the "Staging" or "Production" stage. I am registering the model as:
def log_model(
    model,
    artifact_path,
    # conda_env=None,
    # code_paths=None,
    registered_model_name=None,
    # signature: ModelSignature = None,
    # input_example: ModelInputExample = None,
    # pip_requirements=None,
    # extra_pip_requirements=None,
    **kwargs,
):
    return Model.log(
        proj_model=model,
        artifact_path=str(artifact_path),
        flavor=proj.mlflow.flavor,
        registered_model_name=registered_model_name,
        **kwargs,
    )
I wonder if the stage can somehow be specified already in the log call.
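As far as I know there is no stage argument on the log call itself; new versions always start in the "None" stage. A common workaround is to transition the freshly created version right after logging, using the MlflowClient API. A minimal sketch, assuming the model was registered under the (hypothetical) name "my_model":

from mlflow.tracking import MlflowClient

client = MlflowClient()
# log_model(..., registered_model_name="my_model") has just created a new
# version in the "None" stage; look it up and promote it to "Staging".
latest = client.get_latest_versions("my_model", stages=["None"])[0]
client.transition_model_version_stage(
    name="my_model",
    version=latest.version,
    stage="Staging",
)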


Hyperparameter search while adding special tokens

# define get_model function
def get_model(params):
    db_config = config
    if params is not None:
        db_config.update({
            'attention_probs_dropout_prob': params['attention_drop_out'],
            'hidden_dropout_prob': params['hidden_drop_out'],
        })
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        config=db_config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
        ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
    )
    if special_tokens is not None:
        model.resize_token_embeddings(len(tokenizer))
    # set up label_to_id
    model.config.label2id = label_to_id
    model.config.id2label = {
        id: label for label, id in config.label2id.items()}
    return model

def ray_hp_space(trial):
    return {
        "attention_drop_out": tune.uniform(0.1, 0.5),
        "hidden_drop_out": tune.uniform(0.1, 0.5),
        "learning_rate": tune.uniform(1e-5, 2e-5),
        "weight_decay": tune.uniform(0.005, 0.01),
        "gradient_accumulation_steps": tune.choice([1, 2, 4]),
        "label_smoothing_factor": tune.choice([.7, .8, .9, .91]),
    }
trainer = Trainer(
    model_init=get_model,
    args=training_args,
    train_dataset=train_dataset if training_args.do_train else None,
    eval_dataset=validation_dataset if training_args.do_eval else None,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
    data_collator=data_collator,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=7)],
)

scheduler = ASHAScheduler(
    metric="f1",
    mode="max",
    max_t=1,
    grace_period=1,
    reduction_factor=2)

reporter = CLIReporter(
    parameter_columns={
        "weight_decay": "w_decay",
        "learning_rate": "lr",
        "gradient_accumulation_steps": "gradient_accum_steps",
        "label_smoothing_factor": "label_smooth",
        "hidden_drop_out": "hidden_drop_out",
        "attention_drop_out": "attention_drop_out",
    },
    metric_columns=[
        "eval_accuracy", "eval_f1", "eval_loss", "steps"
    ])

best_trial = trainer.hyperparameter_search(
    direction="maximize",
    backend='ray',
    hp_space=ray_hp_space,
    n_trials=1,
    resources_per_trial={"cpu": 2, "gpu": 1},
    scheduler=scheduler,
    keep_checkpoints_num=1,
    checkpoint_score_attr="training_iteration",
    progress_reporter=reporter,
    local_dir="experiments/ray-tune-results/",
)
The problem is that at some point in the training, the Trainer creates a new model instance from the original config (without the additional special tokens) and tries to copy weights from the current instance (which has the additional special tokens), and this throws a size-mismatch error. It looks like I have to force it to create the first instance from the original config and from then on use the current config (with the additional special tokens). How can I fix that?
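One possible fix, sketched below: make model_init resize the token embeddings unconditionally, so every fresh instance the Trainer builds has the same embedding shape as the checkpoint weights it restores. This assumes tokenizer already contains the added special tokens; all other names come from the snippet above.

def get_model(params):
    db_config = config
    if params is not None:
        db_config.update({
            'attention_probs_dropout_prob': params['attention_drop_out'],
            'hidden_dropout_prob': params['hidden_drop_out'],
        })
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        config=db_config,
        cache_dir=model_args.cache_dir,
        revision=model_args.model_revision,
        use_auth_token=True if model_args.use_auth_token else None,
        ignore_mismatched_sizes=model_args.ignore_mismatched_sizes,
    )
    # Resize on every call, not only when special_tokens is set: the Trainer
    # calls model_init repeatedly during the search, and each new instance
    # must match the vocabulary size of the weights it is loaded with.
    model.resize_token_embeddings(len(tokenizer))
    model.config.label2id = label_to_id
    model.config.id2label = {i: l for l, i in model.config.label2id.items()}
    return model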

In Django REST framework's authentication, how do I set the default access scheme to allow all endpoints to not require authentication?

I'm using Django 3.2 and djangorestframework==3.12.2. I recently added this to my settings file because I want to add some secured endpoints to my application ...
REST_FRAMEWORK = {
    'DEFAULT_PERMISSION_CLASSES': [
        'rest_framework.permissions.IsAuthenticated',
        'rest_framework.permissions.IsAdminUser',
    ],
    'DEFAULT_AUTHENTICATION_CLASSES': (
        'rest_framework_jwt.authentication.JSONWebTokenAuthentication',
    )
}

JWT_AUTH = {
    # how long the original token is valid for
    'JWT_EXPIRATION_DELTA': datetime.timedelta(hours=1),
}
However, this seems to have caused all my endpoints to require authentication. For example, I had this view set up in my views.py file
class CoopList(APIView):
    """
    List all coops, or create a new coop.
    """
    def get(self, request, format=None):
        contains = request.GET.get("contains", "")
        if contains:
            coops = Coop.objects.find(
                partial_name=contains,
                enabled=True
            )
        else:
            partial_name = request.GET.get("name", "")
            enabled_req_param = request.GET.get("enabled", None)
            enabled = enabled_req_param.lower() == "true" if enabled_req_param else None
            city = request.GET.get("city", None)
            zip = request.GET.get("zip", None)
            street = request.GET.get("street", None)
            state = request.GET.get("state", None)
            coop_types = request.GET.get("coop_type", None)
            types_arr = coop_types.split(",") if coop_types else None
            coops = Coop.objects.find(
                partial_name=partial_name,
                enabled=enabled,
                street=street,
                city=city,
                zip=zip,
                state_abbrev=state,
                types_arr=types_arr
            )
        serializer = CoopSearchSerializer(coops, many=True)
        return Response(serializer.data)
accessible in my urls.py file using
path('coops/', views.CoopList.as_view()),
But now when I try and call that I get the below response
{"detail":"Authentication credentials were not provided."}
I only want certain views/endpoints secured. How do I make the default that all views are accessible and only specify some views/endpoints to be validated using a provided JWT?
'DEFAULT_PERMISSION_CLASSES' is conveniently applied to all views, unless manually overridden. In your case both listed permissions require the user to be authenticated. FYI, the list is evaluated in an AND fashion: every listed permission must grant access.
If you want to allow everyone by default and only tighten down specific views, you want to set
'DEFAULT_PERMISSION_CLASSES': ['rest_framework.permissions.AllowAny']
which does not require the user to be authenticated. Then set stricter permissions explicitly on the views that need them (e.g. permission_classes = [IsAuthenticated]). The DEFAULT_AUTHENTICATION_CLASSES setting can stay as is.
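For example, a single secured view while everything else stays open under the AllowAny default (the view name here is illustrative):

from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from rest_framework.views import APIView

class SecuredCoopDetail(APIView):
    # Overrides the AllowAny default; only this endpoint requires a valid JWT.
    permission_classes = [IsAuthenticated]

    def get(self, request, format=None):
        return Response({"detail": "authenticated"})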
NOTE: It is generally advisable to do it the other way round. It's very easy to accidentally expose an unsecured endpoint this way and potentially create a security breach in your API. The default should be secure, and exceptions should then be lifted manually.
Set the below configuration in settings.py:

'DEFAULT_PERMISSION_CLASSES': [
    'rest_framework.permissions.IsAuthenticated',
]
For class-based views you can set the permission classes to an empty list:

class CoopList(APIView):
    permission_classes = []

    def get(self, request, format=None):
        pass
For function-based views, add the @permission_classes decorator (it must sit below @api_view for DRF to pick it up):

from rest_framework.decorators import api_view, permission_classes

@api_view(['GET'])
@permission_classes([])
def CoopList(request, format=None):
    pass

How to load a specific catalog dataset instance in kedro 0.17.0?

We were using kedro version 0.15.8 and we were loading one specific item from the catalog this way:
from kedro.context import load_context

# project_path (a stand-in name here) is the path to the Kedro project root
load_context(project_path).catalog.datasets.__dict__[key]

Now, we are changing to kedro 0.17.0 and trying to load the catalog's datasets the same way (using the framework context):

from kedro.framework.context import load_context

load_context(project_path).catalog.datasets.__dict__[key]
And now we get the error:
kedro.framework.context.context.KedroContextError: Expected an instance of ConfigLoader, got NoneType instead.
This is because the project's register_config_loader hook is not being picked up by the hook_manager that calls the function.
The project hooks are defined in the following way:
from typing import Any, Dict, Iterable, Optional

from kedro.config import ConfigLoader
from kedro.framework.hooks import hook_impl
from kedro.io import DataCatalog
from kedro.pipeline import Pipeline
from kedro.versioning import Journal

class ProjectHooks:
    @hook_impl
    def register_pipelines(self) -> Dict[str, Pipeline]:
        """Register the project's pipeline.

        Returns:
            A mapping from a pipeline name to a ``Pipeline`` object.
        """
        pm = pre_master.create_pipeline()
        return {
            "pre_master": pm,
            "__default__": pm,
        }

    @hook_impl
    def register_config_loader(self, conf_paths: Iterable[str]) -> ConfigLoader:
        return ConfigLoader(conf_paths)

    @hook_impl
    def register_catalog(
        self,
        catalog: Optional[Dict[str, Dict[str, Any]]],
        credentials: Dict[str, Dict[str, Any]],
        load_versions: Dict[str, str],
        save_version: str,
        journal: Journal,
    ) -> DataCatalog:
        return DataCatalog.from_config(
            catalog, credentials, load_versions, save_version, journal
        )

project_hooks = ProjectHooks()
And the settings file is set up in the following way:
"""Project settings."""
from price_based_trading.hooks import ProjectHooks
HOOKS = (ProjectHooks(),)
How can we configure this so that the hooks are used when calling load_context(_working_dir).catalog.datasets?
I posted the same question in the kedro community: https://discourse.kedro.community/t/how-to-load-a-specific-catalog-item-in-kedro-0-17-0/310
It was a silly mistake: I was not creating the Kedro session. An item of the catalog can be loaded with the following code:
from kedro.framework.session import KedroSession

with KedroSession.create("name_of_project") as session:
    key = "item_of_catalog"
    context = session.load_context()
    kedro_connector = context.catalog.datasets.__dict__[key]
    # or kedro_connector = context.catalog._get_dataset(key)
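As a side note, if you only need the data itself rather than the dataset/connector object, the public catalog API avoids reaching into __dict__ (this assumes the same session and context as above):

# Loads the underlying data registered under the given catalog key.
data = context.catalog.load("item_of_catalog")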

SecretManagerServiceClient in Google Cloud Run and authentication via service account

I can successfully create a SecretManagerServiceClient in Google Cloud Shell without using a key file:
from google.cloud import secretmanager
from google.oauth2 import service_account
from google.auth.exceptions import DefaultCredentialsError
import logging
import sys
import os

def list_secrets(client, project_id):
    """
    Retrieve all secrets associated with a project

    :param project_id: the alpha-numeric name of the project
    :return: a generator of Secrets
    """
    try:
        secret_list = client.list_secrets(request={"parent": "projects/{}".format(project_id)})
    except Exception as e:
        sys.exit("Did not successfully retrieve secret list.")
    return secret_list

def set_env_secrets(client, secret_ids, label=None):
    """
    Sets secrets retrieved from Google Secret Manager in the runtime environment
    of the Python process

    :param secret_ids: a generator of Secrets
    :param label: Secrets with this label will be set in the environment
    """
    for s in secret_ids:
        # we only want secrets with matching labels (or all of them if label wasn't specified)
        if not label or label in s.labels:
            version = client.access_secret_version(request={'name': '{}/versions/latest'.format(s.name)})
            payload_str = version.payload.data.decode("UTF-8")
            os.environ[s.name.split('/')[-1]] = payload_str

if __name__ == "__main__":
    client = secretmanager.SecretManagerServiceClient()
    secrets = list_secrets(client, "myprojectid-123456")
    set_env_secrets(client, secrets)
    print(os.getenv("DATA_DB_HOST"))
However, when I use similar code as the basis for an entry point of a container in Google Cloud Run, the attempt to retrieve a client using the default service account's credentials fails with
File "entry_point.py", line 27, in get_client
client = secretmanager.SecretManagerServiceClient()
File "/usr/local/lib/python3.6/site-packages/google/cloud/secretmanager_v1/services/secret_manager_service/client.py", line 274, in __init__
client_info=client_info,
File "/usr/local/lib/python3.6/site-packages/google/cloud/secretmanager_v1/services/secret_manager_service/transports/grpc.py", line 162, in __init__
scopes=self.AUTH_SCOPES, quota_project_id=quota_project_id
File "/usr/local/lib/python3.6/site-packages/google/auth/_default.py", line 340, in default
credentials, project_id = checker()
File "/usr/local/lib/python3.6/site-packages/google/auth/_default.py", line 186, in _get_explicit_environ_credentials
os.environ[environment_vars.CREDENTIALS]
File "/usr/local/lib/python3.6/site-packages/google/auth/_default.py", line 97, in load_credentials_from_file
"File {} was not found.".format(filename)
google.auth.exceptions.DefaultCredentialsError: File was not found.
The default service account has the Editor and Secret Manager Admin roles (thanks to @DanielOcando for his comment). Why is it that the ADC library, as described here, does not pick up the permissions of the default service account and use them to instantiate the client?
Update 1
@guillaumeblaquiere asked about dependencies. The container is built with Python 3.6.12 and the following libraries:
Django==2.1.15
django-admin-rangefilter==0.3.7
django-extensions==2.1.2
django-ipware==1.1.6
pytz==2017.3
psycopg2==2.7.3.2
waitress==1.4.1
geoip2==2.6
gunicorn==19.9.0
social-auth-app-django==3.1.0
semver==2.8.1
sentry-sdk==0.6.9
google-api-core==1.23.0
google-auth==1.23.0
google-cloud-secret-manager==2.0.0
I created a custom service account, added Editor and Secret Manager Admin roles to it, and then used the Console to deploy a new revision with that account, but the same error resulted.
Update 2
Thinking that matching the CPython version in Cloud Shell would do the trick, I rebuilt the image with Python 3.7. No luck.
Update 3
Taking a different tack, I added the Service Account Token Creator role to the project's default service account, created a terraform file, and configured it for service account impersonation. I also ran gcloud auth application-default login in the shell prior to invoking terraform.
provider "google" {
alias = "tokengen"
}
data "google_client_config" "default" {
provider = google.tokengen
}
data "google_service_account_access_token" "sa" {
provider = "google.tokengen"
target_service_account = "XXXXXXXXXXXX-compute#developer.gserviceaccount.com"
lifetime = "600s"
scopes = [
"https://www.googleapis.com/auth/cloud-platform",
]
}
provider "google" {
project = "myprojectid-123456"
region = "us-central1"
zone = "us-central1-f"
#impersonate_service_account = "XXXXXXXXXXXX-compute#developer.gserviceaccount.com
}
resource "google_cloud_run_service" "default" {
name = "myprojectid-123456"
location = "us-central1"
template {
spec {
containers {
image = "us.gcr.io/myprojectid-123456/testimage"
}
}
}
traffic {
percent = 100
latest_revision = true
}
}
This did work to create the service, but again, when the endpoint attempted to instantiate SecretManagerServiceClient, the same error resulted.
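Judging from the traceback, one thing worth checking: the failure happens in _get_explicit_environ_credentials, a code path google.auth only takes when GOOGLE_APPLICATION_CREDENTIALS is set, so the variable may be set to an empty or stale path inside the image. A minimal diagnostic sketch to run at container startup:

import os

# google.auth only tries to load a key file when GOOGLE_APPLICATION_CREDENTIALS
# is set; on Cloud Run it should normally be unset so ADC falls through to the
# metadata server and the attached service account.
creds_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
if creds_path is not None and not os.path.isfile(creds_path):
    print("GOOGLE_APPLICATION_CREDENTIALS points at a missing file: %r" % creds_path)
    # Removing it lets the client fall back to the runtime service account.
    del os.environ["GOOGLE_APPLICATION_CREDENTIALS"]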

Flask/Flasgger - Document does not appear if `endpoint` parameter is set

I have a Blueprint for which I wrote OpenAPI documentation. Without the endpoint definition it works just fine, but with the endpoint definition it doesn't.
Working code:
@my_blueprint.route('/')
@swag_from('open_api/root.yml')
def main():
    return str('This is the root api')
Not Working (notice how I defined the endpoint in parameters):
@my_blueprint.route('/', endpoint='foo')
@swag_from('open_api/root.yml', endpoint='foo')
def main():
    return str('This is the root api')
You have working code, so why ask? My use case is having multiple endpoints for a single function, which means I have to define a separate yml file for each endpoint's docs.
@my_blueprint.route('/', endpoint='foo')
@my_blueprint.route('/<some_id>', endpoint='foo_with_id')
@swag_from('open_api/root.yml', endpoint='foo')
@swag_from('open_api/root_with_id.yml', endpoint='foo_with_id')
def main(some_id=None):
    if (some_id):
        return str("Here's your ID")
    return str('This is the root api')
Setting an endpoint in @swag_from should also contain the name of the Blueprint. Example: @swag_from('my_yaml.yml', endpoint='{}.your_endpoint'.format(my_blueprint.name))
Full example:
@my_blueprint.route('/', endpoint='foo')  # endpoint is foo
@my_blueprint.route('/<some_id>', endpoint='foo_with_id')  # endpoint is foo_with_id
@swag_from('open_api/root.yml', endpoint='{}.foo'.format(my_blueprint.name))  # blueprint name is set as the prefix for the endpoint
@swag_from('open_api/root_with_id.yml', endpoint='{}.foo_with_id'.format(my_blueprint.name))  # same goes here
def main(some_id=None):
    if (some_id):
        return str("Here's your ID")
    return str('This is the root api')
