I have this Python code to register a Google Cloud Storage (GCS) snapshot repository in Elasticsearch:
import requests
from grabconfig import grabconfig

(HOSTS, ign) = grabconfig()

reqHeaders = {'content-type': 'application/json'}

for h in HOSTS:
    url = f'http://{h}:9200'

    # Register the GCS-backed snapshot repository
    r = requests.put(f'{url}/_snapshot/prod_backup2',
                     data='''{ "type": "gcs",
                               "settings": { "client": "secondary",
                                             "bucket": "prod_backup2",
                                             "compress": true } }''',
                     headers=reqHeaders)
    print(r)
    print(r.json())

    r2 = requests.get(f'{url}/_cat/snapshots')
    print(r2)
    print(r2.json())
The configuration file I am using is prod.py:
HOSTS = ['10.x.x.x']
BACKUP_REPO = ['prod_backup2']
But every time I run the code I get this error:
<Response [500]>
{'error': {'root_cause': [{'type': 'repository_exception', 'reason': '[prod_backup2] repository type [gcs] does not exist'}], 'type': 'repository_exception', 'reason': '[prod_backup2] repository type [gcs] does not exist'}, 'status': 500}
I think I found it: the repository-gcs plugin was not installed on the server I was targeting.
That's supposed to be fixed by Monday, so I'm on to the next task.
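In the meantime, a quick way to confirm from Python whether the repository-gcs plugin is present on each node is to query the _cat/plugins endpoint. This is only a minimal sketch, reusing the HOSTS list from above:

import requests

for h in HOSTS:
    # Lists the plugins installed on each node; repository-gcs should show up
    # here once the plugin has been installed and the node restarted.
    plugins = requests.get(f'http://{h}:9200/_cat/plugins?v')
    print(plugins.text)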
I am trying to write code that reads values from a CSV file and creates VMs in Google Cloud. I am running into problems in two places: while creating tags, if I use 'items': [tag] it starts giving me an error, and the same thing happens when I set the service account scope.
import os, json
import googleapiclient.discovery
from google.oauth2 import service_account
import csv
credentials = service_account.Credentials.from_service_account_file('G:/python/json/mykids-280210.json')
compute = googleapiclient.discovery.build('compute', 'v1', credentials=credentials)
def create_instance(compute, vm_name, image_project, image_family, machinetype, startupscript, zone, network,
subnet, project, scope, tag):
# Get the latest image from the requested image family.
image_response = compute.images().getFromFamily(
project=image_project, family=image_family).execute()
source_disk_image = image_response['selfLink']
# Configure the machine
machine_type = "zones/" + zone + "/machineTypes/" + machinetype
startup_script = startupscript
config = {
'name': vm_name,
'machineType': machine_type,
'description': 'This VM was created with python code',
'tags': {
'items': ['external', 'home', 'local'] #'items': [tag] <~~~~~~~~~~~
},
'deletionProtection': False,
'labels': {'env': 'dev', 'server': 'mytower', 'purpose': 'personal'},
# Specify the boot disk and the image to use as a source.
'disks': [
{
'boot': True,
'autoDelete': True,
'initializeParams': {
'sourceImage': source_disk_image,
}
}
],
# Specify a network interface with NAT to access the public
# internet.
'networkInterfaces': [{
'network': 'global/networks/' + network,
'subnetwork': 'regions/us-central1/subnetworks/' + subnet,
'accessConfigs': [
{'type': 'ONE_TO_ONE_NAT', 'name': 'External NAT'}
]
}],
# Allow the instance to access cloud storage and logging.
'serviceAccounts': [{
'email': 'default',
'scopes': [
#'https://www.googleapis.com/auth/devstorage.read_write', 'https://www.googleapis.com/auth/logging.write'
#scope # scope <~~~~~~~~~~~~~~~~~~~~
]
}],
'scheduling': {
"preemptible": True
},
# Metadata is readable from the instance and allows you to
# pass configuration from deployment scripts to instances.
'metadata': {
'items': [{
# Startup script is automatically executed by the
# instance upon startup.
'key': 'startup-script',
'value': startup_script
}]
}
}
return compute.instances().insert(
project=project,
zone=zone,
body=config).execute()
# [END create_instance]
with open('vms.csv', newline='') as csvfile:
data = csv.DictReader(csvfile)
for row in data:
vm_name = row['vm_name']
image_project = row['image_project']
image_family = row['image_family']
machinetype = row['machinetype']
startupscript = row['startupscript']
zone = row['zone']
network = row['network']
subnet = row['subnet']
project = row['project']
scope = row['scopes']
tag = row['tags']
print(create_instance(compute, vm_name, image_project, image_family, machinetype, startupscript, zone, network,
subnet, project, scope, tag))
csvfile.close()  # redundant: the with statement already closes the file
Error when I use the scope variable:
G:\python\pythonProject\venv\Scripts\python.exe G:/python/pythonProject/read-excel-gcp/vm/create_vm.py
Traceback (most recent call last):
File "G:\python\pythonProject\read-excel-gcp\vm\create_vm.py", line 100, in <module>
print(create_instance(compute, vm_name, image_project, image_family, machinetype, startupscript, zone, network,
File "G:\python\pythonProject\read-excel-gcp\vm\create_vm.py", line 79, in create_instance
return compute.instances().insert(
File "G:\python\pythonProject\venv\lib\site-packages\googleapiclient\_helpers.py", line 134, in positional_wrapper
return wrapped(*args, **kwargs)
File "G:\python\pythonProject\venv\lib\site-packages\googleapiclient\http.py", line 915, in execute
raise HttpError(resp, content, uri=self.uri)
googleapiclient.errors.HttpError: <HttpError 400 when requesting https://compute.googleapis.com/compute/v1/projects/mykids-280210/zones/us-central1-a/instances?alt=json returned "One or more of the service account scopes are invalid: 'https://www.googleapis.com/auth/devstorage.read_write', 'https://www.googleapis.com/auth/logging.write''". Details: "One or more of the service account scopes are invalid: 'https://www.googleapis.com/auth/devstorage.read_write', 'https://www.googleapis.com/auth/logging.write''">
Process finished with exit code 1
I get a similar error when I use the tag variable.
I have commented out (#) the variable versions in the above code to show how I am passing them.
Below are my CSV file contents:
vm_name,image_project,image_family,machinetype,startupscript,zone,network,subnet,project,scopes,tags
python-vm1,debian-cloud,debian-9,e2-micro,G:/python/json/startup-script.sh,us-central1-a,myvpc,subnet-a,mykids-280210,"https://www.googleapis.com/auth/devstorage.read_write', 'https://www.googleapis.com/auth/logging.write'","external', 'home', 'local'"
python-vm2,debian-cloud,debian-9,e2-micro,G:/python/json/startup-script.sh,us-central1-a,myvpc,subnet-a,mykids-280210,"https://www.googleapis.com/auth/devstorage.read_write', 'https://www.googleapis.com/auth/logging.write'","external', 'home', 'local'"
When the values are hard-coded directly it works, but when they are passed in through the variables, it fails.
I have marked the problem areas with <~~~~~~~~~~~~
Please suggest a fix if anyone understands the issue.
@d.s can you try changing your scope format to something like this:
'serviceAccounts': [
{
'email': 'default',
'scopes':[
'https://www.googleapis.com/auth/compute',
'https://www.googleapis.com/auth/servicecontrol',
'https://www.googleapis.com/auth/service.management.readonly',
'https://www.googleapis.com/auth/logging.write',
'https://www.googleapis.com/auth/monitoring.write',
'https://www.googleapis.com/auth/trace.append',
'https://www.googleapis.com/auth/devstorage.read_write']}]
The listed scopes are the default scopes that you will need for an instance. I think the problem you are facing is that you were trying to list only two scopes, which is not enough to allow you to deploy your instance.
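Separately, the error text quoted in the question suggests that each CSV cell arrives as one long string (both scope URLs, stray quotes included, in a single value) rather than as a Python list. A minimal sketch of one way to split those cells into lists inside the CSV loop before calling create_instance, assuming the quote-and-comma format shown in vms.csv above:

def split_cell(cell):
    # Strip stray quotes and whitespace from each comma-separated entry and
    # drop empty pieces, so "external', 'home', 'local'" becomes ['external', 'home', 'local'].
    return [part.strip(" '\"") for part in cell.split(',') if part.strip(" '\"")]

scope = split_cell(row['scopes'])  # list of scope URLs
tag = split_cell(row['tags'])      # list of tag strings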
I've followed the documentation pretty well as outlined here.
I've setup my azure machine learning environment the following way:
from azureml.core import Workspace
# Connect to the workspace
ws = Workspace.from_config()
from azureml.core import Environment
from azureml.core import ContainerRegistry
myenv = Environment(name = "myenv")
myenv.inferencing_stack_version = "latest" # This will install the inference specific apt packages.
# Docker
myenv.docker.enabled = True
myenv.docker.base_image_registry.address = "myazureregistry.azurecr.io"
myenv.docker.base_image_registry.username = "myusername"
myenv.docker.base_image_registry.password = "mypassword"
myenv.docker.base_image = "4fb3..."
myenv.docker.arguments = None
# Environment variables (I need Python to look at these folders)
myenv.environment_variables = {"PYTHONPATH":"/root"}
# python
myenv.python.user_managed_dependencies = True
myenv.python.interpreter_path = "/opt/miniconda/envs/myenv/bin/python"
from azureml.core.conda_dependencies import CondaDependencies
conda_dep = CondaDependencies()
conda_dep.add_pip_package("azureml-defaults")
myenv.python.conda_dependencies=conda_dep
myenv.register(workspace=ws) # works!
I have a score.py file configured for inference (not relevant to the problem I'm having)...
I then set up the inference configuration:
from azureml.core.model import InferenceConfig
inference_config = InferenceConfig(entry_script="score.py", environment=myenv)
I set up my compute cluster:
from azureml.core.compute import ComputeTarget, AksCompute
from azureml.exceptions import ComputeTargetException
# Choose a name for your cluster
aks_name = "theclustername"
# Check to see if the cluster already exists
try:
aks_target = ComputeTarget(workspace=ws, name=aks_name)
print('Found existing compute target')
except ComputeTargetException:
print('Creating a new compute target...')
prov_config = AksCompute.provisioning_configuration(vm_size="Standard_NC6_Promo")
aks_target = ComputeTarget.create(workspace=ws, name=aks_name, provisioning_configuration=prov_config)
aks_target.wait_for_completion(show_output=True)
from azureml.core.webservice import AksWebservice
# Example
gpu_aks_config = AksWebservice.deploy_configuration(autoscale_enabled=False,
num_replicas=3,
cpu_cores=4,
memory_gb=10)
Everything succeeds; then I try and deploy the model for inference:
from azureml.core.model import Model
model = Model(ws, name="thenameofmymodel")
# Name of the web service that is deployed
aks_service_name = 'tryingtodeply'
# Deploy the model
aks_service = Model.deploy(ws,
aks_service_name,
models=[model],
inference_config=inference_config,
deployment_config=gpu_aks_config,
deployment_target=aks_target,
overwrite=True)
aks_service.wait_for_deployment(show_output=True)
print(aks_service.state)
And it fails, saying that it can't find the environment. More specifically, my registered environment is version 11, but the deployment keeps trying to fetch an environment with a version number that is one higher (i.e., version 12) than the current one:
Failed
ERROR - Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: 0f03a025-3407-4dc1-9922-a53cc27267d4
More information can be found here:
Error:
{
"code": "BadRequest",
"statusCode": 400,
"message": "The request is invalid",
"details": [
{
"code": "EnvironmentDetailsFetchFailedUserError",
"message": "Failed to fetch details for Environment with Name: myenv Version: 12."
}
]
}
I have tried to manually edit the environment JSON to match the version that azureml is trying to fetch, but nothing works. Can anyone see anything wrong with this code?
Update
Changing the name of the environment (e.g., my_inference_env) and passing it to InferenceConfig seems to be on the right track. However, the error now changes to the following:
Running..........
Failed
ERROR - Service deployment polling reached non-successful terminal state, current service state: Failed
Operation ID: f0dfc13b-6fb6-494b-91a7-de42b9384692
More information can be found here: https://some_long_http_address_that_leads_to_nothing
Error:
{
"code": "DeploymentFailed",
"statusCode": 404,
"message": "Deployment not found"
}
Solution
The answer from Anders below is indeed correct regarding the use of Azure ML environments. However, the last error I was getting was because I was setting the container image using the digest value (a SHA) and NOT the image name and tag (e.g., imagename:tag). Note this line of code in the first block:
myenv.docker.base_image = "4fb3..."
I reference the digest value, but it should be changed to
myenv.docker.base_image = "imagename:tag"
Once I made that change, the deployment succeeded! :)
One concept that took me a while to get was the bifurcation of registering and using an Azure ML Environment. If you have already registered your env, myenv, and none of the details of your environment have changed, there is no need to re-register it with myenv.register(). You can simply get the already-registered env using Environment.get() like so:
myenv = Environment.get(ws, name='myenv', version=11)
My recommendation would be to name your environment something new, like "model_scoring_env". Register it once, then pass it to the InferenceConfig.
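Putting that together, a minimal sketch (assuming the environment has already been registered once under the new name "model_scoring_env") would look like this:

from azureml.core import Environment
from azureml.core.model import InferenceConfig

# Fetch the already-registered environment rather than re-registering it;
# omitting version= picks up the latest registered version.
scoring_env = Environment.get(ws, name="model_scoring_env")

# Reuse the fetched environment for the inference configuration.
inference_config = InferenceConfig(entry_script="score.py", environment=scoring_env)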
I have written a Python script on my local machine, and when I try to run it I get the error below:
Error
{'error': {'code': 400,
'details': [{'#type': 'type.googleapis.com/google.rpc.Help',
'links': [{'description': 'Google developer console '
'API key',
'url': 'https://console.developers.google.com/project/[project_id]/apiui/credential'}]}],
'message': 'The API Key and the authentication credential are from '
'different projects.',
'status': 'INVALID_ARGUMENT'}}
Python script to create a Cloud Build trigger:
import json
import subprocess
from pprint import pprint

import requests

# Get an access token from the local gcloud credentials
bashCommand = "gcloud auth print-access-token"
process = subprocess.Popen(bashCommand.split(), stdout=subprocess.PIPE)
output, error = process.communicate()
if error:
print(error)
headers = {
'Authorization' : 'Bearer '+str(output)[2:-3],  # strip the b'...\n' wrapper from the subprocess bytes output
'Accept' : 'application/json',
'Content-Type' : 'application/json'
}
cloudbuild = {"build":
{"source":
{"repoSource":
{"projectId":"[PROJECT_ID]",
"repoName":"[repoName]",
"branchName":".*"
}
}
},
"description":"API TRigger for all branch",
"name":"[TRIGGER NAME]"
}
data = json.dumps(cloudbuild)
response = requests.post('https://cloudbuild.googleapis.com/v1/projects/[PROJECT_ID]/triggers?key=[API KEY]', headers=headers, data=data)
results_output = response.json()
pprint(results_output)
I also set the project on my local machine:
gcloud config set project [project-name]
Please suggest a solution for this. Thanks in advance.
I removed the API key from the request; the access token is enough to run the above Python script.
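In other words, a minimal sketch of the fix (reusing the same headers and data built above) is to drop the ?key=[API KEY] query parameter and rely on the Bearer access token alone:

# Same request as before, minus the API key query parameter;
# the Authorization: Bearer header is sufficient on its own.
response = requests.post('https://cloudbuild.googleapis.com/v1/projects/[PROJECT_ID]/triggers',
                         headers=headers, data=data)
pprint(response.json())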
I'm creating a stack instance using the Python boto3 SDK. According to the documentation I should be able to use ParameterOverrides, but I'm getting the following error:
botocore.exceptions.ParamValidationError: Parameter validation failed:
Unknown parameter in input: "ParameterOverrides", must be one of: StackSetName, Accounts, Regions, OperationPreferences, OperationId
Environment:
aws-cli/1.11.172 Python/2.7.14 botocore/1.7.30
Imports used:
import boto3
import botocore
The following is the code:
try:
stackset_instance_response = stackset_client.create_stack_instances(
StackSetName=cloudtrail_stackset_name,
Accounts=[
account_id
],
Regions=[
stack_region
],
OperationPreferences={
'RegionOrder': [
stack_region
],
'FailureToleranceCount': 0,
'MaxConcurrentCount': 1
},
ParameterOverrides=[
{
'ParameterKey': 'CloudtrailBucket',
'ParameterValue': 'test-bucket'
},
{
'ParameterKey': 'Environment',
'ParameterValue': 'SANDBOX'
},
{
'ParameterKey': 'IsCloudTrailEnabled',
'ParameterValue': 'NO'
}
]
)
print("Stackset create Response : " + str(stackset_instance_response))
operation_id = stackset_instance_response['OperationId']
print (operation_id)
except botocore.exceptions.ClientError as e:
print("Stackset creation error : " + str(e))
I'm not sure where I'm going wrong; any help would be greatly appreciated. Thank you.
1.8.0 is the first version of botocore that has ParameterOverrides defined:
https://github.com/boto/botocore/blob/1.8.0/botocore/data/cloudformation/2010-05-15/service-2.json#L1087-L1090
1.7.30 doesn't have that defined. https://github.com/boto/botocore/blob/1.7.30/botocore/data/cloudformation/2010-05-15/service-2.json
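Since the error comes from client-side parameter validation against the local botocore service definition, upgrading the library (for example with pip install --upgrade boto3 botocore) should resolve it. A minimal sketch of a runtime check before calling create_stack_instances:

import botocore

# ParameterOverrides for create_stack_instances was added in botocore 1.8.0,
# so any version at or above that should accept the parameter.
print(botocore.__version__)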