Python unknown number of commandline arguments in boto3 - python-3.x

I am trying to add tags based on command-line arguments passed to a Python script, something like below:
./snapshot-create.py --id abcd --key1 Env --value1 Test
The script looks like this:
client = boto3.client('ec2')

response = client.create_tags(
    Resources=[
        ID,
    ],
    Tags=[
        {
            'Key': 'key1',
            'Value': 'value1'
        },
    ]
)
I want to use --key1 and --value1 as the tag's Key and Value as above, but the problem is that there could be more than one tag to add, like:
./snapshot-create.py --id abcd --key1 Env --value1 Test --key2 Loca --value2 US --key3 Size --value3 small ...
How would I use those key-value pairs as tags if the number of arguments is not fixed?
I don't mind using a function or any other approach than what I came up with.

One option would be loading a JSON string as a dictionary and iterating over it when creating the tags.
For example, consider this invocation:
$ my_script.py --tags '{"tag1": "value1", "tag2": "value2"}' --id i-1234567890 i-0987654321
and this code snippet:
import json
import boto3
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('-t', '--tags', type=str)
parser.add_argument('-i', '--id', nargs='+')
args = parser.parse_args()
client = boto3.client('ec2')
def create_tags(key, value, resources, c):
    c.create_tags(
        Resources=resources,
        Tags=[
            {
                'Key': key,
                'Value': value
            },
        ]
    )
my_tags = json.loads(args.tags)  # {'tag1': 'value1', 'tag2': 'value2'}
resources = args.id  # ['i-1234567890', 'i-0987654321']

for k, v in my_tags.items():
    create_tags(k, v, resources, client)
This should cause instances i-1234567890 and i-0987654321 to be tagged with both tag1 and tag2 from --tags above.
If you require a more dynamic interface for resources as well, consider adding it to the JSON, for example:
{ "instance_id": [{"tag_key": "tag_value"}, ...], ... }
You can then take a single argument --tags containing a mapping of resources to tags, instead of the above example where the resources are statically mapped to the tags.
Pretty sure there are better, more Pythonic solutions than this, though - this is just one viable approach.
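Another possibility (a sketch, not part of the original answer, assuming repeated --tag Key=Value options are acceptable) is to use argparse's append action, which avoids numbering the flags altogether:

import argparse
import boto3

# Sketch: each repeated --tag Key=Value option is collected into a list via action='append'.
parser = argparse.ArgumentParser()
parser.add_argument('-i', '--id', nargs='+', required=True)
parser.add_argument('-t', '--tag', action='append', default=[],
                    help='Tag in Key=Value form; may be given multiple times')
args = parser.parse_args()

# Turn ['Env=Test', 'Loca=US'] into the [{'Key': ..., 'Value': ...}, ...] shape boto3 expects.
tags = []
for pair in args.tag:
    key, _, value = pair.partition('=')
    tags.append({'Key': key, 'Value': value})

client = boto3.client('ec2')
client.create_tags(Resources=args.id, Tags=tags)

The invocation would then look like ./snapshot-create.py --id abcd --tag Env=Test --tag Loca=US.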

How to use Ansible Python API to run ansible task

I would like to use the Ansible 2.9.9 Python API to get a config file from the servers in my hosts file and parse it to JSON format.
I don't know how to call an existing Ansible task using the Python API.
Going by the Ansible API documentation, how can I integrate my task with the sample code below?
Sample.py
#!/usr/bin/env python
import json
import shutil
from ansible.module_utils.common.collections import ImmutableDict
from ansible.parsing.dataloader import DataLoader
from ansible.vars.manager import VariableManager
from ansible.inventory.manager import InventoryManager
from ansible.playbook.play import Play
from ansible.executor.task_queue_manager import TaskQueueManager
from ansible.plugins.callback import CallbackBase
from ansible import context
import ansible.constants as C


class ResultCallback(CallbackBase):
    """A sample callback plugin used for performing an action as results come in.

    If you want to collect all results into a single object for processing at
    the end of the execution, look into utilizing the ``json`` callback plugin
    or writing your own custom callback plugin.
    """
    def v2_runner_on_ok(self, result, **kwargs):
        """Print a json representation of the result.

        This method could store the result in an instance attribute for retrieval later.
        """
        host = result._host
        print(json.dumps({host.name: result._result}, indent=4))


# since the API is constructed for CLI it expects certain options to always be set in the context object
context.CLIARGS = ImmutableDict(connection='local', module_path=['/to/mymodules'], forks=10, become=None,
                                become_method=None, become_user=None, check=False, diff=False)

# initialize needed objects
loader = DataLoader()  # Takes care of finding and reading yaml, json and ini files
passwords = dict(vault_pass='secret')

# Instantiate our ResultCallback for handling results as they come in. Ansible expects this to be one of its main display outlets
results_callback = ResultCallback()

# create inventory, use path to host config file as source or hosts in a comma separated string
inventory = InventoryManager(loader=loader, sources='localhost,')

# variable manager takes care of merging all the different sources to give you a unified view of variables available in each context
variable_manager = VariableManager(loader=loader, inventory=inventory)

# create data structure that represents our play, including tasks, this is basically what our YAML loader does internally.
play_source = dict(
    name="Ansible Play",
    hosts='localhost',
    gather_facts='no',
    tasks=[
        dict(action=dict(module='shell', args='ls'), register='shell_out'),
        dict(action=dict(module='debug', args=dict(msg='{{shell_out.stdout}}')))
    ]
)

# Create play object, playbook objects use .load instead of init or new methods,
# this will also automatically create the task objects from the info provided in play_source
play = Play().load(play_source, variable_manager=variable_manager, loader=loader)

# Run it - instantiate task queue manager, which takes care of forking and setting up all objects to iterate over host list and tasks
tqm = None
try:
    tqm = TaskQueueManager(
        inventory=inventory,
        variable_manager=variable_manager,
        loader=loader,
        passwords=passwords,
        stdout_callback=results_callback,  # Use our custom callback instead of the ``default`` callback plugin, which prints to stdout
    )
    result = tqm.run(play)  # most interesting data for a play is actually sent to the callback's methods
finally:
    # we always need to cleanup child procs and the structures we use to communicate with them
    if tqm is not None:
        tqm.cleanup()
    # Remove ansible tmpdir
    shutil.rmtree(C.DEFAULT_LOCAL_TMP, True)
sum.yml: generates a summary file for each host
- hosts: staging
  tasks:
    - name: pt_mysql_sum
      shell: PTDEST=/tmp/collected;mkdir -p $PTDEST;cd /tmp;wget percona.com/get/pt-mysql-summary;chmod +x pt*;./pt-mysql-summary -- --user=adm --password=***** > $PTDEST/pt-mysql-summary.txt;cat $PTDEST/pt-mysql-summary.out;
      register: result
      environment:
        http_proxy: http://proxy.example.com:8080
        https_proxy: https://proxy.example.com:8080

    - name: ansible_result
      debug: var=result.stdout_lines

    - name: fetch_log
      fetch:
        src: /tmp/collected/pt-mysql-summary.txt
        dest: /tmp/collected/pt-mysql-summary-{{ inventory_hostname }}.txt
        flat: yes
hosts file
[staging]
vm1 ansible_ssh_host=10.40.50.41 ansible_ssh_user=testuser ansible_ssh_pass=*****
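One way to run the existing sum.yml play through the Python API, instead of rebuilding its tasks inside play_source, is PlaybookExecutor (a sketch, not taken from the question; it assumes sum.yml and the hosts file shown above sit next to the script, and the exact set of required context.CLIARGS keys can vary by Ansible version):

from ansible import context
from ansible.module_utils.common.collections import ImmutableDict
from ansible.parsing.dataloader import DataLoader
from ansible.inventory.manager import InventoryManager
from ansible.vars.manager import VariableManager
from ansible.executor.playbook_executor import PlaybookExecutor

# Sketch: the CLI-style options PlaybookExecutor reads from the global context.
context.CLIARGS = ImmutableDict(connection='smart', module_path=[], forks=10,
                                become=None, become_method=None, become_user=None,
                                check=False, diff=False, verbosity=0, syntax=False,
                                start_at_task=None, listhosts=None, listtasks=None,
                                listtags=None)

loader = DataLoader()
inventory = InventoryManager(loader=loader, sources=['hosts'])   # the inventory file above
variable_manager = VariableManager(loader=loader, inventory=inventory)

pbex = PlaybookExecutor(playbooks=['sum.yml'], inventory=inventory,
                        variable_manager=variable_manager, loader=loader,
                        passwords={})
rc = pbex.run()  # task results (e.g. result.stdout_lines) are delivered to callback plugins
print('playbook finished with return code', rc)

The registered result and the fetched file behave as they would under ansible-playbook; to capture task output in Python you could attach a custom callback such as the ResultCallback above by assigning it to pbex._tqm._stdout_callback before run(), though that relies on a private attribute.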

How to export metrics from a containerized component in kubeflow pipelines 0.2.5

I have a pipeline made up of 3 containerized components. In the last component I write the metrics I want to a file named /mlpipeline-metrics.json, just like it's explained here.
This is the Python code I used.
metrics = {
    'metrics': [
        {
            'name': 'accuracy',
            'numberValue': accuracy,
            'format': 'PERCENTAGE',
        },
        {
            'name': 'average-f1-score',
            'numberValue': average_f1_score,
            'format': 'PERCENTAGE'
        },
    ]
}

with open('/mlpipeline-metrics.json', 'w') as f:
    json.dump(metrics, f)
I also tried writing the file with the following code, just like in the example linked above.
with file_io.FileIO('/mlpipeline-metrics.json', 'w') as f:
    json.dump(metrics, f)
The pipeline runs just fine without any errors, but it won't show the metrics in the front-end UI.
I'm thinking it has something to do with the following code block.
def metric_op(accuracy, f1_scores):
    return dsl.ContainerOp(
        name='visualize_metrics',
        image='gcr.io/mgcp-1190085-asml-lpd-dev/kfp/jonas/container_tests/image_metric_comp',
        arguments=[
            '--accuracy', accuracy,
            '--f1_scores', f1_scores,
        ]
    )
This is the code I use to create a ContainerOp from the containerized component. Notice I have not specified any file_outputs.
In other ContainerOps I have to specify file_outputs to be able to pass variables to the next steps in the pipeline. Should I do something similar here to map /mlpipeline-metrics.json onto something so that Kubeflow Pipelines detects it?
I'm using a managed AI platform pipelines deployment running Kubeflow Pipelines 0.2.5 with Python 3.6.8.
Any help is appreciated.
So after some trial and error I finally came to a solution, and I'm happy to say that my intuition was right: it did have something to do with the file_outputs I didn't specify.
To be able to export your metrics, you will have to set file_outputs as follows.
def metric_op(accuracy, f1_scores):
    return dsl.ContainerOp(
        name='visualize_metrics',
        image='gcr.io/mgcp-1190085-asml-lpd-dev/kfp/jonas/container_tests/image_metric_comp',
        arguments=[
            '--accuracy', accuracy,
            '--f1_scores', f1_scores,
        ],
        file_outputs={
            'mlpipeline-metrics': '/mlpipeline-metrics.json'
        }
    )
Here is another way of showing metrics when you use the Python-function-based method:
# Define your components code as standalone python functions: ======================
from typing import NamedTuple

def add(a: float, b: float) -> NamedTuple(
    'AddOutput',
    [
        ('sum', float),
        ('mlpipeline_metrics', 'Metrics')
    ]
):
    '''Calculates sum of two arguments'''
    sum = a + b

    metrics = {
        'add_metrics': [
            {
                'name': 'sum',
                'numberValue': float(sum),
            }
        ]
    }

    print("Add Result: ", sum)  # this will print it online in the 'main-logs' of each task

    from collections import namedtuple
    addOutput = namedtuple(
        'AddOutput',
        ['sum', 'mlpipeline_metrics'])

    return addOutput(sum, metrics)  # the metrics will be uploaded to the cloud
Note: I am just using a basic function here; I am not using your function.
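To wire such a function into a pipeline, it is typically converted to a component with kfp.components.func_to_container_op; a sketch (the pipeline name, base image, and default arguments are illustrative, not from the answer above):

import kfp
from kfp import dsl
from kfp.components import func_to_container_op

# Sketch: turn the standalone `add` function into a reusable component factory.
add_op = func_to_container_op(add, base_image='python:3.6')

@dsl.pipeline(name='add-metrics-demo', description='Sketch: metrics from a function-based component')
def add_pipeline(a: float = 1.0, b: float = 2.0):
    add_task = add_op(a, b)
    # The 'mlpipeline_metrics' output declared in the NamedTuple is what the KFP UI reads as metrics.

if __name__ == '__main__':
    kfp.compiler.Compiler().compile(add_pipeline, 'add_pipeline.yaml')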

How to push/pull to/from Airflow XCom with spark_task and PythonOperator?

I have a DAG that creates a spark task and executes a certain script located in a particular directory. There are two tasks like this. Both of these tasks need to receive the same ID generated in the DAG file before the tasks are executed. If I simply store and pass a value solely via the Python script, the IDs are different, which is normal. So I am trying to push the value to XCom with a PythonOperator task.
I need to pull the values from XCom and update a 'params' dictionary with that information in order to be able to pass it to my spark task.
Could you please help me? I am hitting my head against the wall and just can't figure it out.
I tried the following:
1. Creating a function just to retrieve the data from XCom and return it, and assigning this function to the params variable, but that doesn't work. I cannot return from a Python function inside the DAG that uses xcom_pull.
2. Assigning an empty list, appending to it from the Python function, and then providing the final list directly to my spark task. That doesn't work either. Please help!
Thanks a lot in advance for any help related to this. I will need this same value for this and multiple other spark tasks that may come into the same DAG file.
DAG FILE
import..
from common.base_tasks import spark_task

default_args = {
    'owner': 'airflow',
    'start_date': days_ago(1),
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = DAG(
    dag_id='dag',
    default_args=default_args,
    schedule_interval=timedelta(days=1)
)

log_level = "info"

id_info = {
    "id": str(uuid.uuid1()),
    "start_time": str(datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S,%f'))
}

# this stores the value to XCom successfully
def store_id(**kwargs):
    kwargs['ti'].xcom_push(key='id_info', value=id_info)

store_trace_task = PythonOperator(task_id='store_id', provide_context=True, python_callable=store_id, dag=dag)

extra_config = {'log_level': log_level}
config = '''{"config":"data"}'''
params = {'config': config, 'extra_config': json.dumps(extra_config)}

# ---------- this doesn't work ----------
pars = []
pars.append(params)

def task1_pull_params(**kwargs):
    tracing = kwargs['ti'].xcom_pull(task_ids='store_trace_task')
    pars.append(tracing)
    # params = {
    #     'parsed_config': parsed_config,
    #     'extra_config': json.dumps(extra_config),
    #     'trace_data': tracing
    # }
    # return params  # return pushes to xcom, xcom_push does the same

task1_pull_params = PythonOperator(task_id='task1_pull_params', provide_context=True, python_callable=task1_pull_params, dag=dag)

store_trace_task >> task1_pull_params

# returning a value from the function and calling it to assign the result to the params variable below also doesn't work
# params = task1_pull_params

# this prints only what's outside of the function, i.e. params
print("===== pars =====> ", pars)

pipeline_task1 = spark_task(
    name='task1',
    script='app.py',
    params=params,
    dag=dag
)

task1_pull_params >> pipeline_task1
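For reference, a common pattern for this kind of problem (a sketch, not a verified fix for the custom spark_task wrapper, whose signature lives in common.base_tasks and is not shown here) is to pull the XCom value at run time, either inside a downstream callable or via a Jinja template in a templated operator field, rather than at DAG-parse time:

# Sketch: pull the pushed value at execution time and re-publish the assembled params.
def build_params(**kwargs):
    ti = kwargs['ti']
    # xcom_pull matches on task_id ('store_id'), not on the Python variable name ('store_trace_task')
    tracing = ti.xcom_pull(task_ids='store_id', key='id_info')
    full_params = {
        'config': config,
        'extra_config': json.dumps(extra_config),
        'trace_data': tracing,
    }
    ti.xcom_push(key='full_params', value=full_params)

build_params_task = PythonOperator(task_id='build_params', provide_context=True,
                                   python_callable=build_params, dag=dag)

store_trace_task >> build_params_task

Downstream operators can then receive the value lazily with a template string such as "{{ ti.xcom_pull(task_ids='build_params', key='full_params') }}", provided the field that receives it is listed in that operator's template_fields; module-level code like the print() above runs at parse time and will never see XCom data.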

How to save the output of Azure-cli commands in a variable

When using azure-cli in Python 3.5 and calling the commands from a script, I have no control over the output in the console.
When a command is executed, it prints the result to the console, but I'm struggling to just take the result and put it in a variable to analyze it.
from azure.cli.core import get_default_cli

class AzureCmd():
    def __init__(self, username, password):
        self.username = username
        self.password = password

    def login(self, tenant):
        login_successfull = get_default_cli().invoke(['login',
                                                      '--tenant', tenant,
                                                      '--username', self.username,
                                                      '--password', self.password]) == 0
        return login_successfull

    def list_vm(self, tenant):
        list_vm = get_default_cli().invoke(['vm', 'list', '--output', 'json'])
        print(list_vm)

tenant = 'mytenant.onmicrosoft.com'
cmd = AzureCmd('login', 'mypassword')
cmd.login(tenant)
cmd.list_vm(tenant)
Here is my script attempt.
What I want to achieve: no output printed to the console when cmd.login(tenant) is executed.
Instead of getting 0 (success) or 1 (failure) in my variables login_successfull and list_vm, I want to save the output of get_default_cli().invoke() in them.
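For reference, one workaround that is often used (a sketch, not from the answers below; it assumes invoke() accepts an out_file argument, which is the case in recent azure-cli/knack versions) is to redirect the CLI's console output and read the parsed response from cli.result.result:

import os
from azure.cli.core import get_default_cli

# Sketch: silence console output by redirecting it, then return the deserialized result.
def az(args):
    cli = get_default_cli()
    with open(os.devnull, 'w') as devnull:
        exit_code = cli.invoke(args, out_file=devnull)
    if exit_code == 0:
        return cli.result.result  # e.g. the list of VMs for ['vm', 'list']
    raise RuntimeError(cli.result.error)

vms = az(['vm', 'list', '--output', 'json'])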
I ran into the same problem and found a solution. Many people offered the standard solution that normally works in most cases, but they didn't verify it works for this scenario, and it turns out az cli is an edge case.
I think the issue has something to do with the fact that az cli is based on Python.
Win10CommandPrompt:\> where az
C:\Program Files (x86)\Microsoft SDKs\Azure\CLI2\wbin\az.cmd
If you look in that file you'll see something like this and discover that Azure CLI is just python:
python.exe -IBm azure.cli
So to do what you want to do, try this (it works for me):
import subprocess

out = subprocess.run(['python', '-IBm', 'azure.cli', '-h'], stdout=subprocess.PIPE).stdout.decode('utf-8')
print(out)
# this is equivalent to "az -h"
The above syntax won't work unless every single arg is passed as its own string in the list. I found a syntax I like a lot more after reading how to do multiple args with Python Popen:
import subprocess
azcmd = "az ad sp create-for-rbac --name " + SPName + " --scopes /subscriptions/" + subscriptionid
out = subprocess.run(azcmd, shell=True, stdout=subprocess.PIPE).stdout.decode('utf-8')
print(out)
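If the command emits JSON (for example when run with --output json, which is also the default output format), the captured text can be parsed into Python objects; a small sketch under that assumption (picking the appId field is just an example of what az ad sp create-for-rbac returns):

import json

parsed = json.loads(out)      # assumes the az command produced JSON on stdout
print(parsed.get('appId'))    # e.g. pick out a single field from the response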
I faced the same problem while trying to save the log of an Azure Container Instance. None of the above solutions worked exactly as they are. After debugging the azure cli Python code
(file: \Python39\Lib\site-packages\azure\cli\command_modules\container\custom.py, function: container_logs()), I see that the container logs are just printed to the console but not returned. If you want to save the logs to a variable, add the return line (not exactly a great solution, but it works for now). Hoping MS Azure updates their azure cli in upcoming versions.
def container_logs(cmd, resource_group_name, name, container_name=None, follow=False):
    """Tail a container instance log. """
    container_client = cf_container(cmd.cli_ctx)
    container_group_client = cf_container_groups(cmd.cli_ctx)
    container_group = container_group_client.get(resource_group_name, name)

    # If container name is not present, use the first container.
    if container_name is None:
        container_name = container_group.containers[0].name

    if not follow:
        log = container_client.list_logs(resource_group_name, name, container_name)
        print(log.content)
        # Return the log
        return log.content
    else:
        _start_streaming(
            terminate_condition=_is_container_terminated,
            terminate_condition_args=(container_group_client, resource_group_name, name, container_name),
            shupdown_grace_period=5,
            stream_target=_stream_logs,
            stream_args=(container_client, resource_group_name, name, container_name, container_group.restart_policy))
With this modification, along with the solutions given above (using get_default_cli), we can store the log of the Azure Container Instance in a variable.
from azure.cli.core import get_default_cli

def az_cli(args_str):
    args = args_str.split()
    cli = get_default_cli()
    res = cli.invoke(args)
    if cli.result.result:
        jsondata = cli.result.result
        return jsondata
    elif cli.result.error:
        print(cli.result.error)
I think you can use subprocess and call the az cli to get the output, instead of using get_default_cli.
import subprocess
import json
process = subprocess.Popen(['az','network', 'ddos-protection', 'list'], stdout=subprocess.PIPE)
out, err = process.communicate()
d = json.loads(out)
print(d)
Well, we can execute Azure CLI commands in Python as shown below.
Here, the res variable stores an integer (the return code), so we might not be able to access the JSON response through it. To store the response in a variable, we need to use cli.result.result.
from azure.cli.core import get_default_cli

def az_cli(args_str):
    args = args_str.split()
    cli = get_default_cli()
    res = cli.invoke(args)
    if cli.result.result:
        jsondata = cli.result.result
        return jsondata
    elif cli.result.error:
        print(cli.result.error)
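A usage sketch (the command and the 'name' field are illustrative, not from the answer above):

# Sketch: call the helper and work with the parsed response it returns.
vms = az_cli('vm list --output json')
if vms:
    for vm in vms:
        print(vm['name'])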

AvroTypeException: When writing in python3

My avsc file is as follows:
{"type":"record",
"namespace":"testing.avro",
"name":"product",
"aliases":["items","services","plans","deliverables"],
"fields":
[
{"name":"id", "type":"string" ,"aliases":["productid","itemid","item","product"]},
{"name":"brand", "type":"string","doc":"The brand associated", "default":"-1"},
{"name":"category","type":{"type":"map","values":"string"},"doc":"the list of categoryId, categoryName associated, send Id as key, name as value" },
{"name":"keywords", "type":{"type":"array","items":"string"},"doc":"this helps in long run in long run analysis, send the search keywords used for product"},
{"name":"groupid", "type":["string","null"],"doc":"Use this to represent or flag value of group to which it belong, e.g. it may be variation of same product"},
{"name":"price", "type":"double","aliases":["cost","unitprice"]},
{"name":"unit", "type":"string", "default":"Each"},
{"name":"unittype", "type":"string","aliases":["UOM"], "default":"Each"},
{"name":"url", "type":["string","null"],"doc":"URL of the product to return for more details on product, this will be used for event analysis. Provide full url"},
{"name":"imageurl","type":["string","null"],"doc":"Image url to display for return values"},
{"name":"updatedtime", "type":"string"},
{"name":"currency","type":"string", "default":"INR"},
{"name":"image", "type":["bytes","null"] , "doc":"fallback in case we cant provide the image url, use this judiciously and limit size"},
{"name":"features","type":{"type":"map","values":"string"},"doc":"Pass your classification attributes as features in key-value pair"}
]}
I am able to parse this, but when I try to write with it as follows, I keep getting an issue. What am I missing? This is in Python 3. I verified it is well-formatted JSON, too.
from avro import schema as sc
from avro import datafile as df
from avro import io as avio
import os

_prodschema = 'product.avsc'
_namespace = 'testing.avro'

dirname = os.path.dirname(__file__)
avroschemaname = os.path.join(os.path.dirname(__file__), _prodschema)

sch = {}
with open(avroschemaname, 'r') as f:
    sch = f.read().encode(encoding='utf-8')
    f.close()

proschema = sc.Parse(sch)
print("Schema processed")

writer = df.DataFileWriter(open(os.path.join(dirname, "products.json"), 'wb'),
                           avio.DatumWriter(), proschema)
print("Just about to append the json")

writer.append({
    "id": "23232",
    "brand": "Relaxo",
    "category": [{"123": "shoe", "122": "accessories"}],
    "keywords": ["relaxo", "shoe"],
    "groupid": "",
    "price": "799.99",
    "unit": "Each",
    "unittype": "Each",
    "url": "",
    "imageurl": "",
    "updatedtime": "03/23/2017",
    "currency": "INR",
    "image": "",
    "features": [{"color": "black", "size": "10", "style": "contemperory"}]
})
writer.close()
What am I missing here?
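For reference, a sketch of a datum shaped to the schema's declared types (map fields as plain dicts rather than lists of dicts, price as a float to satisfy the double type, and None for the nullable string/bytes unions); this is illustrative rather than a confirmed fix for the exception above:

# Sketch: a record whose Python types line up with the Avro schema above.
writer.append({
    "id": "23232",
    "brand": "Relaxo",
    "category": {"123": "shoe", "122": "accessories"},    # map<string> -> dict
    "keywords": ["relaxo", "shoe"],                       # array<string> -> list
    "groupid": None,                                      # ["string","null"] union
    "price": 799.99,                                      # double -> float
    "unit": "Each",
    "unittype": "Each",
    "url": None,
    "imageurl": None,
    "updatedtime": "03/23/2017",
    "currency": "INR",
    "image": None,                                        # ["bytes","null"] union
    "features": {"color": "black", "size": "10", "style": "contemperory"}  # map<string> -> dict
})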
