Data alignment in Python

Data alignment in Python - excel

I am new to Python. I am writing code to generate an Excel file from data sourced by calling APIs, correlating the results to get the desired output.
Basically, it takes input from one database, searches for it in the others, and fetches the related information.
The 4 databases have below data :
EEp
---------------------
{u'data': [{u'_id': u'5c30702c8ca9f51da8178df4',
u'encap': u'vlan-24',
u'ip': u'7.12.12.16',
u'mac': u'5B:P9:01:9E:42:08'}]}
PathEp
-----------
{u'data': [{u'_id': u'5c54a81a8ca9f51da84ae08e',
u'paths': u'paths-1507',
u'endpoint': u'eth1/10',
u'cep': u'5B:P9:01:9E:42:08',
u'tenant': u'ESX'}]}
ip4_address
-----------------------
{u'data': [{u'Allocation': u'Build_Reserved',
u'address': u'7.12.12.16',
u'name': u'fecitrix-1',
u'state': u'RESERVED'}]}
asset
---------------
{u'data': [{u'_id': u'57ccce8110dd54f02881fedc',
u'client': u'CES',
u'hostname': u'fecitrix-1'
u'os_team': u'Window'}]}
Logic:
If "mac" of EEp and "cep" of PathEp are the same, then take "encap", "ip", "mac",
"paths", "endpoint", "cep" and "tenant" (these values need to be exported
to Excel).
Take "ip" of EEp, search for it in "ip4_address" (the "address" field),
and get the "name" from ip4_address (the name needs to be exported to Excel).
If "name" of ip4_address is equal to "hostname" of the "asset" database, then take
"client" and "os_team" (export those to Excel as well).
I have written the script but not getting the desired result.
def get_host_details(self):
data = {
"find": {
"hostname": self.controller
},
"projection":{
"tenant": 1,
"paths": 1,
"endpoint":1
}
}
host_details = self.post("https://database.app.com/api/data/devices/PathEp/find", data)
#print host_details
hosts = []
for record in host_details:
if "mig" not in record["endpoint"]:
hosts.append(record)
return hosts
def get_ipaddress(self, controller):
host_record = {"tenant": "UNKNOWN",
"paths": "UNKNOWN",
"endpoint": "UNKNOWN",
"ip": "UNKNOWN",
"mac": "UNKNOWN",
"encap": "UNKNOWN"}
data = {
"find": {
"hostname": controller,
"ip": {
"$ne": "0.0.0.0"
}
},
"projection": {
"ip": 1,
"mac":1,
"encap":1,
}
}
endpoints = self.post("https://database.app.com/api/data/devices/EEp/find", data)
IPAM = self.get_dns()
print endpoints
host_details = self.get_host_details()
host_details_record = []
for record in endpoints:
for host in host_details:
if record["mac"] == host["cep"]:
host_record = {"tenant": host["tenant"],
"paths": host["paths"],
"endpoint": host["endpoint"],
"ip": record["ip"],
"mac": record["mac"],
"encap": record["encap"]}
host_details_record.append(host_record)
self.get_excel(host_details_record)
def get_dns(self, endpoints):
ip_dns_record = []
for each_endpoint in endpoints:
data = {
"find":
{
"address": {
"$eq": each_endpoint["ip"]
},
},
"projection":
{
"name": 1
}
}
dns_record = {"client":"UNKNOWN",
"os_team":"UNKNOWN",
ipam_record = self.post("https://database.app.com/api/data/"
"internal/ip4_address/find", data)
if ipam_record:
dns_record["ip_address"] = each_endpoint["ip"]
dns_record["hostname"] = ipam_record[0]["name"]
dns_record = self.get_remedy_details(ipam_record[0]["name"],
dns_record)
ip_dns_record.append(dns_record)
else:
dns_record["ip_address"] = each_endpoint["ip"]
dns_record["hostname"] = "UNKNOWN"
ip_dns_record.append(dns_record)
self.get_excel(ip_dns_record)
def get_remedy_details(self, hostname, dns_record):
data = {
"find":
{
"hostname": hostname.upper(),
}
}
remedy_data = self.post("https://database.app.com/api/data/internal/asset/find", data)
print(remedy_data)
#remedy_data = remedy_data["data"]
if remedy_data:
dns_record["client"] = remedy_data[0].get("client","UNKNOWN")
dns_record["os_team"] = remedy_data[0].get("os_team", "UNKNOWN")
else:
dns_record["client"] = "UNKNOWN"
dns_record["os_team"] = "UNKNOWN"
return dns_record
def get_excel(self, ip_dns_record):
filename = self.controller + ".xls"
excel_file = xlwt.Workbook()
sheet = excel_file.add_sheet('HOSTLIST')
sheet.write(0, 0, "IP Address")
sheet.write(0, 1, "HostName")
sheet.write(0, 2, "Client")
sheet.write(0, 3, "OS Team")
for count in xrange(1, len(ip_dns_record)+1):
sheet.write(count, 0,ip_dns_record[count - 1]["ip_address"])
sheet.write(count, 1,ip_dns_record[count - 1]["hostname"])
sheet.write(count, 2,ip_dns_record[count - 1]["client"])
sheet.write(count, 3,ip_dns_record[count - 1]["os_team"])
excel_file.save(filename)
if __name__ == "__main__":
controller = sys.argv[1]
OBJ = ACIHostList(controller)
print "SCRIPT COMPLETED"
No idea where I am going wrong and what needs to be done .

Your question leaves too much out. You should include all errors that you get. You should also comment your code as well so we can understand what you are trying to achieve in each step.
This is not an answer but something to try:
Rather than trying to wrap your head around a module for writing Excel files, write your data to a simple CSV file. A CSV file can be opened in Excel, where it is formatted correctly, but it is a lot easier to create.
import csv
# Rows to dump: each inner list becomes one line of the CSV file.
data = [["a", "b"], ["c", "d"]]

# newline="" hands line-ending control to the csv module; without it an extra
# blank line appears between rows on Windows. (This is the Python 3 form; on
# Python 2 open the file with mode "wb" instead.)
with open("file.csv", "w", newline="") as csv_file:
    create_csv = csv.writer(csv_file)
    create_csv.writerows(data)
Simply gather all your data into a 2D list and use the code above to dump it into a file that you can easily read.
check the output of the file and see if you are getting the data you expect.
If you are not getting the desired data into this CSV file then there is an issue with your database queries.

Related

How to mock Athena query results values with Moto3 for a specific table?

I am using pytest and moto3 to test some code similar to this:
response = athena_client.start_query_execution(
QueryString='SELECT * FROM xyz',
QueryExecutionContext={'Database': myDb},
ResultConfiguration={'OutputLocation': someLocation},
WorkGroup=myWG
)
execution_id = response['QueryExecutionId']
if response['QueryExecution']['Status']['State'] == 'SUCCEEDED':
response = athena_client.get_query_results(
QueryExecutionId=execution_id
)
results = response['ResultSet']['Rows']
...etc
In my test I need that the values from results = response['ResultSet']['Rows'] are controlled by the test. I am using some code like this:
backend = athena_backends[DEFAULT_ACCOUNT_ID]["us-east-1"]
rows = [{"Data": [{"VarCharValue": "xyz"}]}, {"Data": [{"VarCharValue": ...}, etc]}]
column_info = [
{
"CatalogName": "string",
"SchemaName": "string",
"TableName": "xyz",
"Name": "string",
"Label": "string",
"Type": "string",
"Precision": 123,
"Scale": 123,
"Nullable": "NOT_NULL",
"CaseSensitive": True,
}
]
results = QueryResults(rows=rows, column_info=column_info)
backend.query_results[NEEDED_QUERY_EXECUTION_ID] = results
but that is not working as I guess NEEDED_QUERY_EXECUTION_ID is not known before from the test. How can I control it?
UPDATE
Based on suggestion I tried to use:
results = QueryResults(rows=rows, column_info=column_info)
d = defaultdict(lambda: results.to_dict())
backend.query_results = d
to force a return of values, but it seems not working as from the moto3's models.AthenaBackend.get_query_results, I have this code:
results = (
self.query_results[exec_id]
if exec_id in self.query_results
else QueryResults(rows=[], column_info=[])
)
return results
which will fail, as the if condition won't be satisfied.

Extending the solution of the defaultdict, you could create a custom dictionary that contains all execution_ids, and always returns the same object:
class QueryDict(dict):
    """Dictionary stand-in that answers every key with the same canned results.

    It reports every key as present and returns a freshly built
    ``QueryResults`` for any lookup, so code that first tests
    ``key in mapping`` and then reads ``mapping[key]`` always receives the
    canned payload, whatever the key is.
    """

    @staticmethod
    def _canned_results():
        """Build a new ``QueryResults`` with the fixed rows and column metadata."""
        return QueryResults(
            rows=[
                {"Data": [{"VarCharValue": "xyz"}]},
                {"Data": [{"VarCharValue": "..."}]},
            ],
            column_info=[
                {
                    "CatalogName": "string",
                    "SchemaName": "string",
                    "TableName": "xyz",
                    "Name": "string",
                    "Label": "string",
                    "Type": "string",
                    "Precision": 123,
                    "Scale": 123,
                    "Nullable": "NOT_NULL",
                    "CaseSensitive": True,
                }
            ],
        )

    def __contains__(self, item):
        # Claim to hold every key so a membership test never fails.
        return True

    def __getitem__(self, item):
        # The key is ignored: every lookup yields the same canned payload.
        return self._canned_results()


# Swap the backend's result store for the always-answering dictionary.
backend = athena_backends[DEFAULT_ACCOUNT_ID]["us-east-1"]
backend.query_results = QueryDict()

An alternative solution to using custom dictionaries would be to seed Moto.
Seeding Moto ensures that it will always generate the same 'random' identifiers, which means you always know what the value of NEEDED_QUERY_EXECUTION_ID is going to be.
backend = athena_backends[DEFAULT_ACCOUNT_ID]["us-east-1"]
rows = [{"Data": [{"VarCharValue": "xyz"}]}, {"Data": [{"VarCharValue": "..."}]}]
column_info = [...]
results = QueryResults(rows=rows, column_info=column_info)
backend.query_results["bdd640fb-0667-4ad1-9c80-317fa3b1799d"] = results
import requests
requests.post("http://motoapi.amazonaws.com/moto-api/seed?a=42")
# Test - the execution id will always be the same because we just seeded Moto
execution_id = athena_client.start_query_execution(...)
Documentation on seeding Moto can be found here: http://docs.getmoto.org/en/latest/docs/configuration/recorder/index.html#deterministic-identifiers
(It only talks about seeding Moto in the context of recording/replaying requests, but the functionality can be used on its own.)

Python nested json

Does anyone have a solution for this? I want the API data to be returned in the following shape.
I want the data for the same state grouped under one object rather than spread across separate objects; data for different states can be in different objects.
data = [{
state_name:New_jersi, data:{
category:Phishing,
sub_cat_data:[{
name:SubCat1,
count:20
},
{
name:SubCat2,
count:30
}]
}
category: malware,
sub_cat_data:[{
name:SubCat1,
count:20
},
{
name:SubCat2,
count:30
}]
},
{
state_name:Washinton, data:{
category:Phishing,
data:[{
name:SubCat1,
count:20
},
{
name:SubCat2,
count:30
}]
}
}]
But my API response is:
{
"state": "South Carolina",
"state_count": 2,
"Website Compromise/Intrusion": {
"sub_category": {
"Insecure Direct Object Reference": 2,
"Memory Corruption": 2,
"SQLI": 1,
"Stack Overflow": 1,
"XSRF": 1,
"Heap Overflow": 1,
"Security Misconfiguration": 1
}
}
},
{
"state": "South Carolina",
"state_count": 1,
"Phishing": {
"sub_category": {
"Spear Phishing Attacks": 2,
"Fast Flux": 2,
"Rock fish": 2,
"Identify Theft/Social Engineering": 1,
"Phishing Redirector": 1,
"Pharming": 1,
"Exploitation of Hardware Vulnerability": 1
}
}
},
I want the data for the same state to be in the same object, but in my case each state's data ends up in separate objects because the data is built per category.
My logic is below:
cat_count = incnum.values('incident_category__cat_name','incident_category__cat_id').annotate(count=Count('incident_category__cat_id'))
subcat_count = incnum.values('incident_sub_category__sub_cat_name','incident_sub_category__cat_id','incident_sub_category__id').annotate(count=Count('incident_sub_category__cat_id'))
reporter_state_count1 = incnum.values('incident_category__cat_id','reporter__comp_individual_state','reporter__comp_individual_state__name').annotate(count=Count('incident_category__cat_id'))
for x, state_ in enumerate(reporter_state_count1):
for i, cat_ in enumerate(cat_count):
if state_['incident_category__cat_id'] == cat_['incident_category__cat_id']:
for i, cat_ in enumerate(cat_count):
if state_['incident_category__cat_id'] == cat_['incident_category__cat_id']:
arr16.append({'state':state_['reporter__comp_individual_state__name'], 'state_count':state_['count'], cat_['incident_category__cat_name']:{'sub_category':{}}})
for sub_ in subcat_count:
if cat_['incident_category__cat_id'] == sub_['incident_sub_category__cat_id']:
arr16[i][cat_['incident_category__cat_name']]['sub_category'].update({sub_['incident_sub_category__sub_cat_name']:sub_['count']})

cat_count = incnum.values('incident_category__cat_name', 'incident_category__cat_id').annotate(
count=Count('incident_category__cat_id'))
subcat_count = incnum.values('incident_sub_category__sub_cat_name', 'incident_sub_category__cat_id',
'incident_sub_category__id').annotate(count=Count('incident_sub_category__cat_id'))
reporter_state_count1 = incnum.values('incident_category__cat_id', 'reporter__comp_individual_state',
'reporter__comp_individual_state__name').annotate(
count=Count('incident_category__cat_id'))
arr16 = []
for state_ in reporter_state_count1:
state_data = {"state_name" : state_['reporter__comp_individual_state__name'], "data":[]}
for cat_ in cat_count:
if state_['incident_category__cat_id'] == cat_['incident_category__cat_id']:
sub_cat_data = [{sub_['incident_sub_category__sub_cat_name']: sub_['count']} for sub_ in subcat_count if cat_['incident_category__cat_id'] == sub_['incident_sub_category__cat_id']]
category_data = {"category": cat_['incident_category__cat_name'], "sub_cat_data": sub_cat_data}
state_data["data"].append(category_data)
arr16.append(state_data)
One state might have multiple categories, and the structure you proposed for your API cannot represent more than one category per state. That is why I modified it a little: each state object now holds a list containing all of its categories.
Edit
Creating a dictionary which will store category_id as key and all the subcategory of that category as value
cat_to_subcat_list = {}
for cat_ in cat_count:
sub_cat_data = [{"name":sub_['incident_sub_category__sub_cat_name'],"count": sub_['count']} for sub_ in subcat_count if
cat_['incident_category__cat_id'] == sub_['incident_sub_category__cat_id']]
cat_to_subcat_list[cat_['incident_category__cat_id']] = {"category": cat_['incident_category__cat_name'], "sub_cat_data": sub_cat_data}
Creating a dictionary which will store the state name as key and a list of category objects as value
# Maps each state name to the list of category objects reported for it.
state_data = {}
for state_ in reporter_state_count1:
    # setdefault creates the empty list the first time a state name is seen
    # and returns the existing list on every later occurrence.
    bucket = state_data.setdefault(state_['reporter__comp_individual_state__name'], [])
    bucket.append(cat_to_subcat_list[state_['incident_category__cat_id']])
Re-formatting json as api needed
arr16 = [
{
"state_name": state_name,
"data": state_data
}for state_name, state_data in state_data.items()
]

search a list in api/ format url seach for api

I want to generate 6 random numbers for pokemon api ID.
Put in list.
Then use the 6 numbers in url search.
The url doesn't recognise the list.
I need to convert the list to numbers. I'm not sure how to format them into the url.
import random
import requests
pokemon_ID = []
# pokemon_ID_add = str(pokemon_ID)[1:-1]
# pokemon_ID2 = str(pokemon_ID)[1:-1]
for i in range(0,6):
number = random.randint(1 ,151)
while i in pokemon_ID:
number = random.randint(1, 151)
pokemon_ID.append(number)
url = 'https://pokeapi.co/api/v2/pokemon/{}/'.format(pokemon_ID)
response = requests.get(url)
pokemon = response.json()
print(pokemon)

You can use loop to iterate over random IDs and store the result to a list:
import json
import random
import requests
# URL template; the placeholder takes a single numeric ID per request.
url = "https://pokeapi.co/api/v2/pokemon/{}/"

# random.sample draws without replacement, so the six IDs are guaranteed to be
# distinct (independent randint() calls could return the same ID twice).
random_pokemon_ids = random.sample(range(1, 152), 6)

result = []
for id_ in random_pokemon_ids:
    # One request per ID: each ID is formatted into the URL on its own.
    pokemon = requests.get(url.format(id_)).json()
    result.append(pokemon)

# pretty print the result:
print(json.dumps(result, indent=4))
Prints:
[
{
"abilities": [
{
"ability": {
"name": "rock-head",
"url": "https://pokeapi.co/api/v2/ability/69/"
},
"is_hidden": false,
"slot": 1
},
{
"ability": {
"name": "lightning-rod",
"url": "https://pokeapi.co/api/v2/ability/31/"
},
"is_hidden": false,
"slot": 2
},
{
"ability": {
"name": "battle-armor",
"url": "https://pokeapi.co/api/v2/ability/4/"
},
"is_hidden": true,
"slot": 3
}
],
"base_experience": 64,
"forms": [
{
"name": "cubone",
"url": "https://pokeapi.co/api/v2/pokemon-form/104/"
}
],
...

Dynamically call function name in python

I'm trying to call a function dynamically by name: if the name is present in the list of dictionaries, call the function; otherwise exit silently. How can I achieve this? I tried the locals() approach below, but it didn't work.
jobs = [{
"job": "IT",
"company": "google"
},
{
"job": "Sales",
"company": "walmart"
}
]
def IT(name):
print('Full %s' %name )
def Sales(name):
print('View %s' %name)
name = 'department'
Input_Job = 'Sales'
locals()[jobs['job'][Input_Job]](name)
Expected output when Input_Job = 'Sales':
View department
Expected output when Input_Job = 'IT':
Full department

How to find and get values from a json file which are in the same level?

I have a JSON file which looks like the one below. I receive the name and product_version as parameters. Using both of them, I need to get the release_version and the latest boolean value.
E.g. if var1 = section1 and var2 = 2.6.0, then release_version should be 2.6.0.9 and latest should be false, in Groovy.
file.json
{
"platforms": [
{
"name": "section1",
"versions": [
{
"product_version": "2.6.0",
"release_version": "2.6.0.9",
"latest": false
},
{
"product_version": "3.0.0",
"release_version": "3.0.0.3",
"latest": false
}
]
},
{
"name": "section2",
"versions": [
{
"product_version": "2.6.0",
"release_version": "2.6.0.9",
"latest": false
},
{
"product_version": "3.0.0",
"release_version": "3.0.0.3",
"latest": false
}
]
}
]
}
This is the code snippet I tried out.
filename = "file.json"
def jsonSlurper = new JsonSlurper()
parsed_json = jsonSlurper.parse(new File(filename))
release_tag = json.parsed_json.find {platforms.name == "section1".version[].product_version == "2.6.0".release_version}
println release_tag
But this didn't work. Please help me with this

You first have to find the platform by name (which could fail); next
find in the versions the product version. E.g.
// Parse the JSON file into nested maps and lists.
def data = new groovy.json.JsonSlurper().parse("data.json" as File)
// The two lookup keys: platform name and product version.
def name='section1'
def productVersion = '2.6.0'
// First find the platform by name, then find the matching entry in its versions.
// `?.` is the safe-navigation operator: if the step before it produced null
// (no platform with that name, or no versions), the chain yields null instead
// of throwing, so `result` is null whenever nothing matches.
def result = data.platforms.find{ it.name == name }?.versions?.find{ it.product_version == productVersion }
// Checks against the sample data shown in the question.
assert result.release_version == '2.6.0.9'
assert result.latest == false
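Note the use of the safe-navigation operator (`?.`) after the first find to
short-circuit when no platform matches. (The "Elvis operator" is `?:`, which is a different operator.)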
If you have to do many such lookups on the same data file, it might make
sense to shape the data into a better form for the lookup you are doing
(e.g. turn that into maps of maps for your two lookup keys)

Develop Reference

node.js excel linux python-3.x azure haskell apache-spark rust .htaccess string

Data alignment in Python - excel

Related

How to mock Athena query results values with Moto3 for a specific table?

Python nested json

search a list in api/ format url seach for api

Dynamically call function name in python

How to find and get values from a json file which are in the same level?

Categories

Resources