I'm trying to convert data from a POST request into nested JSON, but I still haven't had any success. I tried the format below, but couldn't come up with anything that works. Could anyone help?
Post
{'csrfmiddlewaretoken': 'AdbaFrsoWeZTnT07m3VjncmYnYHztaQ214qh8AYH2cI40veXfe0dmfSwkI1o2ma1',
'det[0][CNPJ]': '8768678678678',
'det[0][UF]': 'SP',
'det[0][dhEmi]': '2021-07-13T08:26:30-03:00',
'det[0][nNF]': '8267',
'det[0][xNome]': 'INDÚSTRIA',
'prod[0][0][CFOP]': '6102',
'prod[0][0][NCM]': '84384000',
'prod[0][0][UF]': 'SP',
'prod[0][0][aliquotaInterna]': '18',
'prod[0][0][counter]': '1',
'prod[0][0][mva]': '34',
'prod[0][0][tributacaoEstadual]': '7',
'prod[0][0][vICMSST]': '0',
'prod[0][0][vICMS]': '25.74',
'prod[0][0][vIPI]': '0',
'prod[0][0][vProd]': '367.68',
'prod[0][0][xProd]': 'FUSO',
'prod[0][1][CFOP]': '6102',
'prod[0][1][NCM]': '84384000',
'prod[0][1][UF]': 'SP',
'prod[0][1][aliquotaInterna]': '18',
'prod[0][1][counter]': '2',
'prod[0][1][mva]': '23',
'prod[0][1][tributacaoEstadual]': '7',
'prod[0][1][vICMSST]': '0',
'prod[0][1][vICMS]': '15.96',
'prod[0][1][vIPI]': '0',
'prod[0][1][vProd]': '228.07',
'prod[0][1][xProd]': 'PORCA',
'xNome': 'COMERCIAL'}
View
if post:
    import re
    pattDet = re.compile('^([a-zA-Z_]\w+.)\[([0-9_\-][\w\-]*)\]\[([a-zA-Z_\-][\w\-]*)\]$')
    pattProd = re.compile('^([a-zA-Z_]\w+.)\[([0-9_\-][\w\-]*)\]\[([0-9_\-][\w\-]*)\]\[([a-zA-Z_\-][\w\-]*)\]$')
    pprint.pprint(post)
    det = []
    prodtem = []
    count = 0
    for post_name, value in post.items():
        try:
            det_count = int(pattDet.match(post_name).group(2))
            if pattDet.match(post_name).group(1) == 'det':
                det[pattDet.match(post_name).group(3)] = value
        except:
            pass
        try:
            if pattProd.match(post_name).group(1) == 'prod':
                if count == int(pattProd.match(post_name).group(3)):
                    prodtem.insert(count, {pattProd.match(post_name).group(4): value})
                else:
                    count += 1
        except Exception as e:
            print(e)
            pass
    result.append({
        'det': det,
        'prod': prodtem
    })
Many months ago I created a multi-dimensional parser for Django REST Framework; the source is here. I have adapted the parser for you.
import re

class ParserMultiDimensional:
    _reg_split = re.compile(r"(\[.*?\])")

    REG_NAME = r"\s*[a-zA-Z_]\w*\s*"
    _reg_name = re.compile(r"^" + REG_NAME + r"$")

    REG_INDEX_LIST = r"\s*(\d+)?\s*"
    _reg_index_list = re.compile(r"^\[(" + REG_INDEX_LIST + r")\]$")  # can be a number or nothing
    _reg_index_object = re.compile(r"^\[(" + REG_NAME + r")\]$")  # needs to start with a letter or underscore

    _reg_list = re.compile(r"^\[" + REG_INDEX_LIST + r"]$")
    _reg_object = re.compile(r"^\[" + REG_NAME + r"]$")

    def __init__(self, data):
        self.data = data
        self._valid = None

    def conv_list_index(self, key):
        ret = self._reg_index_list.search(key).groups()[0]
        if not ret:
            return -1
        return int(ret)

    def conv_object_index(self, key):
        return self._reg_index_object.search(key).groups()[0]

    def conv_index(self, index):
        if self.is_list(index):
            return self.conv_list_index(index)
        elif self.is_object(index):
            return self.conv_object_index(index)
        else:
            return index

    def is_list(self, key):
        if not key or self._reg_list.match(key):
            return True
        return False

    def is_object(self, key):
        if self._reg_object.match(key):
            return True
        return False

    def is_name(self, key):
        if self._reg_name.match(key):
            return True
        return False

    def split_key(self, key):
        # remove spaces
        key = key.replace(" ", "")
        results = self._reg_split.split(key)
        # remove empty strings
        return list(filter(None, results))

    def valid_key(self, key):
        results = self.split_key(key)
        # no results, or the first element is not a valid name
        if not results or not self.is_name(results[0]):
            return []
        for r in results[1:]:
            if not self.is_list(r) and not self.is_object(r):
                return []
        return results

    def set_type(self, dtc, key, value):
        index = self.conv_index(key)
        if self.is_list(key):
            if not len(dtc) or index == len(dtc):
                dtc.append(value)
                key = len(dtc) - 1
        elif index not in dtc:
            # TODO dict same as list
            dtc[index] = value
        return index

    def construct(self, data):
        dictionary = {}
        for key, value in data.items():
            keys = self.valid_key(key)
            if not keys:
                raise Exception(f"invalid key {key}")
            tmp = dictionary
            for curr, nxt in zip(keys, keys[1:]):
                set_type = [] if self.is_list(nxt) else {}
                tmp = tmp[self.set_type(tmp, curr, set_type)]
            self.set_type(tmp, keys[-1], data.get(key))
        self.__validate_data = dictionary

    def is_valid(self):
        self._valid = False
        try:
            self.construct(self.data)
            self._valid = True
        except Exception as err:
            self.errors = err
        return self._valid

    @property
    def validate_data(self):
        if self._valid is None:
            raise ValueError("You need to call is_valid() before accessing validate_data")
        if self._valid is False:
            raise ValueError("You can't get validated data")
        return self.__validate_data
To use it:

parser = ParserMultiDimensional(data_query)  # add your POST data
if parser.is_valid():
    data = parser.validate_data
    # do your things
else:
    print(parser.errors)
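A small usage note: in a Django view you should be able to feed it request.POST directly, since QueryDict supports the items() and get() calls the parser relies on. A minimal sketch (the view name and response handling are just illustrative):

from django.http import JsonResponse

def convert_post(request):
    parser = ParserMultiDimensional(request.POST)  # QueryDict works as-is
    if parser.is_valid():
        return JsonResponse(parser.validate_data)
    return JsonResponse({"error": str(parser.errors)}, status=400)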
The result with your data is:
{
    "csrfmiddlewaretoken": "AdbaFrsoWeZTnT07m3VjncmYnYHztaQ214qh8AYH2cI40veXfe0dmfSwkI1o2ma1",
    "det": [
        {
            "CNPJ": "8768678678678",
            "UF": "SP",
            "dhEmi": "2021-07-13T08:26:30-03:00",
            "nNF": "8267",
            "xNome": "INDÚSTRIA"
        }
    ],
    "prod": [
        [
            {
                "CFOP": "6102",
                "NCM": "84384000",
                "UF": "SP",
                "aliquotaInterna": "18",
                "counter": "1",
                "mva": "34",
                "tributacaoEstadual": "7",
                "vICMSST": "0",
                "vICMS": "25.74",
                "vIPI": "0",
                "vProd": "367.68",
                "xProd": "FUSO"
            },
            {
                "CFOP": "6102",
                "NCM": "84384000",
                "UF": "SP",
                "aliquotaInterna": "18",
                "counter": "2",
                "mva": "23",
                "tributacaoEstadual": "7",
                "vICMSST": "0",
                "vICMS": "15.96",
                "vIPI": "0",
                "vProd": "228.07",
                "xProd": "PORCA"
            }
        ]
    ],
    "xNome": "COMERCIAL"
}
Have fun with it! ;)
Related
I am trying to import student data from an Excel workbook. I have to select the column_name of the StudentMasterResource class dynamically, based on which columns are present in the file. The constants module has one dictionary, named column_name, that holds all the column names. The first time I run the import it works; after that it fails.
constants.py
column_name = dict()
resource.py
from common_account import constants
from import_export import widgets, fields, resources

def getClassName(key):
    if key in constants.column_name:
        return constants.column_name[key]
    return key

class StudentMasterResource(resources.ModelResource):
    organisation_id = fields.Field(
        column_name=getClassName('organisation_id'),
        attribute='organisation_id',
        widget=widgets.ForeignKeyWidget(OrganisationMaster, 'organisation_name'),
        saves_null_values=True
    )
    name = fields.Field(
        column_name=getClassName('Name'),
        attribute='name',
        saves_null_values=True,
        widget=widgets.CharWidget()
    )
    date_of_birth = fields.Field(
        column_name=getClassName('date'),
        attribute='date_of_birth',
        saves_null_values=True,
        widget=widgets.DateWidget()
    )
views.py
from common_account import constants
from tablib import Dataset

@api_view(['POST'])
@permission_classes([IsAuthenticated])
def student_import(request):
    if request.method == 'POST':
        context_data = dict()
        data_set = Dataset()
        file = request.FILES['myfile']
        extension = file.name.split(".")[-1].lower()
        column_data = request.data
        is_import = column_name['is_import']
        constants.valid_data.clear()
        constants.invalid_data.clear()
        if extension == 'csv':
            data = data_set.load(file.read().decode('utf-8'), format=extension)
        else:
            data = data_set.load(file.read(), format=extension)
        constants.column_name = {
            'date': column_data.get('birth'),
            'name': column_data.get('name'),
        }
        if is_import == 'No':
            result = student_resource.import_data(data_set, organisation_id=request.user.organisation_id,
                                                  offering_id=offering_id, all_invalid_data=False,
                                                  dry_run=True, raise_errors=True)
            context_data['valid_data'] = constants.valid_data
            context_data['invalid_data'] = constants.invalid_data
            context_data[constants.RESPONSE_RESULT] = {"Total records": student_resource.total_cnt,
                                                       "skip records": len(constants.invalid_data),
                                                       "Records imported": len(constants.valid_data),
                                                       }
            return JsonResponse(context_data)
        elif is_import == 'Yes':
            result = student_resource.import_data(data_set, organisation_id=request.user.organisation_id,
                                                  offering_id=offering_id, all_invalid_data=False,
                                                  dry_run=False, raise_errors=False)
            context_data[constants.RESPONSE_ERROR] = False
            context_data[constants.RESPONSE_MESSAGE] = 'Data Imported !!!'
            context_data[constants.RESPONSE_RESULT] = {"Total records": student_resource.total_cnt,
                                                       "skip records": len(constants.invalid_data),
                                                       "Records imported": len(constants.valid_data),
                                                       }
            return JsonResponse(context_data)
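A likely culprit here, for anyone hitting the same thing: the fields.Field(column_name=getClassName(...)) calls run once, when resource.py is first imported, so assigning a new dict to constants.column_name afterwards never reaches the already-built StudentMasterResource class. A minimal sketch of one way around that, building the resource class per request (the factory function and the Meta model name are placeholders, not the asker's actual code):

def build_student_resource():
    # Hypothetical factory: getClassName() is re-evaluated on every call,
    # instead of once at module import time.
    class _StudentMasterResource(resources.ModelResource):
        name = fields.Field(
            column_name=getClassName('Name'),
            attribute='name',
            saves_null_values=True,
            widget=widgets.CharWidget()
        )
        date_of_birth = fields.Field(
            column_name=getClassName('date'),
            attribute='date_of_birth',
            saves_null_values=True,
            widget=widgets.DateWidget()
        )
        class Meta:
            model = StudentMaster  # placeholder model name
    return _StudentMasterResource()

Calling student_resource = build_student_resource() after constants.column_name has been set would then pick up the mapping for that request.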
I'm looking to take a log file in the following format:

2020:03:29-23:07:22 sslvpnpa ulogd[19880]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60001" initf="eth0"

and turn it into the JSON format of the snippet below.

{"timestamp": "2020:03:29-23:07:22", "object": "sslvpnpa", "code": "ulogd[19880]", "id": "2001", "severity": "info", "sys": "SecureNet", "sub": "packetfilter" ...}
My start was to loop like this:

log_fields = log_row.split()
obj = {}
for k in log_fields:
    if k.find('=') > -1:
        obj[k.split('=')[0]] = k.split('=')[1]

But then I realized some of the values have spaces, and that there might be some list comprehension or generator expression that is more efficient or easier to read.
The object/json this generates will then be added to a field in a larger object.
Thanks in advance.
I think this will work out for you:
def split_string(s):
    d = {}
    ind = 0
    split_s = s.split()
    while ind < len(split_s):
        current_s = split_s[ind]
        if "=" in current_s:
            key, value, ind = get_full_string(split_s, ind)
            d[key] = value
        else:
            d[f"key{ind}"] = current_s
        ind += 1
    return d

def get_full_string(split_s, ind):
    current_s = split_s[ind]
    current_s_split = current_s.split("=")
    key = current_s_split[0]
    current_value = current_s_split[1]
    if current_value[-1] == '"':
        current_value = current_value.replace('"', '')
        return key, current_value, ind
    value_list = [current_value]
    ind += 1
    while ind < len(split_s):
        current_value = split_s[ind]
        value_list.append(current_value)
        if current_value[-1] == '"':
            break
        ind += 1
    value = " ".join(value_list)
    value = value.replace('"', '')
    return key, value, ind
Input:
s = '2020:03:29-23:07:22 sslvpnpa ulogd[19880]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60001" initf="eth0"'
print(split_string(s))
Output:
{'key0': '2020:03:29-23:07:22', 'key1': 'sslvpnpa', 'key2': 'ulogd[19880]:', 'id': '2001', 'severity': 'info', 'sys': 'SecureNet', 'sub': 'packetfilter', 'name': 'Packet dropped', 'action': 'drop', 'fwrule': '60001', 'initf': 'eth0'}
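For comparison, a shorter sketch of the same extraction built around a regular expression (an alternative approach, not the answer above): re.findall pulls out the key="value" pairs, spaces inside values included, and the positional fields before the first pair become key0, key1, ... as in the output above. It assumes keys are word characters and values contain no embedded quotes:

import re

def split_string_re(s):
    # key="value" pairs; [^"]* tolerates spaces inside the quotes
    d = dict(re.findall(r'(\w+)="([^"]*)"', s))
    # everything before the first key= pair is a positional field
    head = s.split('=', 1)[0].rsplit(' ', 1)[0]
    for i, field in enumerate(head.split()):
        d[f"key{i}"] = field
    return d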
I am reading log files in my Python code; they contain some nested JSON data. I have a nested structure of 4 for-loops, from which the values of certain keys are extracted and appended to a dataframe.
The nested for-loop is taking too much time, and I saw from other answers that multiprocessing is the way to go for nested for-loops, but I did not find an example for JSON data.
What is the best approach for this? Below is my code to extract data from log files into dataframes. recommendation_list is a list of JSON objects.
for recommendation in recommendation_list:
    if recommendation['type'] == "httpRequest":
        session_id = recommendation['query'].split('sessionId=')[1].split('&')[0]
        category_id = recommendation['query'].split('categoryId=')[1].split('&')[0]
    if recommendation['type'] == "httpResponse":
        recommendation_count = recommendation_count + 1
        user_id = recommendation['userId']
        time_stamp = recommendation['ts']
        event_date = time_stamp.split("T")[0]
        time = time_stamp.split("T")[-1]
        try:
            product_list = json.loads(recommendation['body'])['products']
        except:
            product_list = []
        if len(product_list) > 0:
            for product in product_list:
                product_id = product["id"]
                if 'recommendationMeta' in product:
                    data_frame = data_frame.append({
                        "transaction_id": last_id,
                        "user_id": user_id,
                        "session_id": session_id,
                        "category_id": category_id,
                        "product_id": product_id,
                        "date": event_date,
                        "time": time[0:12],
                        "event": "recommendation",
                        "ab_bucket": "B",
                        "recommendation_count": recommendation_count,
                    }, ignore_index=True)
                    for learning_unit in product['recommendationMeta']:
                        lu_name = learning_unit['lu']
                        lu_value = learning_unit['value']
                        recommendation_mode = learning_unit['recommendationMode']
                        prod_def1 = products[(products["product_id"] == product_id) &
                                             (products["lu_value"].str.lower() == lu_value)]
                        if len(prod_def1) != 0:
                            product_list = prod_def1.to_dict('records')
                            for product_id in product_list:
                                category = categories[(categories["category_def_id"] == product_id["category_def_id"]) &
                                                      (categories["lu_name"].str.lower() == lu_name)]
                                if len(category) != 0:
                                    product_def_id = product_id['product_def_id']
                                    lu_df = lu_df.append({
                                        "lu_data_id": lu_id,
                                        "product_def_id": product_def_id,
                                        "transaction_id": last_id,
                                        "rec_mode": recommendation_mode,
                                    }, ignore_index=True)
                                    lu_id = lu_id + 1
    last_id = last_id + 1
I figure that the innermost for-loop executes the most times, so I decided to use multiprocessing for it.
I replaced
for product_id in product_list:
    category = categories[(categories["category_def_id"] == product_id["category_def_id"]) &
                          (categories["lu_name"].str.lower() == lu_name)]
    if len(category) != 0:
        product_def_id = product_id['product_def_id']
        lu_df = lu_df.append({
            "lu_data_id": lu_id,
            "product_def_id": product_def_id,
            "transaction_id": last_id,
            "rec_mode": recommendation_mode,
        }, ignore_index=True)
        lu_id = lu_id + 1
with this...
for product_id in product_list:
    pool = Pool()  # Create a multiprocessing Pool
    data = pool.starmap(create_lu_data, [last_id, categories, recommendation_mode,
                                         lu_name, lu_df, lu_id, product_id])
    lu_id = lu_id + 1
p.close()
print(data)
where create_lu_data is
def create_lu_data(last_id, categories, recommendation_mode, lu_name, lu_df, lu_id, product_id):
    category = categories[(categories["category_def_id"] == product_id["category_def_id"]) &
                          (categories["lu_name"].str.lower() == lu_name)]
    if len(category) != 0:
        product_def_id = product_id['product_def_id']
        lu_df = lu_df.append({
            "lu_data_id": lu_id,
            "product_def_id": product_def_id,
            "transaction_id": last_id,
            "rec_mode": recommendation_mode,
        }, ignore_index=True)
    return lu_df
I didn't get any errors, but the output dataframe has several times the expected number of rows.
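For what it's worth, Pool.starmap expects an iterable of argument tuples and makes one call per element, unpacking each tuple into the function; the snippet above passes a single flat list, so each list element is treated as one call's arguments, which may explain the unexpected output. A minimal sketch of the usual shape, under the assumption that the worker should return just its new row rather than append to a shared DataFrame (create_lu_row is a hypothetical variant of create_lu_data, and the lu_id handling is simplified):

from multiprocessing import Pool

import pandas as pd

def create_lu_row(last_id, categories, recommendation_mode, lu_name, lu_id, product_id):
    # Hypothetical variant: returns one row dict (or None) instead of appending
    # to a DataFrame, since appends in worker processes don't propagate back.
    category = categories[(categories["category_def_id"] == product_id["category_def_id"]) &
                          (categories["lu_name"].str.lower() == lu_name)]
    if len(category) != 0:
        return {
            "lu_data_id": lu_id,
            "product_def_id": product_id['product_def_id'],
            "transaction_id": last_id,
            "rec_mode": recommendation_mode,
        }
    return None

# One argument tuple per intended call; starmap unpacks each tuple.
args = [(last_id, categories, recommendation_mode, lu_name, lu_id + i, product_id)
        for i, product_id in enumerate(product_list)]

with Pool() as pool:  # create the pool once, outside the loop
    rows = pool.starmap(create_lu_row, args)

lu_df = pd.concat([lu_df, pd.DataFrame([r for r in rows if r is not None])],
                  ignore_index=True)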
I'm new to programming and I chose Python (3.7) as my first working language. I have been working for 5 days on code that consumes an API and returns nested JSON. My intention is to insert this data into a table in a PostgreSQL database. I can already insert other data that this API provides, but this particular JSON structure is giving me problems: my code stops working when it doesn't find a certain key inside the Python object (when the key doesn't exist, I need it to end up as a null value in the table).
Let me set it up to make it clearer. In short, and with a piece of dummy data:
#Note that some keys are sometimes present and sometimes not.
myapidata = [
    {
        "MaxRpm": 2300,
        "StartPosition": {
            "Longitude": -12.3456,
            "Latitude": -78.9456
        },
        "Engine": 10623,
        "Fuel": 20.133
    },
    {
        "MaxRpm": 0.0,
        "StartPosition": {
            "Longitude": -74.1258,
            "Latitude": -96.3258
        },
        "EndPosition": {
            "Longitude": -78.9456,
            "Latitude": -85.2369
        },
        "Engine": 0,
        "Fuel": 150.35
    },
    {
        "MaxRpm": 800,
        "StartPosition": {
            "Longitude": 85.4125,
            "Latitude": -45.62145
        },
        "EndPosition": {
            "Longitude": 85.2145,
            "Latitude": 74.6789
        },
        "Engine": 104,
        "Fuel": 0.021,
        "Pulse": 7
    }
]
#Python Code:
import json
import psycopg2

api_json_list = json.loads(myapidata.content)

#Tried to add Null to keys not present (works with non Nested JSON):
allkeys = frozenset().union(*api_json_list)
for a in api_json_list:
    for b in allkeys:
        if b not in a:
            a[b] = None

#Insert data on PostgreSQL:
conn = psycopg2.connect("host = my_host dbname = my_db user = my_user password = my_pass")
cur = conn.cursor()
cur.execute("TRUNCATE TABLE mytable")
data_extract = []

def get_data():
    for data in api_json_list:
        dictionary = data
        maxrpm = dictionary['MaxRpm']
        start_lng = dictionary['StartPosition']['Longitude']
        start_lat = dictionary['StartPosition']['Latitude']
        end_lng = dictionary['EndPosition']['Longitude']
        end_lat = dictionary['EndPosition']['Latitude']
        engine = dictionary['Engine']
        fuel = dictionary['Fuel']
        pulse = dictionary['Pulse']
        data_extract.append([maxrpm, start_lng, start_lat, end_lng, end_lat, engine, fuel, pulse])

get_data()  # Get a TypeError

def post_gre():
    for item in data_extract:
        my_data = tuple(item)
        cur.execute('INSERT INTO mytable VALUES (%s,%s,%s,%s,%s,%s,%s,%s)', my_data)

post_gre()
conn.commit()
conn.close()
The result I hope to achieve in my database is something like the table below:
Table with null items
Thank you for any help!
EDIT: the corrected code, based on the answer:
import json
import psycopg2

api_json_list = json.loads(myapidata.content)

#Insert data on PostgreSQL:
conn = psycopg2.connect("host = my_host dbname = my_db user = my_user password = my_pass")
cur = conn.cursor()
cur.execute("TRUNCATE TABLE mytable")
data_extract = []

def get_data():
    for data in api_json_list:
        dictionary = data
        maxrpm = dictionary.get('MaxRpm')
        if 'StartPosition' in dictionary:
            start_lng = dictionary['StartPosition'].get('Longitude')
            start_lat = dictionary['StartPosition'].get('Latitude')
        else:
            start_lng = None
            start_lat = None
        if 'EndPosition' in dictionary:
            end_lng = dictionary['EndPosition'].get('Longitude')
            end_lat = dictionary['EndPosition'].get('Latitude')
        else:
            end_lng = None
            end_lat = None
        engine = dictionary.get('Engine')
        fuel = dictionary.get('Fuel')
        pulse = dictionary.get('Pulse')
        data_extract.append([maxrpm, start_lng, start_lat, end_lng, end_lat, engine, fuel, pulse])

get_data()

def post_gre():
    for item in data_extract:
        my_data = tuple(item)
        cur.execute('INSERT INTO mytable VALUES (%s,%s,%s,%s,%s,%s,%s,%s)', my_data)

post_gre()
conn.commit()
conn.close()
You could do something like this:
maxrpm = dictionary.get('MaxRpm')
if 'StartPosition' in dictionary:
    start_lng = dictionary['StartPosition'].get('Longitude')
    start_lat = dictionary['StartPosition'].get('Latitude')
else:
    start_lng = None
    start_lat = None
if 'EndPosition' in dictionary:
    end_lng = dictionary['EndPosition'].get('Longitude')
    end_lat = dictionary['EndPosition'].get('Latitude')
else:
    end_lng = None
    end_lat = None
engine = dictionary.get('Engine')
fuel = dictionary.get('Fuel')
pulse = dictionary.get('Pulse')
Using the get method on a dictionary will return the value if it exists or None if it doesn't.
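If you want to avoid the if/else blocks, chaining get with an empty dict as the default collapses each nested lookup to one line (missing keys still end up as None):

start_lng = dictionary.get('StartPosition', {}).get('Longitude')
start_lat = dictionary.get('StartPosition', {}).get('Latitude')
end_lng = dictionary.get('EndPosition', {}).get('Longitude')
end_lat = dictionary.get('EndPosition', {}).get('Latitude')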
I have a Map

[email:[hus@gmail.com, vin@gmail.com], jobTitle:[SE, SD], isLaptopRequired:[on, on], phone:[9908899876, 7765666543], name:[hus, Vin]]

for which I need to have another Map like

[hus:[hus@gmail.com,SE,99087665343], vin:[vin@gmail.com,SE,7765666543]]

How can I do it in Groovy?
You could do it like:
def map = [email:['hus@gmail.com', 'vin@gmail.com'], jobTitle:['SE', 'SD'], isLaptopRequired:['on', 'on'], phone:['9908899876', '7765666543'], name:['hus', 'Vin']]
def result = [:]
map.name.eachWithIndex { name, idx ->
    result << [ (name): map.values()*.getAt( idx ) - name ]
}
assert result == [hus:['hus@gmail.com', 'SE', 'on', '9908899876'], Vin:['vin@gmail.com', 'SD', 'on', '7765666543']]
Or, you could also do:
def result = [map.name,map.findAll { it.key != 'name' }.values().toList().transpose()].transpose().collectEntries()
But this is just less code at the expense of both readability and resource usage ;-)
The most visual solution I have:
def map = [email:['hus@gmail.com', 'vin@gmail.com'], jobTitle:['SE', 'SD'], isLaptopRequired:['on', 'on'], phone:['9908899876', '7765666543'], name:['hus', 'Vin']]

def names = map.name
def emails = map.email
def jobTitles = map.jobTitle
def isLaptopRequireds = map.isLaptopRequired // sorry for the variable name
def phones = map.phone

def result = [:]
for (i in 0..names.size() - 1) {
    result << [(names[i]): [emails[i], jobTitles[i], isLaptopRequireds[i], phones[i]]]
}
assert result == [hus:['hus@gmail.com', 'SE', 'on', '9908899876'], Vin:['vin@gmail.com', 'SD', 'on', '7765666543']]