Convert .log record to json - python-3.x

I'm looking to take a log record in the following format:
2020:03:29-23:07:22 sslvpnpa ulogd[19880]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60001" initf="eth0"
and turn it into the json format of the snippet below.
{"timestamp": "2020:03:29-23:07:22", "object": "sslvpnpa", "code": "ulogd[19880]", "id": "2001", "severity": "info", "sys": "SecureNet", "sub": "packetfilter", ...}
My start was to loop like this:
# Split the record on whitespace. Note that this also breaks apart quoted
# values that contain spaces (e.g. name="Packet dropped").
log_fields = log_row.split()
obj={}
for k in log_fields:
# find() returns -1 when "=" is absent, so only key=value tokens are kept.
if k.find('=') > -1:
# Text before the first "=" is the key; text between the first and second
# "=" is the value. Surrounding double quotes are left in place.
obj[k.split('=')[0]] = k.split('=')[1]
But then I realized that some of the values contain spaces, and that there might be a list comprehension or generator expression that is more efficient or easier to read.
The object/json this generates will then be added to a field in a larger object.
Thanks in advance.

I think this will work out for you:
def split_string(s):
    """Parse a whitespace-separated log record into a dictionary.

    Tokens containing ``=`` are handed to ``get_full_string``, which returns
    the key, the (possibly multi-token) value and the index of the last token
    it consumed. Every other token is stored under a positional ``key<N>``
    name, where ``N`` is the token's index in the split record.

    Args:
        s: The raw log line.

    Returns:
        A dict mapping field names to their string values.
    """
    tokens = s.split()
    total = len(tokens)
    parsed = {}
    position = 0
    while position < total:
        token = tokens[position]
        if "=" not in token:
            parsed[f"key{position}"] = token
        else:
            # The helper may consume several tokens; continue after them.
            name, text, position = get_full_string(tokens, position)
            parsed[name] = text
        position += 1
    return parsed
def get_full_string(split_s, ind):
    """Extract one ``key=value`` pair that starts at ``split_s[ind]``.

    A double-quoted value containing spaces has been split across several
    whitespace-delimited tokens; those tokens are re-joined with single
    spaces here.

    Args:
        split_s: The log line split on whitespace.
        ind: Index of the token that contains the ``=``.

    Returns:
        A ``(key, value, ind)`` tuple. ``value`` has all double quotes
        removed. ``ind`` is the index of the last token consumed, or
        ``len(split_s)`` when an opening quote is never closed.
    """
    # Split on the first "=" only, so values that contain "=" stay intact.
    key, _, first_part = split_s[ind].partition("=")

    # An unquoted (or empty) value is always a single token. A quoted value
    # is complete when this same token also carries the closing quote; a
    # lone '"' is an opening quote, not a complete value.
    is_quoted = first_part.startswith('"')
    is_closed = len(first_part) > 1 and first_part.endswith('"')
    if not is_quoted or is_closed:
        return key, first_part.replace('"', ''), ind

    # Quoted value spread over several tokens: gather up to the closing quote.
    value_list = [first_part]
    ind += 1
    while ind < len(split_s):
        part = split_s[ind]
        value_list.append(part)
        if part.endswith('"'):
            break
        ind += 1
    return key, " ".join(value_list).replace('"', ''), ind
Input:
s = '2020:03:29-23:07:22 sslvpnpa ulogd[19880]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60001" initf="eth0"'
print(split_string(s))
Output:
{'key0': '2020:03:29-23:07:22', 'key1': 'sslvpnpa', 'key2': 'ulogd[19880]:', 'id': '2001', 'severity': 'info', 'sys': 'SecureNet', 'sub': 'packetfilter', 'name': 'Packet dropped', 'action': 'drop', 'fwrule': '60001', 'initf': 'eth0'}

Related

Is there a way in which I can iterate over a dictionary and use it in a while loop?

This is how my code looks in Python. In the line df = get_data_df(id,start_at), instead of defining id one by one, I would like my program to iterate over the dictionary id and use each entry in the while loop below. Please help me with how to iterate over the dictionary (id) and use it in the while loop.
id= {'O': 6232,
'S': 5819,
'S': 5759,
'R': 6056,
'M': 6145,}
whole_df = pd.DataFrame()
start_at = int(datetime(2020,8,1,6,0,0,0, pytz.UTC).timestamp() * 1e6)
while True:
df = get_data_df(id,start_at)
if df.shape[0] <= 1:
break
else:
whole_df = whole_df.append(df)
last_timestamp = whole_df.last_valid_index().timestamp()
start_at = int(last_timestamp * 1e6)
#print(whole_df)
for key in id:
print(key)
That is one way you could do it. Or you could do it this way:
# Build the key list once; a dict cannot be indexed by position.
keys = list(id)
i = 0
# Bound the loop by the number of keys so it stops after the last one
# instead of running past the end of the list.
while i < len(keys):
    key = keys[i]
    print(key)
    i += 1
That is, by just iterating an index and grabbing the key at that index on each pass.
There are multiple ways of iterating over a Python dictionary using a for loop:
for key in your_dict:
value = your_dict[key]
print(value)
for value in your_dict.values():
print(value)
for key, value in your_dict.items():
print(key, '=', value)
If you really want a while loop:
# dict.keys() returns a view that cannot be indexed, so copy the keys
# into a list first.
keys = list(your_dict)
i = 0
while i < len(keys):
    key = keys[i]
    value = your_dict[key]
    print(key, '=', value)
    # Advance the index; without this the loop never terminates.
    i += 1

Pytest unittest function doesn't return any value

Could you please help me understand issue with below unittest?
Here's my function for which i am writing unittest.
def running_config_from_database(device):
try:
data = databaseproxy(cluster='https://xxx.xxxx.xxx.net')
datadb = 'test'
query = f'''fGetrunningconfigData('{device}')
'''
raw_data = data.execute_query(datadb, query)
# pdb.set_trace()
for items in raw_data.fetchall():
config = items['Config'].split('\r\n')
for index, line in enumerate(config):
if '$PASS$' in line:
if line.startswith('set groups ospf_test'):
config_line = line.replace('$PASS$', get_auth('ospf'))
config[index] = config_line
elif line.startswith('set groups rip_test'):
config_line = line.replace('$PASS$', get_auth('rsvp'))
config[index] = config_line
config = config + overload_config
return True, '\r\n'.join(config)
except Exception as e:
return False, f'Failed to get the running config from database, error: {e}'
Here's my unittest for this function:
#patch("scripts.test.overload_config")
#patch("scripts.test.get_auth")
#patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
ret = MagicMock()
ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
mock_data.return_value = ret
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = tests.test_scripts.running_config_from_database('devA')
assert status and out1 == data
When I run this unittest to test the function, I get the below assertion error - looks like the function doesn't return any value.
#patch("scripts.test.overload_config")
#patch("scripts.test.get_auth")
#patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
ret = MagicMock()
ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
mock_data.return_value = ret
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = tests.test_scripts.running_config_from_database('devA')
> assert status and out1 == data
E AssertionError: assert (True and '' == 'set groups d...rload_config2'
E + set groups ospf_test secret xyz
E +
E + sample_overload_config1
E + sample_overload_config2)
tests/test_scripts.py:80: AssertionError
I edited my function to reduce the complexity but it still doesn't work. not sure why.
Main Function:
==============
def running_config_from_database(device):
try:
pdb.set_trace()
config = running_config_database(device)
for index, line in enumerate(config):
if '$PASS$' in line:
if line.startswith('set groups ospf_test'):
config_line = line.replace('$PASS$', get_cred('ospf'))
config[index] = config_line
config = config + overload_config
return True, '\r\n'.join(config)
except Exception as e:
return False, f'Failed to get the running config from Database, error: {e}'
UnitTest Result for above Function:
=========================================================================================================== FAILURES ============================================================================================================
________________________________________________________________________________________________ test_running_config_from_database _________________________________________________________________________________________________
mock_cred = <MagicMock name='get_cred' id='140210277622336'>, mock_overload = ['sample_overload_config1', 'sample_overload_config2'], mock_running_config = <MagicMock name='running_config_database' id='140210277652128'>
#patch("test.test1.scripts.running_config_database")
#patch("test.test1.scripts.overload_config")
#patch("test.test1.scripts.get_cred")
def test_running_config_from_database(mock_cred, mock_overload, mock_running_config):
mock_running_config.return_value = ['set groups ospf_test secret $PASS$', '']
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = test.test1.scripts.test_running_config_from_database('devA')
> assert status and out1 == data
E AssertionError: assert (True and '' == 'set groups d...rload_config2'
E + set groups ospf_test secret xyz
E +
E + sample_overload_config1
E + sample_overload_config2)
validation_tests/test_scripts.py:152: AssertionError
================================================================================================== 1 failed, 6 passed in 4.79s ==================================================================================================
The problem here is the assignment to mock_overload. If you want to adapt your mocked object you have to make sure that the object itself is changed. If you just assign another object (in this case, a list), your variable now points to the list object, while the original mock_overload is no longer referenced (and is not changed). So instead of writing:
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
you can for example write
mock_overload[:] = ['sample_overload_config1', 'sample_overload_config2']
For clarification, here is a simplified version of the original code:
>>> mock_overload = []
>>> id(mock_overload)
1477793866440
>>> mock_overload = [5, 6]
>>> id(mock_overload)
1477791015560 <- changed id, no longer pointing to the mock
Now the same with the fixed code:
>>> mock_overload = []
>>> id(mock_overload)
140732764763024
>>> mock_overload[:] = [5, 6]
>>> id(mock_overload)
140732764763024 <- unchanged id, still points to the mock
Note that mock_overload[:] = [5, 6] is basically a shortcut for:
# Mutate the existing list in place so that every reference to it
# sees the new contents.
mock_overload.clear()
mock_overload.extend([5, 6])
The answer was already provided in the comment section by @MrBean Bremen. Here's the unit test after making the suggested changes.
@patch("scripts.test.overload_config")
@patch("scripts.test.get_auth")
@patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
    """Check that ``$PASS$`` is substituted and the overload config appended.

    ``patch`` decorators are applied bottom-up, so ``mock_data`` stands in
    for ``execute_query``, ``mock_cred`` for ``get_auth`` and
    ``mock_overload`` for ``overload_config``.
    """
    ret = MagicMock()
    ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
    mock_data.return_value = ret
    mock_cred.return_value = 'xyz'
    # Slice-assign so the patched object itself is updated; rebinding the
    # local name would leave the patched overload_config untouched.
    mock_overload[:] = ['sample_overload_config1', 'sample_overload_config2']
    expected = ['set groups ospf_test secret xyz', '']
    out = expected + mock_overload
    data = '\r\n'.join(out)
    status, out1 = tests.test_scripts.running_config_from_database('devA')
    assert status and out1 == data

how to generate tuples with key pair values from list which has only keys

I have list
keywords = ['car','bike','motorcycle', 'airplane', 'boat', 'boat']
and I have a api that returns data
[('bike','http://bikes.com/bikes'),
('boat','http://boat.com/boat')]
so I need a tuple/list which can be like
[('car','none'),
('bike','http://bikes.com/bikes'),
('motorcycle','none'),
('airplane','none'),
('boat','http://boat.com/boat'),
('boat','http://boat.com/boat')]
Basically, I was able to build this with a dictionary; however, I want to retain the duplicates.
keyword_list = keyword.split(",")
response = requests.request("GET", url, headers=headers, params=querystring)
field_json = json.loads(response.text)
data = field_json['response']
field_json = json.loads(response.text)
data = field_json['response']
if len(data)> 0:
list1 = {}
for i in data['docs']:
list1[i['keyword_s']]= i['url_s']
print(list(list1.items()))
keyword_dict = {k: None for k in keyword_list}
keyword_dict.update(list1)
return keyword_dict
Your code:
keywords = ['car','bike','motorcycle', 'airplane', 'boat', 'boat']
api = [('bike','http://bikes.com/bikes'), ('boat', 'http://boat.com/boat')]
Solution:
# Index the API results by keyword for direct lookup.
api_dict = dict(api)
# Walk the original keyword list so order and duplicates are kept;
# keywords the API did not return are paired with None.
out = [(k, api_dict.get(k)) for k in keywords]
api = [('bike', 'http://bikes.com/bikes'),
       ('boat', 'http://boat.com/boat')]
keywords = ['car', 'bike', 'motorcycle', 'airplane', 'boat', 'boat']

# Index the API results by keyword for direct lookup.
api_lookup = dict(api)

# Build the result from the keyword list itself so the original order and
# any duplicate keywords are preserved; keywords without a URL get None.
ans = [(k, api_lookup.get(k)) for k in keywords]
print(ans)

Find the value of a specific key in a nested dictionary

I have a nested dictionary
customer_order = {order0
{'Orientation': what_orientation, 'Size': what_size, 'sizecost': size_cost,
'eyelets': how_many_eyelets, 'eyeletcost': eyelet_cost, 'material': what_material,
'materialcost': material_cost, 'ropes': need_ropes, 'ropecost': rope_cost,
'image': need_image, 'imagecost': 0, 'wording': what_wording, 'wordcost':word_cost}
order1{'Orientation': what_orientation, 'Size': what_size, 'sizecost': size_cost,
'eyelets': how_many_eyelets, 'eyeletcost': eyelet_cost, 'material': what_material,
'materialcost': material_cost, 'ropes': need_ropes, 'ropecost': rope_cost,
'image': need_image, 'imagecost': 0, 'wording': what_wording, 'wordcost':word_cost}}
what I need to do is fetch the value of the following keys
sizecost
eyeletcost
materialcost
ropecost
wordcost
how do I loop through to get these values and add them to a running total?
Thanks
I tried the code below but get the error
for key, value in cust_details:
ValueError: too many values to unpack (expected 2)
# Walk each order and add up its cost fields.
for cust_order, cust_details in customer_order.items():
print("\nOrder: ", cust_order)
# Iterating a dict directly yields only its keys, so unpacking each key
# into (key, value) is what raises the ValueError quoted above;
# cust_details.items() yields (key, value) pairs.
for key, value in cust_details:
if (key == "sizecost"):
totalcosts += value
if (key == "eyeletcost"):
totalcosts += value
if (key == "materialcost"):
totalcosts += value
if (key == "ropecost"):
totalcosts += value
if (key == "wordcost"):
totalcosts += value
# NOTE(review): `totalcost` differs from the `totalcosts` accumulator used
# above - looks like a typo or leftover line; confirm.
totalcost += value
You can use recursion:
def look(key, d, val=None):
    """Collect every value stored under ``key`` anywhere in a nested dict.

    The search visits ``d`` and every dictionary nested inside it as a
    value. It keeps descending even when ``key`` is present at the current
    level, so deeper occurrences are not missed.

    Args:
        key: The dictionary key to search for.
        d: The (possibly nested) dictionary to search.
        val: Optional list to accumulate matches into; a new list is
            created when omitted.

    Returns:
        The list of matching values, outer levels before inner ones.
    """
    if val is None:
        val = []
    if key in d:
        val.append(d[key])
    # Always recurse into nested dictionaries, whether or not this level
    # matched.
    for child in d.values():
        if isinstance(child, dict):
            look(key, child, val)
    return val
now try calling: look("sizecost", customer_order )

Python return all items from loop?

I cannot figure how to return all the items using this code:
#staticmethod
def create_dataset():
# For a colleague, builds a dict of absence days keyed by start month,
# merged with the colleague's name, number of spells and total days.
cols = Colleagues.get_all_colleagues()
# Lazy generator of (full name, absences) pairs.
cols_abs = ((col['Firstname'] + " " + col['Surname'], col['Absences']) for col in cols)
for col in cols_abs:
dataset = list()
sum_days = list()
# Each absence d is indexed as d[0] = start date, d[1] = end date,
# both parsed as "%Y-%m-%d" strings.
for d in col[1]:
start_date = d[0]
end_date = d[1]
s = datetime.strptime(start_date, "%Y-%m-%d")
e = datetime.strptime(end_date, "%Y-%m-%d")
# Label for the month the absence starts in, e.g. "Aug-15".
startdate = s.strftime("%b-%y")
days = numpy.busday_count(s, e) + 1
sum_days.append(days)
days_per_month = startdate, days
dataset.append(days_per_month)
# dict() keeps only the last entry for a repeated month label.
dict_gen1 = dict(dataset)
comb_days = sum(sum_days)
dict_gen2 = {'Name': col[0], 'Spells': len(col[1]), 'Total(Days)': comb_days}
dict_comb = [{**dict_gen1, **dict_gen2}]
# NOTE(review): indentation is lost here; per the description that follows,
# this return sits inside the `for col` loop, so the function exits after
# the first colleague.
return dict_comb
It only returns the first "col". If I move the return statement outside of the loop it returns only the last item in my set of data. This is the output that is returned from col_abs:
('Jonny Briggs', [['2015-08-01', '2015-08-05'], ['2015-11-02', '2015-11-06'], ['2016-01-06', '2016-01-08'], ['2016-03-07', '2016-03-11']])
('Matt Monroe', [['2015-12-08', '2015-12-11'], ['2016-05-23', '2016-05-26']])
('Marcia Jones', [['2016-02-02', '2016-02-04']])
('Pat Collins', [])
('Sofia Marowzich', [['2015-10-21', '2015-10-30'], ['2016-03-09', '2016-03-24']])
('Mickey Quinn', [['2016-06-06', '2016-06-08'], ['2016-01-18', '2016-01-21'], ['2016-07-21', '2016-07-22']])
('Jenifer Andersson', [])
('Jon Fletcher', [])
('James Gray', [['2016-04-01', '2016-04-06'], ['2016-07-04', '2016-07-07']])
('Matt Chambers', [['2016-05-02', '2016-05-04']])
Can anyone help me understand this better as I want to return a "dict_comb" for each entry in col_abs ?
Replace your return statement with a yield statement. This will allow your method to continue to loop while "yielding" or returning values after each iteration.

Resources