Extracting the data from pandas dataframe - python-3.x

I have a pandas dataframe having data in each row like below
Joel Thompson / Tracy K. Smith</h2>
</div>
<div>
<p>New work (World Premiere–New York Philharmonic Commission)
How would I filter this so I can get results to work with like this:
name : Joel Thompson, Tracy K. Smith
information : New work (World Premiere–New York Philharmonic Commission)

You should try to use the split function for string variables. You can do this this way :
#Get your row in a string variable text
text = "Joel Thompson / Tracy K. Smith</h2></div><div><p>New work (World Premiere–New York Philharmonic Commission)"
#Extracting the name
Names_string = text.split("</h2>")[0]
Names_list = Names_string.split(" / ")
#Extracting information
Information = text.split("<p>")[-1]
result = {
"name" : Names_list,
'information' : Information
}
print(result)
It will display this :
{'name': ['Joel Thompson', 'Tracy K. Smith'], 'information': 'New work (World Premiere–New York Philharmonic Commission)'}
You should make it a function this way :
def getDictFromRow(text):
#Extracting the name
Names_string = text.split("</h2>")[0]
Names_list = Names_string.split(" / ")
#Extracting information
Information = text.split("<p>")[-1]
result = {
"name" : Names_list,
'information' : Information
}
return result
print(getDictFromRow("Joel Thompson / Tracy K. Smith</h2></div><div><p>New work (World Premiere–New York Philharmonic Commission)"))

Related

Regex Error and Improvement Driving Licence Data Extraction

I am trying to extract the Name, License No., Date Of Issue and Validity from an Image I processed using Pytesseract. I am quite a lot confused with regex but still went through few documentations and codes over the web.
I got till here:
import pytesseract
import cv2
import re
import cv2
from PIL import Image
import numpy as np
import datetime
from dateutil.relativedelta import relativedelta
def driver_license(filename):
"""
This function will handle the core OCR processing of images.
"""
i = cv2.imread(filename)
newdata=pytesseract.image_to_osd(i)
angle = re.search('(?<=Rotate: )\d+', newdata).group(0)
angle = int(angle)
i = Image.open(filename)
if angle != 0:
#with Image.open("ro2.jpg") as i:
rot_angle = 360 - angle
i = i.rotate(rot_angle, expand="True")
i.save(filename)
i = cv2.imread(filename)
# Convert to gray
i = cv2.cvtColor(i, cv2.COLOR_BGR2GRAY)
# Apply dilation and erosion to remove some noise
kernel = np.ones((1, 1), np.uint8)
i = cv2.dilate(i, kernel, iterations=1)
i = cv2.erode(i, kernel, iterations=1)
txt = pytesseract.image_to_string(i)
print(txt)
text = []
data = {
'firstName': None,
'lastName': None,
'age': None,
'documentNumber': None
}
c = 0
print(txt)
#Splitting lines
lines = txt.split('\n')
for lin in lines:
c = c + 1
s = lin.strip()
s = s.replace('\n','')
if s:
s = s.rstrip()
s = s.lstrip()
text.append(s)
try:
if re.match(r".*Name|.*name|.*NAME", s):
name = re.sub('[^a-zA-Z]+', ' ', s)
name = name.replace('Name', '')
name = name.replace('name', '')
name = name.replace('NAME', '')
name = name.replace(':', '')
name = name.rstrip()
name = name.lstrip()
nmlt = name.split(" ")
data['firstName'] = " ".join(nmlt[:len(nmlt)-1])
data['lastName'] = nmlt[-1]
if re.search(r"[a-zA-Z][a-zA-Z]-\d{13}", s):
data['documentNumber'] = re.search(r'[a-zA-Z][a-zA-Z]-\d{13}', s)
data['documentNumber'] = data['documentNumber'].group().replace('-', '')
if not data['firstName']:
name = lines[c]
name = re.sub('[^a-zA-Z]+', ' ', name)
name = name.rstrip()
name = name.lstrip()
nmlt = name.split(" ")
data['firstName'] = " ".join(nmlt[:len(nmlt)-1])
data['lastName'] = nmlt[-1]
if re.search(r"[a-zA-Z][a-zA-Z]\d{2} \d{11}", s):
data['documentNumber'] = re.search(r'[a-zA-Z][a-zA-Z]\d{2} \d{11}', s)
data['documentNumber'] = data['documentNumber'].group().replace(' ', '')
if not data['firstName']:
name = lines[c]
name = re.sub('[^a-zA-Z]+', ' ', name)
name = name.rstrip()
name = name.lstrip()
nmlt = name.split(" ")
data['firstName'] = " ".join(nmlt[:len(nmlt)-1])
data['lastName'] = nmlt[-1]
if re.match(r".*DOB|.*dob|.*Dob", s):
yob = re.sub('[^0-9]+', ' ', s)
yob = re.search(r'\d\d\d\d', yob)
data['age'] = datetime.datetime.now().year - int(yob.group())
except:
pass
print(data)
I need to extract the Validity and Issue Date as well. But not getting anywhere near it. Also, I have seen using regex shortens the code like a lot so is there any better optimal way for it?
My input data is a string somewhat like this:
Transport Department Government of NCT of Delhi
Licence to Drive Vehicles Throughout India
Licence No. : DL-0820100052000 (P) R
N : PARMINDER PAL SINGH GILL
: SHRI DARSHAN SINGH GILL
DOB: 10/05/1966 BG: U
Address :
104 SHARDA APPTT WEST ENCLAVE
PITAMPURA DELHI 110034
Auth to Drive Date of Issue
M.CYL. 24/02/2010
LMV-NT 24/02/2010
(Holder's Sig natu re)
Issue Date : 20/05/2016
Validity(NT) : 19/05/2021 : c
Validity(T) : NA Issuing Authority
InvCarrNo : NA NWZ-I, WAZIRPUR
Or like this:
in
Transport Department Government of NCT of Delhi
Licence to Drive Vehicles Throughout India
2
Licence No. : DL-0320170595326 () WN
Name : AZAZ AHAMADSIDDIQUIE
s/w/D : SALAHUDDIN ALI
____... DOB: 26/12/1992 BG: O+
\ \ Address:
—.~J ~—; ROO NO-25 AMK BOYS HOSTEL, J.
— NAGAR, DELHI 110025
Auth to Drive Date of Issue
M.CYL. 12/12/2017
4 wt 4
Iseue Date: 12/12/2017 a
falidity(NT) < 2037
Validity(T) +: NA /
Inv CarrNo : NA te sntian sana
Note: In the second example you wouldn't get the validity, will optimise the OCR for later. Any proper guide which can help me with regex which is a bit simpler would be good.
You can use this pattern: (?<=KEY\s*:\s*)\b[^\n]+ and replace KEY with one of the issues of the date, License No. and others.
Also for this pattern, you need to use regex library.
Code:
import regex
text1 = """
Transport Department Government of NCT of Delhi
Licence to Drive Vehicles Throughout India
Licence No. : DL-0820100052000 (P) R
N : PARMINDER PAL SINGH GILL
: SHRI DARSHAN SINGH GILL
DOB: 10/05/1966 BG: U
Address :
104 SHARDA APPTT WEST ENCLAVE
PITAMPURA DELHI 110034
Auth to Drive Date of Issue
M.CYL. 24/02/2010
LMV-NT 24/02/2010
(Holder's Sig natu re)
Issue Date : 20/05/2016
Validity(NT) : 19/05/2021 : c
Validity(T) : NA Issuing Authority
InvCarrNo : NA NWZ-I, WAZIRPUR
"""
for key in ('Issue Date', 'Licence No\.', 'N', 'Validity\(NT\)'):
print(regex.findall(fr"(?<={key}\s*:\s*)\b[^\n]+", text1, regex.IGNORECASE))
Output:
['20/05/2016']
['DL-0820100052000 (P) R']
['PARMINDER PAL SINGH GILL']
['19/05/2021 : c']
You can also use re with a single regex based on alternation that will capture your keys and values:
import re
text = "Transport Department Government of NCT of Delhi\nLicence to Drive Vehicles Throughout India\n\nLicence No. : DL-0820100052000 (P) R\nN : PARMINDER PAL SINGH GILL\n\n: SHRI DARSHAN SINGH GILL\n\nDOB: 10/05/1966 BG: U\nAddress :\n\n104 SHARDA APPTT WEST ENCLAVE\nPITAMPURA DELHI 110034\n\n\n\nAuth to Drive Date of Issue\nM.CYL. 24/02/2010\nLMV-NT 24/02/2010\n\n(Holder's Sig natu re)\n\nIssue Date : 20/05/2016\nValidity(NT) : 19/05/2021 : c\nValidity(T) : NA Issuing Authority\nInvCarrNo : NA NWZ-I, WAZIRPUR"
search_phrases = ['Issue Date', 'Licence No.', 'N', 'Validity(NT)']
reg = r"\b({})\s*:\W*(.+)".format( "|".join(sorted(map(re.escape, search_phrases), key=len, reverse=True)) )
print(re.findall(reg, text, re.IGNORECASE))
Output of this short online Python demo:
[('Licence No.', 'DL-0820100052000 (P) R'), ('N', 'PARMINDER PAL SINGH GILL'), ('Issue Date', '20/05/2016'), ('Validity(NT)', '19/05/2021 : c')]
The regex is
\b(Validity\(NT\)|Licence\ No\.|Issue\ Date|N)\s*:\W*(.+)
See its online demo.
Details:
map(re.escape, search_phrases) - escapes all special chars in your search phrases to be used as literal texts in a regex (else, . will match any chars, ? won't match a ? char, etc.)
sorted(..., key=len, reverse=True) - sorts the search phrases by length in descending order (to get longer matches first)
"|".join(...) - creates an alternation pattern, a|b|c|...
r"\b({})\s*:\W*(.+)".format( ... ) - creates the final regex.
Regex details
\b - a word boundary (NOTE: replace with (?m)^ if your matches occur at the beginning of a line)
(Validity\(NT\)|Licence\ No\.|Issue\ Date|N) - Group 1: one of the search phrases
\s* - zero or more whitespaces
: - a colon
\W* - zero or more non-word chars
(.+) - (capturing) Group 2: one or more chars other than line break chars, as many as possible.

Analysis from unique values from one column naming files from another

I have the following code that I am using to loop through unique values in my data set and it works great, but I would like to change the export name to a more appropriate unique value that I can use in a dashboard. The following is the code that I have where x in path is taken from teams unique names. However, for this part only, I'd like the name to be assigned from a list outside of the original dataframe.
team = df['RSA'].unique()
for x in team:
path2 = r'C:\Users\davidlopez\Desktop\regions\%s.csv' %x
r = HROs['RSA'] == x
Completed = HROs['Current Team Simple'].isin(['Completed'])
table = HROs[Completed & r]
top20 = table.groupby(['To Position Title']).RequestNumber.count().sort_values().nlargest(20)
top20.to_csv(path2, index=True, header=True)
Couple of ways I've tried to solve this:
1) Create a list and assign x in the path to the list instead of x.
mylist = ['HR_DASH_0034','HR_DASH_0035','HR_DASH_0036','HR_DASH_0037','HR_DASH_0038','HR_DASH_0039','HR_DASH_0040',
'HR_DASH_0041','HR_DASH_0042','HR_DASH_0043','HR_DASH_0044','HR_DASH_0045','empty']
for x in team:
path2 = r'C:\Users\davidlopez\Desktop\regions\%s.csv' %mylist
r = HROs['RSA'] == x
Completed = HROs['Current Team Simple'].isin(['Completed'])
table = HROs[Completed & r]
top20 = table.groupby(['To Position Title']).RequestNumber.count().sort_values().nlargest(20)
top20.to_csv(path2, index=True, header=True)
That doesn't work because it doesn't loop and it doesn't align the new values to the original dataframe values. Cross that off the list.
2) I thought maybe a loop inside the loop would do the trick:
team = df['RSA'].unique()
mylist = ['HR_DASH_0034','HR_DASH_0035','HR_DASH_0036','HR_DASH_0037','HR_DASH_0038','HR_DASH_0039','HR_DASH_0040',
'HR_DASH_0041','HR_DASH_0042','HR_DASH_0043','HR_DASH_0044','HR_DASH_0045','empty']
for x in team:
for name in mylist:
path2 = r'C:\Users\davidlopez\Desktop\regions\%s.csv' %name
r = HROs['RSA'] == x
Completed = HROs['Current Team Simple'].isin(['Completed'])
table = HROs[Completed & r]
top20 = table.groupby(['To Position Title']).RequestNumber.count().sort_values().nlargest(20)
top20.to_csv(path2, index=True, header=True)
That didn't work either. It just gave me the last value in mylist, but also it doesn't align the the unique values in team list appropriately.
3) Next I created a dataframe with the unique values from team and the new list.
team = df['RSA'].unique()
mylist = ['HR_DASH_0034','HR_DASH_0035','HR_DASH_0036','HR_DASH_0037','HR_DASH_0038','HR_DASH_0039','HR_DASH_0040',
'HR_DASH_0041','HR_DASH_0042','HR_DASH_0043','HR_DASH_0044','HR_DASH_0045','empty']
dict = {'RSA': team, 'DASH_ID': mylist}
newdf = pd.DataFrame(dict)
print (newdf)
RSA DASH_ID
0 Intermountain Region, R4 HR_DASH_0034
1 Pacific Southwest Region, R5 HR_DASH_0035
2 Alaska Region, R10 HR_DASH_0036
3 Pacific Northwest Region, R6 HR_DASH_0037
4 Northern Region, R1 HR_DASH_0038
5 Eastern Region, R9 HR_DASH_0039
6 Albuquerque Service Center(ASC) HR_DASH_0040
7 Rocky Mountain Region, R2 HR_DASH_0041
8 Research & Development(RES) HR_DASH_0042
9 Washington Office(WO) HR_DASH_0043
10 Southwestern Region, R3 HR_DASH_0044
11 Southern Region, R8 HR_DASH_0045
12 L2 Desc Not Available empty
However, I still don't know how to get the DASH_ID column element names to export in my path mentioned above.
So in the end, HR_DASH_0034, name should align to Intermountain Region, R4 when the file is sent out.
Any help appreciated!
Inside your first approach, just use:
mylist = [...] # your list definition
ml_iter = iter(mylist)
and inside the loop, replace mylist with:
path2 = r'C:\Users\davidlopez\Desktop\regions\%s.csv' %str(next(ml_iter))
More info: https://www.programiz.com/python-programming/methods/built-in/iter
Lemme know if this helps!
UPDATE: Second Solution
for x, m in zip(team, mylist):
path2 = r'C:\Users\davidlopez\Desktop\regions\%s.csv' %m
r = HROs['RSA'] == x
Completed = HROs['Current Team Simple'].isin(['Completed'])
table = HROs[Completed & r]
top20 = table.groupby(['To Position Title']).RequestNumber.count().sort_values().nlargest(20)
top20.to_csv(path2, index=True, header=True)
Let me know if this works!

Text in a class - href statement

How could i get all the categories mentioned on each listing page of the same website "https://www.sfma.org.sg/member/category". for example, when i choose alcoholic beverage category on the above mentioned page, the listings mentioned on that page has the category information like this :-
Catergory: Alcoholic Beverage, Bottled Beverage, Spirit / Liquor / Hard Liquor, Wine, Distributor, Exporter, Importer, Supplier
how can i extract the categories mentioned here with in same variable.
The code i have written for this is :-
category = soup_2.find_all('a', attrs ={'class' :'clink'})
links = [links['href'] for links in category]
cat_name = [cat_name.text.strip() for cat_name in links]
but it is producing the below output which are all the links on the page & not the text with in the href:-
['http://www.sfma.org.sg/about/singapore-food-manufacturers-association',
'http://www.sfma.org.sg/about/council-members',
'http://www.sfma.org.sg/about/history-and-milestones',
'http://www.sfma.org.sg/membership/',
'http://www.sfma.org.sg/member/',
'http://www.sfma.org.sg/member/alphabet/',
'http://www.sfma.org.sg/member/category/',
'http://www.sfma.org.sg/resources/sme-portal',
'http://www.sfma.org.sg/resources/setting-up-food-establishments-in-singapore',
'http://www.sfma.org.sg/resources/import-export-requirements-and-procedures',
'http://www.sfma.org.sg/resources/labelling-guidelines',
'http://www.sfma.org.sg/resources/wsq-continuing-education-modular-programmes',
'http://www.sfma.org.sg/resources/holistic-industry-productivity-scorecard',
'http://www.sfma.org.sg/resources/p-max',
'http://www.sfma.org.sg/event/',
.....]
What i need is the below data for all the listings of all the categories on the base URL which is "https://www.sfma.org.sg/member/category/"
['Ang Leong Huat Pte Ltd',
'16 Tagore Lane
Singapore (787476)',
'Tel: +65 6749 9988',
'Fax: +65 6749 4321',
'Email: sales#alh.com.sg',
'Website: http://www.alh.com.sg/',
'Catergory: Alcoholic Beverage, Bottled Beverage, Spirit / Liquor / Hard Liquor, Wine, Distributor, Exporter, Importer, Supplier'
Please excuse if the question seems to be novice, i am just very new to python,
Thanks !!!
The following targets the two javascript objects housing mapping info about companies names, categories and the shown tags e.g. bakery product. More more detailed info on the use of regex and splitting item['category'] - see my SO answer here.
It handles unquoted keys with hjson library.
You end up with a dict whose keys are the company names (I use permalink version of name, over name, as this should definitely be unique), and whose values are a tuple with 2 items. The first item is the company page link; the second is a list of the given tags e.g. bakery product, alcoholic beverage). The logic is there for you to re-organise as desired.
import requests
from bs4 import BeautifulSoup as bs
import hjson
base = 'https://www.sfma.org.sg/member/info/'
p = re.compile(r'var tmObject = (.*?);')
p1 = re.compile(r'var ddObject = (.*?);')
r = requests.get('https://www.sfma.org.sg/member/category/manufacturer')
data = hjson.loads(p.findall(r.text)[0])
lookup_data = hjson.loads(p1.findall(r.text)[0])
name_dict = {item['id']:item['name'] for item in lookup_data['category']}
companies = {}
for item in data['tmember']:
companies[item['permalink']] = (base + item['permalink'], [name_dict[i] for i in item['category'].split(',')])
print(companies)
Updating for your additional request at end (Address info etc):
I then loop companies dict visiting each company url in tuple item 1 of value for current dict key; extract the required info into a dict, which I add the category info to, then update the current key:value with the dictionary just created.
import requests
from bs4 import BeautifulSoup as bs
import hjson
base = 'https://www.sfma.org.sg/member/info/'
p = re.compile(r'var tmObject = (.*?);')
p1 = re.compile(r'var ddObject = (.*?);')
r = requests.get('https://www.sfma.org.sg/member/category/manufacturer')
data = hjson.loads(p.findall(r.text)[0])
lookup_data = hjson.loads(p1.findall(r.text)[0])
name_dict = {item['id']:item['name'] for item in lookup_data['category']}
companies = {}
for item in data['tmember']:
companies[item['permalink']] = (base + item['permalink'], [name_dict[i] for i in item['category'].split(',')])
with requests.Session() as s:
for k,v in companies.items():
r = s.get(v[0])
soup = bs(r.content, 'lxml')
tel = soup.select_one('.w3-text-sfma ~ p:contains(Tel)')
fax = soup.select_one('.w3-text-sfma ~ p:contains(Fax)')
email = soup.select_one('.w3-text-sfma ~ p:contains(Email)')
website = soup.select_one('.w3-text-sfma ~ p:contains(Website)')
if tel is None:
tel = 'N/A'
else:
tel = tel.text.replace('Tel: ','')
if fax is None:
fax = 'N/A'
else:
fax = fax.text.replace('Fax: ','')
if email is None:
email = 'N/A'
else:
email = email.text.replace('Email: ','')
if website is None:
website = 'N/A'
else:
website = website.text.replace('Website: ','')
info = {
# 'Address' : ' '.join([i.text for i in soup.select('.w3-text-sfma ~ p:not(p:nth-child(n+4) ~ p)')])
'Address' : ' '.join([i.text for i in soup.select('.w3-text-sfma ~ p:nth-child(-n+4)')])
, 'Tel' : tel
, 'Fax': fax
, 'Email': email
,'Website' : website
, 'Categories': v[1]
}
companies[k] = info
Example entry in companies dict:

Finding multiple accounts under the same name for Python bank system

In my bank system I have a set of customer accounts but for one name Adam Smith he has two accounts:
def load_bank_data(self):
# the customers in the bank system
account_no = 1234
customer_1 = CustomerAccount("Adam", "Smith", 14, "Wilcot Street", "Bath", "B5 5RT", account_no, "Current", 2500.00)
self.accounts_list.append(customer_1)
account_no += 5678
customer_2 = CustomerAccount("David", "White", 60, "Holburn Viaduct", "London", "EC1A 2FD", account_no, "Savings", 3200.00)
self.accounts_list.append(customer_2)
account_no += 3456
customer_3 = CustomerAccount("Alice", "Churchil", 55, "Cardigan Street", "Birmingham", "B4 7BD", account_no, "Current", 18000.00)
self.accounts_list.append(customer_3)
account_no += 6789
customer_4 = CustomerAccount("Ali", "Abdallah", 44, "Churchill Way West", "Basingstoke", "RG21 6YR", account_no, "Savings", 40.00)
self.accounts_list.append(customer_4)
account_no += 1987
customer_5 = CustomerAccount("Adam", "Smith", 44, "Churchill Way West", "Basingstoke", "RG21 6YR", account_no, "Savings", 5000.00)
self.accounts_list.append(customer_5)
I created a function so when many customer accounts under the same first and last name have been found, it should add all those bank account balances together and print out the final total. (The input is where I type the customer to find multiple accounts for:
def sum_of_all_money(self):
try:
find_customer = input("Enter the surname of the customer to find total sum of money for: ")
for find_customer in self.accounts_list:
find_customer = find_customer.get_balance() + find_customer.get_balance()
print(find_customer)
except SyntaxError as e:
print(e)
This is only just finding one Adam Smith account at the bottom as customer 5 but it doesn't detect the other Adam Smith account as customer 1 and it just adds customer 5 twice giving me an output of 1000.00 which isn't right, what am I doing wrong?
Your code has some flaws, currently it will only loop over your list and always overwrite find_customer with the current balance of the customer * 2.
You need to filter for the right name which was input, try it like this:
try:
find_customer = input("Enter the surname of the customer to find total sum of money for: ")
find_customer_balance = 0
for customer in self.accounts_list:
if customer.get_surname() == find_customer:
find_customer_balance += customer.get_balance()
print(find_customer)
print(find_customer_balance)
except SyntaxError as e:
print(e)

how to create list of dictionary in this code?

I have some names and scores as follows
input = {
'Maths': dict(Mohsen=19, Sadegh=18, Hafez=15),
'Physics': dict(Sadegh=16, Hafez=17, Mohsen=17),
'Chemistry': dict(Hafez=13),
'Literature': dict(Sadegh=14),
'Biology': dict(Mohsen=16, Sadegh=10),
}
if a person don't have any lesson its score consider zero also get avrege of scores's person and sort final list by averge and i want to get an output like this.
answer = [
dict(Name='Sadegh', Literature=14, Chemistry=0, Maths=18, Physics=16, Biology=10, Average=11.6),
dict(Name='Mohsen', Maths=19, Physics=17, Chemistry=0, Biology=16, Literature=0, Average=10.4),
dict(Name='Hafez', Chemistry=13, Biology=0, Physics=17, Literature=0, Maths=15, Average=9),
]
how to do it?
Essentially, you have a dictionary, where the information is arranged based on subjects, where for each subject, you have student marks. You want to collection all information related to each student in separate dictionaries.
One of the approaches which can try, is as below:
Try converting the data which you have into student specific data and then you can calculate the Average of the Marks of all subjects for that student. There is a sample code below.
Please do note that, this is just a sample and you should be trying
out a solution by yourself. There are many alternate ways of doing it and you should explore them by yourself.
The below code works with Python 2.7
from __future__ import division
def convert_subject_data_to_student_data(subject_dict):
student_dict = {}
for k, v in subject_dict.items():
for k1, v1 in v.items():
if k1 not in student_dict:
student_dict[k1] = {k:v1}
else:
student_dict[k1][k] = v1
student_list = []
for k,v in student_dict.items():
st_dict = {}
st_dict['Name'] = k
st_dict['Average'] = sum(v.itervalues()) / len(v.keys())
st_dict.update(v)
student_list.append(st_dict)
print student_list
if __name__ == "__main__":
subject_dict = {
'Maths': dict(Mohsen=19, Sadegh=18, Hafez=15),
'Physics': dict(Sadegh=16, Hafez=17, Mohsen=17),
'Chemistry': dict(Hafez=13),
'Literature': dict(Sadegh=14),
'Biology': dict(Mohsen=16, Sadegh=10),
}
convert_subject_data_to_student_data(subject_dict)
sample_input = {
'Maths': dict(Mohsen=19, Sadegh=18, Hafez=15),
'Physics': dict(Sadegh=16, Hafez=17, Mohsen=17),
'Chemistry': dict(Hafez=13),
'Literature': dict(Sadegh=14),
'Biology': dict(Mohsen=16, Sadegh=10),
}
def foo(lessons):
result = {}
for lesson in lessons:
for user in lessons[lesson]:#dictionary
if result.get(user):
#print(result.get(user))
result.get(user).setdefault(lesson, lessons[lesson].get(user,0))
else:
result.setdefault(user, dict(name=user))
result.get(user).setdefault(lesson,lessons[lesson].get(user,0))
#return list(result.values())
return result.values()
#if name == '__main__':
print(foo(sample_input))

Resources