I cannot figure out how to return all the items using this code:
#staticmethod
def create_dataset():
    """Build a per-month absence summary from the colleague records.

    Each record from ``Colleagues.get_all_colleagues()`` is reduced to a
    ``(full name, absences)`` pair. For the pair being processed, every
    absence is turned into a ``("%b-%y" start month, business days)`` entry;
    those entries are merged with ``'Name'``, ``'Spells'`` and
    ``'Total(Days)'`` into one dict.

    Returns:
        A one-element list holding the summary dict of the first colleague,
        or ``None`` when there are no colleagues at all.
    """
    colleagues = Colleagues.get_all_colleagues()
    name_and_absences = (
        (person['Firstname'] + " " + person['Surname'], person['Absences'])
        for person in colleagues
    )
    for full_name, spells in name_and_absences:
        month_day_pairs = []
        day_counts = []
        for spell in spells:
            begin = datetime.strptime(spell[0], "%Y-%m-%d")
            finish = datetime.strptime(spell[1], "%Y-%m-%d")
            month_label = begin.strftime("%b-%y")
            # +1 so the end date itself counts as a day of absence.
            working_days = numpy.busday_count(begin, finish) + 1
            day_counts.append(working_days)
            month_day_pairs.append((month_label, working_days))
        # dict() keeps only the last entry when two spells start in the same month.
        per_month = dict(month_day_pairs)
        summary = {
            'Name': full_name,
            'Spells': len(spells),
            'Total(Days)': sum(day_counts),
        }
        # NOTE: returning inside the outer loop ends the function after the
        # first colleague; later colleagues are never processed.
        return [{**per_month, **summary}]
It only returns the first "col". If I move the return statement outside of the loop it returns only the last item in my set of data. This is the output that is returned from col_abs:
('Jonny Briggs', [['2015-08-01', '2015-08-05'], ['2015-11-02', '2015-11-06'], ['2016-01-06', '2016-01-08'], ['2016-03-07', '2016-03-11']])
('Matt Monroe', [['2015-12-08', '2015-12-11'], ['2016-05-23', '2016-05-26']])
('Marcia Jones', [['2016-02-02', '2016-02-04']])
('Pat Collins', [])
('Sofia Marowzich', [['2015-10-21', '2015-10-30'], ['2016-03-09', '2016-03-24']])
('Mickey Quinn', [['2016-06-06', '2016-06-08'], ['2016-01-18', '2016-01-21'], ['2016-07-21', '2016-07-22']])
('Jenifer Andersson', [])
('Jon Fletcher', [])
('James Gray', [['2016-04-01', '2016-04-06'], ['2016-07-04', '2016-07-07']])
('Matt Chambers', [['2016-05-02', '2016-05-04']])
Can anyone help me understand this better as I want to return a "dict_comb" for each entry in col_abs ?
Replace your return statement with a yield statement. This will allow your method to continue to loop while "yielding" or returning values after each iteration.
Related
In the code below, I need to create two variables, namely flag1 and flag2. They are created based on multiple conditions. I used the np.select approach shown below. However, I wonder what other ways there are to do this? In my real work situation, there would be more conditions for creating each flag. Any advice or suggestions would be great.
import numpy as np
import pandas as pd
# Half-open date window [start_date, end_date) that both flags test against.
start_date = '2020-04-01'
end_date = '2020-05-01'
d1 = {
    'customer type': ['walk in', 'online app', 'phone app', 'referral'],
    'office visit': ['location1', 'location1', 'location1', 'location1'],
    'date1': ['2020-04-17', '2020-05-17', '2020-03-01', '2020-05-01'],
    'date2': ['2020-05-18', '2020-04-18', '2020-04-03', '2020-05-19'],
}
df1 = pd.DataFrame(data=d1)

# The dates are ISO-formatted strings, so string comparison orders them
# chronologically.
con1 = [(df1['date1'] >= start_date) & (df1['date1'] < end_date)]
result1 = ['yes']
# np.select fills non-matching rows with its default, the integer 0. Passing
# the string '0' explicitly keeps the column purely string-typed instead of
# relying on an int being mixed with the string choices.
df1['flag1'] = np.select(con1, result1, default='0')

con2 = [(df1['date2'] >= start_date) & (df1['date2'] < end_date)]
result2 = ['yes']
df1['flag2'] = np.select(con2, result2, default='0')
You could use a dictionary and dynamically update the keys to the variable names and add the corresponding value of the variables.
For example:
import numpy as np
import pandas as pd
# Half-open date window [start_date, end_date) that both flags test against.
start_date = '2020-04-01'
end_date = '2020-05-01'
flags = dict()
flag_string = 'flag'
# This creates the strings flag1 and flag2 automatically
for i in range(1, 3):
    # concatenate the flag_string with the index of the loop; each flag name
    # maps to the column name it is stored under
    flags[flag_string + str(i)] = flag_string + str(i)
print(flags)
d1 = {
    'customer type': ['walk in', 'online app', 'phone app', 'referral'],
    'office visit': ['location1', 'location1', 'location1', 'location1'],
    # '2020-03-01' is written on one line: splitting the literal with a
    # backslash continuation embeds a stray space in the date string.
    'date1': ['2020-04-17', '2020-05-17', '2020-03-01', '2020-05-01'],
    'date2': ['2020-05-18', '2020-04-18', '2020-04-03', '2020-05-19'],
}
df1 = pd.DataFrame(data=d1)

con1 = [(df1['date1'] >= start_date) & (df1['date1'] < end_date)]
result1 = ['yes']
# np.select fills non-matching rows with its default, the integer 0. Passing
# the string '0' explicitly keeps the column purely string-typed.
df1[flags['flag1']] = np.select(con1, result1, default='0')

con2 = [(df1['date2'] >= start_date) & (df1['date2'] < end_date)]
result2 = ['yes']
df1[flags['flag2']] = np.select(con2, result2, default='0')
This is how you can substitute dictionary values as variables. I've also included a for loop that builds your flag dictionary.
I would like to access and edit individual dataframes after creating them by a for loop.
#Let's get those files!!!
# Maps a short dataset name to the URL of its CSV file.
bdc_files = {'nwhl': 'https://raw.githubusercontent.com/bigdatacup/Big-Data-Cup-2021/main/hackathon_nwhl.csv',
'olympics': 'https://raw.githubusercontent.com/bigdatacup/Big-Data-Cup-2021/main/hackathon_womens.csv',
'erie': 'https://raw.githubusercontent.com/bigdatacup/Big-Data-Cup-2021/main/hackathon_scouting.csv'}
# NOTE(review): this list ends up holding the dataset names (strings), not the
# DataFrames -- see the append further down.
df_list = []
# a is the dataset name, b is its URL.
for (a,b) in bdc_files.items():
#Grab csv file
c = pd.read_csv(b)
c.name = a
#a = a.append(c)
#Manipulating the data as we please
# Game_ID is the concatenation of the date and both team names.
c['Game_ID'] = c['game_date'] + c['Home Team'] + c['Away Team']
# Spell out the 't'/'f' codes stored in the Detail 3 and Detail 4 columns.
c['Detail 3'] = c['Detail 3'].replace('t', 'with traffic')
c['Detail 3'] = c['Detail 3'].replace('f', 'without traffic')
c['Detail 4'] = c['Detail 4'].replace('t', 'one-timer')
c['Detail 4'] = c['Detail 4'].replace('f', 'not one-timer')
# Details joins Detail 1..4 (as strings) with single spaces.
c['Details'] = c['Detail 1'].astype(str).add(' ').add(c['Detail 2'].astype(str)).add(' ').add(c['Detail 3'].astype(str)).add(' ').add(c['Detail 4'].astype(str))
# 0/1 indicator columns derived from the Event column.
c['is_goal'] = 0
c['is_shot'] = 0
c.loc[c['Event'] == 'Shot', 'is_shot'] = 1
c.loc[c['Event'] == 'Goal', 'is_goal'] = 1
c['Goal Differential'] = c['Home Team Goals'] - c['Away Team Goals']
# Clock is parsed as minutes:seconds and then converted to a number of seconds.
c['Clock'] = pd.to_datetime(c['Clock'], format = '%M:%S')
c['Seconds Remaining'] = ((c['Clock'].dt.minute)*60) + (c['Clock'].dt.second)
# NOTE(review): this appends the key `a` (a str), not the DataFrame `c`.
df_list.append(a)
#Printing Datasheet info
Title = "The sample of games from the {}".format(c.name)
print(c.name)
print(Title + " dataset is:", len(list(c['Game_ID'].value_counts())))
print(c['Event'].value_counts())
print(c.columns.values)
print(c.loc[c['Event'] == 'Shot', 'Details'].value_counts())
print(c.head())
print(c.info())
print(df_list)
# NOTE(review): `nwhl` is not assigned anywhere in this snippet; it must come
# from code that is not shown here.
print(nwhl)
However, if I want to print the nwhl database, I get the following output...
Empty DataFrame
Columns: []
Index: []
And if I were to use an append, I would get this error
AttributeError: 'str' object has no attribute 'append'
Long story short, based off of the code I have, how can I be able to print and perform other tasks with the dataframes outside of the for loop? Any assistance is truly appreciated.
Use a dictionary of dataframes, df_dict:
Add
# Maps each dataset name (a key of bdc_files) to the DataFrame read for it.
df_dict = {}
...
for (a,b) in bdc_files.items():
#Grab csv file
c = pd.read_csv(b)
c.name = a
# Add this line to build dictionary
df_dict[a] = c
And, at the end print
df_dict['nwhl']
I'm looking to take a log file whose lines are in the following format:
2020:03:29-23:07:22 sslvpnpa ulogd[19880]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60001" initf="eth0"
and turn it into the json format of the snippet below.
{"timestamp": "2020:03:29-23:07:22", "object": "sslvpnpa", "code": "ulogd[19880]", "id": "2001", "severity": "info", "sys": "SecureNet", "sub": "packetfilter", ...}
My start was to loop like this:
# Naive parse: split the row on whitespace and keep only the key=value tokens.
# A quoted value that contains a space is split across tokens, so only its
# first word is kept.
log_fields = log_row.split()
obj = {}
for k in log_fields:
    if '=' in k:
        pieces = k.split('=')
        obj[pieces[0]] = pieces[1]
But then I realized that some of the values contain spaces, and that there might be a list comprehension or generator expression that is more efficient or easier to read.
The object/json this generates will then be added to a field in a larger object.
Thanks in advance.
I think this will work out for you:
def split_string(s):
    """Parse a whitespace-separated log line into a dict.

    Tokens of the form ``key=value`` (the value may be double-quoted and may
    span several whitespace-separated tokens) are stored under ``key`` with
    the quotes removed. Every other token is stored under ``"key<index>"``,
    where ``<index>`` is the token's position in the split line.

    Args:
        s: The raw log line.

    Returns:
        A dict of the parsed fields, in the order they appear in the line.
    """
    d = {}
    split_s = s.split()
    ind = 0
    while ind < len(split_s):
        current_s = split_s[ind]
        if "=" in current_s:
            # get_full_string may consume further tokens and returns the
            # index of the last token it used.
            key, value, ind = get_full_string(split_s, ind)
            d[key] = value
        else:
            d[f"key{ind}"] = current_s
        ind += 1
    return d


def get_full_string(split_s, ind):
    """Read the ``key=value`` field that starts at ``split_s[ind]``.

    Args:
        split_s: The log line split on whitespace.
        ind: Index of the token that contains ``=``.

    Returns:
        A ``(key, value, ind)`` tuple: the key, the value with all double
        quotes removed, and the index of the last token that was consumed.
    """
    # partition splits on the first "=" only, so values may contain "=".
    key, _, current_value = split_s[ind].partition("=")

    opens_quote = current_value.startswith('"')
    # A lone '"' is an opening quote, not a complete quoted value.
    closes_quote = len(current_value) > 1 and current_value.endswith('"')
    if not opens_quote or closes_quote:
        # Unquoted, empty, or fully quoted within this token: nothing more
        # to collect.
        return key, current_value.replace('"', ''), ind

    # The quoted value contains whitespace: collect tokens up to and
    # including the one that ends with the closing quote.
    value_list = [current_value]
    ind += 1
    while ind < len(split_s):
        piece = split_s[ind]
        value_list.append(piece)
        if piece.endswith('"'):
            break
        ind += 1
    value = " ".join(value_list).replace('"', '')
    return key, value, ind
Input:
# Sample log line; the quoted value of name ("Packet dropped") contains a space.
s = '2020:03:29-23:07:22 sslvpnpa ulogd[19880]: id="2001" severity="info" sys="SecureNet" sub="packetfilter" name="Packet dropped" action="drop" fwrule="60001" initf="eth0"'
print(split_string(s))
Output:
{'key0': '2020:03:29-23:07:22', 'key1': 'sslvpnpa', 'key2': 'ulogd[19880]:', 'id': '2001', 'severity': 'info', 'sys': 'SecureNet', 'sub': 'packetfilter', 'name': 'Packet dropped', 'action': 'drop', 'fwrule': '60001', 'initf': 'eth0'}
def getNewWatchedCountGraph(requests):
    """Return the monthly view counts as an ``HttpResponse``.

    Loads the JSON text produced by ``Video.getNewWatchedCountGraph(requests)``
    -- items carrying a ``"%Y-%m"`` string under ``'_id'`` and a count under
    ``'count'`` -- and builds one data point per month number, from the month
    of the first item through the current month. Months without an item get 0.

    Returns:
        ``HttpResponse`` wrapping ``dumps`` of a dict with the keys
        ``'series_name'``, ``'series'`` and ``'xdata'``.

    Raises:
        IndexError: if the loaded data has no items, because the month of the
            first item is read unconditionally.
    """
    records = json.loads(Video.getNewWatchedCountGraph(requests))

    counts = []
    month_names = []
    month_numbers = []
    current_month = datetime.datetime.now().month
    for record in records:
        counts.append(int(record['count']))
        parsed = datetime.datetime.strptime(record['_id'], "%Y-%m")
        month_numbers.append(parsed.month)
        month_names.append(parsed.strftime("%B"))

    # Fails with IndexError when no records were loaded.
    first_month = month_numbers[0] #line 116

    series = []
    labels = []
    for month_no in range(first_month, current_month + 1):
        labels.append(calendar.month_name[month_no])
        if month_no in month_numbers:
            # index() returns the first item reported for that month number.
            series.append(counts[month_numbers.index(month_no)])
        else:
            series.append(0)

    res = {
        'series_name': "Views",
        'series': list(series),
        'xdata': list(labels),
    }
    return HttpResponse(dumps(res))
I have made this function to show the graph of total number of views.
It works fine on my local server, but on the main server it fails with "list index out of range" at line 116. What am I doing wrong?
This happens because monthnumbers is empty. Given that it’s being filled while iterating over data, I think the loop doesn’t even start because data is empty.
I have a data like
# Fragment of a function body (the enclosing def is not shown).
sql = "select value, type from table"
cur.execute(sql)
row_headers = [x[0] for x in cur.description] #this will extract row headers
rv = cur.fetchall()
# Build one {column name: value} dict per fetched row.
json_result = []
for result in rv:
json_result.append(dict(zip(row_headers, result)))
# json.dumps returns a str, so finalresult is JSON text, not a list of dicts.
finalresult = json.dumps(json_result)
#[{'value':30,'type':'SS'},{'value':40,'type':'KK'},{'value':80,'type':'SK'}]
# Iterating a str yields single characters, so x['value'] indexes a string
# with a string key and raises "TypeError: string indices must be integers".
attrList = list(map(lambda x: x['value'], finalresult))
return attrList
I want OP :
[30,40,80]
But I am getting ERROR :
attrList = list(map(lambda x: x['value'], finalresult))
TypeError: string indices must be integers
Where am I making a mistake?