usagegroup = usage.groupby('date')
dfcID = pd.DataFrame()
for i,j in usagegroup:
uniqdate = str(j['date'].unique()[0])
j.rename(columns = {'hit':uniqdate},inplace = True)
del j['date']
if len(dfcID)==0:
dfcID = j.copy()
else:
dfcID = pd.merge(dfcID,j,on='customerid',how='left').reset_index(drop=True)
Tried the above solution.
Generated output: 01-Dec-22 01-Jan-23 02-Dec-22 02-Jan-23 so on.
Required output: 01-Dec-22 02-Dec-22 ------ 31-Dec-22 01-Jan-23 02-Jan-23 ----- 31-Jan-23
Related
Hello I am not able to figure out why i am not getting output. Output shows empty dataframe
#func to detect and extract tenses
`def tenseExtract(doc1): # doc1 is cleaned str
doc = nlp(doc1)
sents_list = [sent for sent in doc.sents] # sentence-tokenize the text
hypoth_ind = [] # store sentence index
hypoth_mod = [] # store verbform modifier
hypoth_inf = [] # store infinitive verb
hypoth_sents = [] # store text
for i1 in range(len(sents_list)):
sent0 = str(sents_list[i1]) # pick each sentence individually
if len(sent0) < 5: # drop sent fragments, newlines etc
break
sent_ann = nlp(sent0)
try:
morph0 = ([(token.lemma_, token.morph)for token in sent_ann])
morph1 = [y for (x, y) in morph0]
morph2 = [x for (x, y) in morph0]
if bool(re.search(r"mod", str(morph1))) == True:
out_list = allMatches(
morph1
)
mod_ind = out_list[0]
inf_ind = out_list[1]
test0 = [morph2[x] for x in mod_ind]
test1 = [morph2[x] for x in inf_ind]
mod1 = str(test0).strip("[]")
inf1 = str(test1).strip("[]")
hypoth_ind.concat(i1)
hypoth_mod.concat(mod1)
hypoth_inf.concat(inf1)
hypoth_sents.concat(sent0)
except:
break
sent_df = pd.DataFrame(
{
"index": hypoth_ind,
"modifier": hypoth_mod,
"infinitive_verb": hypoth_inf,
"sentence": hypoth_sents,
}
)
return sent_df
#testing the func on one doc
sent_df = tenseExtract(doc1)
sent_df.iloc[:, 0:3]`
Output : index modifier infinitive_verb
I am using spacy libraries.
The output should be like this
index modifier infinitive_verb
0 3 'will' 'meet'
1 7 'may' 'think'
2 10 'can' 'read'
3 26 'will' 'be'
4 30 'will' 'find'
`
I would like to access and edit individual dataframes after creating them by a for loop.
#Let's get those files!!!
bdc_files = {'nwhl': 'https://raw.githubusercontent.com/bigdatacup/Big-Data-Cup-2021/main/hackathon_nwhl.csv',
'olympics': 'https://raw.githubusercontent.com/bigdatacup/Big-Data-Cup-2021/main/hackathon_womens.csv',
'erie': 'https://raw.githubusercontent.com/bigdatacup/Big-Data-Cup-2021/main/hackathon_scouting.csv'}
df_list = []
for (a,b) in bdc_files.items():
#Grab csv file
c = pd.read_csv(b)
c.name = a
#a = a.append(c)
#Manipuling the Data as we please
c['Game_ID'] = c['game_date'] + c['Home Team'] + c['Away Team']
c['Detail 3'] = c['Detail 3'].replace('t', 'with traffic')
c['Detail 3'] = c['Detail 3'].replace('f', 'without traffic')
c['Detail 4'] = c['Detail 4'].replace('t', 'one-timer')
c['Detail 4'] = c['Detail 4'].replace('f', 'not one-timer')
c['Details'] = c['Detail 1'].astype(str).add(' ').add(c['Detail 2'].astype(str)).add(' ').add(c['Detail 3'].astype(str)).add(' ').add(c['Detail 4'].astype(str))
c['is_goal'] = 0
c['is_shot'] = 0
c.loc[c['Event'] == 'Shot', 'is_shot'] = 1
c.loc[c['Event'] == 'Goal', 'is_goal'] = 1
c['Goal Differential'] = c['Home Team Goals'] - c['Away Team Goals']
c['Clock'] = pd.to_datetime(c['Clock'], format = '%M:%S')
c['Seconds Remaining'] = ((c['Clock'].dt.minute)*60) + (c['Clock'].dt.second)
df_list.append(a)
#Printing Datasheet info
Title = "The sample of games from the {}".format(c.name)
print(c.name)
print(Title + " dataset is:", len(list(c['Game_ID'].value_counts())))
print(c['Event'].value_counts())
print(c.columns.values)
print(c.loc[c['Event'] == 'Shot', 'Details'].value_counts())
print(c.head())
print(c.info())
print(df_list)
print(nwhl)
However, if I want to print the nwhl database, I get the following output...
Empty DataFrame
Columns: []
Index: []
And if I were to use an append, I would get this error
AttributeError: 'str' object has no attribute 'append'
Long story short, based off of the code I have, how can I be able to print and perform other tasks with the dataframes outside of the for loop? Any assistance is truly appreciated.
Use a dictionary of dataframes, df_dict:
Add
df_dict = {}
...
for (a,b) in bdc_files.items():
#Grab csv file
c = pd.read_csv(b)
c.name = a
# Add this line to build dictionary
df_dict[a] = c
And, at the end print
df_dict['nwhl']
def getNewWatchedCountGraph(requests):
data = Video.getNewWatchedCountGraph(requests)
data = json.loads(data)
# print(data)
x = []
m = []
bg = {}
res = {}
monthnumbers = []
currentMonth = datetime.datetime.now().month
for item in data:
seconds = int(item['count'])
x.append(seconds)
mydate = datetime.datetime.strptime(item['_id'], "%Y-%m")
monthnumbers.append(mydate.month)
m.append(mydate.strftime("%B"))
startMonths = monthnumbers[0] #line 116
endMonths = currentMonth+1
data = []
mon = []
for months in range(startMonths,endMonths):
if months not in monthnumbers:
mon.append(calendar.month_name[months])
data.append(0)
else:
mon.append(calendar.month_name[months])
monthIndex = monthnumbers.index(months)
data.append(x[monthIndex])
res['series_name'] = "Views"
res['series'] = list(data)
res['xdata'] = list(mon)
restrn_response = dumps(res)
return HttpResponse(restrn_response)
I have made this function to show the graph of total number of views.
It is working fine in my local server. But showing List index out of range in main server at line no 116. Where am i doing wrong?
This happens because monthnumbers is empty. Given that it’s being filled while iterating over data, I think the loop doesn’t even start because data is empty.
I've gotten all the data I wanted from scraping this metacritc url (see below) however, I can't seem to put a value in for when I don't find the associated value for list (missing values)
I would like to have it so all the lists are even (so I can right to .csv)
Here is the code I have so far:
from requests import get
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import pandas as pd
#Define year
year_number = 2018
# Define the URL
i = range(0, 1)
names = []
metascores = []
userscores = []
userscoresNew = []
release_dates = []
release_datesNew = []
publishers = []
ratings = []
genres = []
genresNew = []
for element in i:
url = "http://www.metacritic.com/browse/games/score/metascore/year/pc/filtered?view=detailed&sort=desc&year_selected=" + format(year_number)
print(url)
year_number -= 1
# not sure about this but it works (I was getting blocked by something and this the way I found around it)
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
web_byte = urlopen(req).read()
webpage = web_byte.decode('utf-8')
#this grabs the all the text from the page
html_soup = BeautifulSoup(webpage, 'html5lib')
#this is for selecting all the games in from 1 to 100 (the list of them)
game_names = html_soup.find_all("div", class_="main_stats")
game_metas = html_soup.find_all("a", class_="basic_stat product_score")
game_users = html_soup.find_all("li", class_='stat product_avguserscore')
game_releases = html_soup.find_all("ul", class_='more_stats')
game_publishers = html_soup.find_all("li", class_='stat publisher')
game_ratings = html_soup.find_all("li", class_='stat maturity_rating')
game_genres = html_soup.find_all("li", class_='stat genre')
#Extract data from each game
for games in game_names:
name = games.find()
names.append(name.text.strip())
for games2 in game_metas:
metascore = games2.find()
metascores.append(metascore.text.strip())
for games3 in game_releases:
release_date = games3.find()
release_dates.append(release_date.text.strip())
for games4 in game_users:
userscore = games4.find('span', class_="data textscore textscore_favorable") or games4.find('span', class_="data textscore textscore_mixed")
if userscore:
userscores.append(userscore.text)
for games5 in game_publishers:
publisher = games5.find("span", class_ = "data")
if publisher:
publishers.append(publisher.text)
for games6 in game_ratings:
rating = games6.find("span", class_ = "data")
for games7 in game_genres:
genre = games7.find("span", class_ = "data")
if genre:
genres.append(genre.text)
for x in release_dates:
temp = str(x)
temp2 = temp.replace("Release Date:\n ", "")
release_datesNew.append(temp2)
for z in genres:
temp3 = str(z)
temp4 = temp3.strip()
temp5 = temp4.replace(" ", "")
genresNew.append(temp5)
df = pd.DataFrame({'Games:': names})
not sure how I would work that in to this code
From what I understand it's take all the data it can find but if there is a blank it doesn't know about it
can someone adivse the best solution for this situation
any help would be great
Thanks
Just add else's for the existing conditions...
if userscore:
userscores.append(userscore.text)
else:
userscores.append('na')
I cannot figure how to return all the items using this code:
#staticmethod
def create_dataset():
cols = Colleagues.get_all_colleagues()
cols_abs = ((col['Firstname'] + " " + col['Surname'], col['Absences']) for col in cols)
for col in cols_abs:
dataset = list()
sum_days = list()
for d in col[1]:
start_date = d[0]
end_date = d[1]
s = datetime.strptime(start_date, "%Y-%m-%d")
e = datetime.strptime(end_date, "%Y-%m-%d")
startdate = s.strftime("%b-%y")
days = numpy.busday_count(s, e) + 1
sum_days.append(days)
days_per_month = startdate, days
dataset.append(days_per_month)
dict_gen1 = dict(dataset)
comb_days = sum(sum_days)
dict_gen2 = {'Name': col[0], 'Spells': len(col[1]), 'Total(Days)': comb_days}
dict_comb = [{**dict_gen1, **dict_gen2}]
return dict_comb
It only returns the first "col". If I move the return statement outside of the loop it returns only the last item in my set of data. This is the output that is returned from col_abs:
('Jonny Briggs', [['2015-08-01', '2015-08-05'], ['2015-11-02', '2015-11-06'], ['2016-01-06', '2016-01-08'], ['2016-03-07', '2016-03-11']])
('Matt Monroe[['2015-12-08', '2015-12-11'], ['2016-05-23', '2016-05-26']])
('Marcia Jones', [['2016-02-02', '2016-02-04']])
('Pat Collins', [])
('Sofia Marowzich', [['2015-10-21', '2015-10-30'], ['2016-03-09', '2016-03-24']])
('Mickey Quinn', [['2016-06-06', '2016-06-08'], ['2016-01-18', '2016-01-21'], ['2016-07-21', '2016-07-22']])
('Jenifer Andersson', [])
('Jon Fletcher', [])
('James Gray', [['2016-04-01', '2016-04-06'], ['2016-07-04', '2016-07-07']])
('Matt Chambers', [['2016-05-02', '2016-05-04']])
Can anyone help me understand this better as I want to return a "dict_comb" for each entry in col_abs ?
Replace your return statement with a yield statement. This will allow your method to continue to loop while "yielding" or returning values after each iteration.