Related
In my case, I have an empty dictionary that I want to fill with key:value pairs of stock-ticker information. I tested the code below, but I always get a KeyError whenever a key is missing for one of the tickers I loop through. All I want is to define a default set of keys for all my tickers (info_data in the code below) and to set the value to None whenever one of those keys is missing.
Here is my code:
# Question snippet: build a per-ticker summary dict from the yfinance `info` mapping.
stocks_info = {}
for symbol in tqdm(sav_set):
info = yf.Tickers(symbol).tickers[symbol].info
# Every field below is read by direct subscripting (info['...']); as described
# in the question, this fails with KeyError as soon as one field is absent.
if info['quoteType'] == 'EQUITY':
info_data = {'symbol': symbol, 'shortName': info['shortName'], 'country': info['country'],
'sector': info['sector'], 'industry': info['industry'], 'marketCap': info['marketCap'],
'currentPrice': info['currentPrice'], 'quoteType': info['quoteType'], 'market': info['market']}
Many thanks to @dawg for providing help. This is how I managed to resolve my issue:
# Collect a fixed set of fields for every ticker. A field that is absent from
# the `info` mapping is stored as None instead of raising KeyError.
stocks_info = {}
for symbol in tqdm(sav_set):
    info = yf.Tickers(symbol).tickers[symbol].info
    # Read quoteType with .get() as well: subscripting it (info['quoteType'])
    # would bring back the very KeyError this loop is meant to avoid.
    quote_type = info.get('quoteType')
    # EQUITY tickers take their price from 'currentPrice'; every other quote
    # type takes it from 'navPrice'.
    price_key = 'currentPrice' if quote_type == 'EQUITY' else 'navPrice'
    stocks_info[symbol] = {
        'symbol': info.get('symbol'),
        'shortName': info.get('shortName'),
        'longName': info.get('longName'),
        'country': info.get('country'),
        'sector': info.get('sector'),
        'industry': info.get('industry'),
        'marketCap': info.get('marketCap'),
        'currentPrice': info.get(price_key),
        'quoteType': quote_type,
        'market': info.get('market'),
    }

# Serialise once, after every ticker has been collected.
json_object = json.dumps(stocks_info)
with open("../tickers_data/stocks_info.json", "w") as outfile:
    outfile.write(json_object)
Hi everyone, I am working with the Django framework, where I upload an Excel file into the DailyTrip table. Currently I get car_number from the Car table, but now I need to get car_number through the Car_team table, together with team_id. I store car_id and team_id in the Car_team table, and I also need to store team_id in the DailyTrip table automatically, based on car_id (car_number). I am very confused about how to do this; please help me out.
models.py
class Car_team(BaseModel):
    """Associates a Team, a Car and a City, with optional start and end dates."""

    # Team and car are nullable foreign keys; city is required.
    team = models.ForeignKey(Team, on_delete=models.CASCADE, verbose_name='Team', null=True)
    car = models.ForeignKey(Car, on_delete=models.CASCADE, verbose_name='Car', null=True)
    city = models.ForeignKey(City, on_delete=models.CASCADE, verbose_name='City')

    # Both dates may be left empty in forms and stored as NULL.
    start_date = models.DateField(null=True, blank=True)
    end_date = models.DateField(null=True, blank=True)
views.py
_UBER_DAILY_TEMPLATE = 'add_payout_uber_daily_data.html'

# Sheet columns that must not contain blank or #N/A cells.
_UBER_REQUIRED_COLUMNS = [
    'Date', 'Name', 'UUID', 'Net Fare With Toll', 'Trips', 'Uber KMs',
    'CashCollected', 'UberToll', 'Tips', 'Hours Online', 'Ratings',
    'Acceptance Rate', 'Cancellation Rate',
]


def _uber_daily_context(form, error_list=None):
    """Build the template context; 'error_list' is included only when given."""
    context = {'menu_payout': 'active', 'submenu_daily_data': 'active', 'form': form}
    if error_list is not None:
        context['error_list'] = error_list
    return context


def _get_or_none(model, **lookup):
    """Return the single ``model`` row matching ``lookup``, or None if there is none."""
    try:
        return model.objects.get(**lookup)
    except model.DoesNotExist:
        return None


def add_payout_uber_daily_data(request):
    """Handle the Uber daily-performance upload form.

    A non-POST request renders an empty form. A POST validates the form, reads
    the uploaded Excel sheet and upserts one DailyTrip per sheet row for the
    submitted date. Sheet problems (blank cells, dates that differ from the
    submitted date) are re-rendered on the same page through ``error_list``.

    Args:
        request: the incoming HttpRequest.

    Returns:
        The rendered upload page, or a redirect back to it after a successful
        import.
    """
    if request.method != 'POST':
        form = UberPerformanceDataForm(initial={})
        return render(request, _UBER_DAILY_TEMPLATE, context=_uber_daily_context(form))

    form = UberPerformanceDataForm(request.POST, request.FILES, request=request)
    if not form.is_valid():
        # Re-render with the bound form so its validation errors are shown.
        return render(request, _UBER_DAILY_TEMPLATE, context=_uber_daily_context(form))

    date = form.cleaned_data['date']
    df = pd.read_excel(request.FILES['file'])

    error_list = []
    if df[_UBER_REQUIRED_COLUMNS].isna().values.any():
        error_list.append('Found #N/A or blank values in the sheet. Please correct and re-upload')
    # Dates are compared only once no cell is blank, as before.
    elif any(d.strftime("%Y-%m-%d") != str(date) for d in df['Date']):
        error_list.append('Some dates are not matching in excel')
    if error_list:
        return render(request, _UBER_DAILY_TEMPLATE,
                      context=_uber_daily_context(form, error_list))

    # Deactivate the rows already stored for this date; rows present in the
    # sheet are switched back on by the upsert below ('is_active': 1).
    DailyTrip.objects.filter(date=date).update(is_active=0)

    for i in df.index:
        driver_uuid = df.at[i, 'UUID']
        driver = _get_or_none(Driver, uber_uuid=driver_uuid)
        # TODO: resolve the car (and its team) through Car_team instead of
        # looking the Car up directly by car_number.
        car = _get_or_none(Car, car_number=df.at[i, 'Car Number'])
        fleet = _get_or_none(Fleet, name=df.at[i, 'Fleet Name'])

        fare_total = df.at[i, 'Net Fare With Toll']
        trips = df.at[i, 'Trips']
        hours_online = df.at[i, 'Hours Online']
        total_km = df.at[i, 'Uber KMs']

        # NOTE(review): a row with zero Trips raises ZeroDivisionError in
        # fare_avg (int(trips) == 0) -- confirm such rows cannot occur, or
        # decide what should be stored for them.
        DailyTrip.all_objects.update_or_create(
            date=date, uuid=driver_uuid,
            defaults={
                'car_id': car.id if car is not None else None,
                'manager_id': car.manager_id if car is not None else None,
                'car_number': df.at[i, 'Car Number'],
                'driver_id': driver.id if driver is not None else None,
                'fleet_id': fleet.id if fleet is not None else None,
                'driver_name': df.at[i, 'Name'],
                'fare_total': fare_total,
                'trips': trips,
                'pool_trips': 0,
                'hours_online': hours_online,
                'total_km': total_km,
                'cash_collected': abs(df.at[i, 'CashCollected']),
                'toll': df.at[i, 'UberToll'],
                'tip_amount': df.at[i, 'Tips'],
                'fare_avg': float(fare_total) / int(trips),
                'fare_per_hour_online': float(fare_total) / float(hours_online),
                'fare_per_km': fare_total / total_km,
                'trips_per_hour': trips / hours_online,
                'km_per_trip': total_km / trips,
                'rating': df.at[i, 'Ratings'],
                # The sheet holds percentages; they are stored as fractions.
                'acceptance_rate_perc': float(df.at[i, 'Acceptance Rate']) / 100,
                'driver_cancellation_rate': float(df.at[i, 'Cancellation Rate']) / 100,
                'is_active': 1,
                'comments': None,
            },
        )

    messages.success(request, 'Daily Trips added Successfully...')
    return redirect('/fleet/payout/daily_data/add/uber')
You can try this:
To get car_number from Car_team:
# Query through the model class (Car_team) and a single `.objects` manager.
car_team = Car_team.objects.last()  # to get the last car_team for example
# Car_team.car is nullable and the table may be empty, so guard both steps.
car_number = car_team.car.car_number if car_team and car_team.car else None  # car number from the car_team
try:
    car = Car.objects.get(car_number=df["Car Number"][i])
except Car.DoesNotExist:
    car_id = None
    manager_id = None
    team_id = None
else:
    car_id = car.id
    # Keep the manager assignment of the code this replaces; without it
    # manager_id would stay None for every row.
    manager_id = car.manager_id
    # Fetch one matching Car_team row instead of evaluating the whole queryset.
    car_team = Car_team.objects.filter(car_id=car_id).first()
    team_id = car_team.team_id if car_team is not None else None
Could you please help me understand the issue with the unit test below?
Here's the function for which I am writing the unit test.
# (line prefix, credential name passed to get_auth) for lines holding $PASS$.
_PASS_CREDENTIALS = (
    ('set groups ospf_test', 'ospf'),
    ('set groups rip_test', 'rsvp'),
)


def running_config_from_database(device):
    """Fetch the running config of ``device`` and fill in its $PASS$ placeholders.

    Args:
        device: device name interpolated into the database query.

    Returns:
        ``(True, config_text)`` on success, where ``config_text`` is the stored
        config followed by ``overload_config``, joined with CRLF.
        ``(False, error_message)`` when anything goes wrong, including the
        query returning no rows.
    """
    try:
        data = databaseproxy(cluster='https://xxx.xxxx.xxx.net')
        datadb = 'test'
        query = f"fGetrunningconfigData('{device}')\n"
        rows = list(data.execute_query(datadb, query).fetchall())
        if not rows:
            # Report the real cause instead of an unbound-variable error.
            return False, (
                'Failed to get the running config from database, '
                f"error: no config returned for device '{device}'"
            )
        # NOTE(review): when several rows come back the last one is used --
        # confirm that a single row per device is expected.
        config = rows[-1]['Config'].split('\r\n')
        for index, line in enumerate(config):
            if '$PASS$' not in line:
                continue
            for prefix, credential in _PASS_CREDENTIALS:
                if line.startswith(prefix):
                    config[index] = line.replace('$PASS$', get_auth(credential))
                    break
        return True, '\r\n'.join(config + overload_config)
    except Exception as e:
        # Callers rely on the (status, message) tuple rather than on exceptions.
        return False, f'Failed to get the running config from database, error: {e}'
Here's my unittest for this function:
# Question's failing test, kept as posted.
# NOTE(review): the three '#patch' lines are presumably '@patch' decorators
# whose '@' was lost when the post was extracted -- confirm against the original.
#patch("scripts.test.overload_config")
#patch("scripts.test.get_auth")
#patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
ret = MagicMock()
ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
mock_data.return_value = ret
mock_cred.return_value = 'xyz'
# This rebinds the local name to a new list; the object that was passed in as
# mock_overload is not modified by the assignment.
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = tests.test_scripts.running_config_from_database('devA')
assert status and out1 == data
When I run this unittest to test the function, I get the below assertion error - looks like the function doesn't return any value.
#patch("scripts.test.overload_config")
#patch("scripts.test.get_auth")
#patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
ret = MagicMock()
ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
mock_data.return_value = ret
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = tests.test_scripts.running_config_from_database('devA')
> assert status and out1 == data
E AssertionError: assert (True and '' == 'set groups d...rload_config2'
E + set groups ospf_test secret xyz
E +
E + sample_overload_config1
E + sample_overload_config2)
tests/test_scripts.py:80: AssertionError
I edited my function to reduce the complexity but it still doesn't work. not sure why.
Main Function:
==============
def running_config_from_database(device):
    """Return the running config of ``device`` with its OSPF secret filled in.

    Args:
        device: device name handed to ``running_config_database``.

    Returns:
        ``(True, config_text)`` on success, where ``config_text`` is the
        config lines followed by ``overload_config``, joined with CRLF.
        ``(False, error_message)`` when anything goes wrong.
    """
    try:
        # Work on a copy so the list handed back by the helper is not mutated.
        # (The leftover pdb.set_trace() breakpoint was removed: it stops the
        # function, and any automated test run, at an interactive prompt.)
        config = list(running_config_database(device))
        for index, line in enumerate(config):
            if '$PASS$' in line and line.startswith('set groups ospf_test'):
                config[index] = line.replace('$PASS$', get_cred('ospf'))
        return True, '\r\n'.join(config + overload_config)
    except Exception as e:
        # Callers rely on the (status, message) tuple rather than on exceptions.
        return False, f'Failed to get the running config from Database, error: {e}'
UnitTest Result for above Function:
=========================================================================================================== FAILURES ============================================================================================================
________________________________________________________________________________________________ test_running_config_from_database _________________________________________________________________________________________________
mock_cred = <MagicMock name='get_cred' id='140210277622336'>, mock_overload = ['sample_overload_config1', 'sample_overload_config2'], mock_running_config = <MagicMock name='running_config_database' id='140210277652128'>
#patch("test.test1.scripts.running_config_database")
#patch("test.test1.scripts.overload_config")
#patch("test.test1.scripts.get_cred")
def test_running_config_from_database(mock_cred, mock_overload, mock_running_config):
mock_running_config.return_value = ['set groups ospf_test secret $PASS$', '']
mock_cred.return_value = 'xyz'
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
expected = ['set groups ospf_test secret xyz', '']
out = expected + mock_overload
data = '\r\n'.join(out)
status, out1 = test.test1.scripts.test_running_config_from_database('devA')
> assert status and out1 == data
E AssertionError: assert (True and '' == 'set groups d...rload_config2'
E + set groups ospf_test secret xyz
E +
E + sample_overload_config1
E + sample_overload_config2)
validation_tests/test_scripts.py:152: AssertionError
================================================================================================== 1 failed, 6 passed in 4.79s ==================================================================================================
The problem here is the assignment to mock_overload. If you want to adapt your mocked object you have to make sure that the object itself is changed. If you just assign another object (in this case, a list), your variable now points to the list object, while the original mock_overload is no longer referenced (and is not changed). So instead of writing:
mock_overload = ['sample_overload_config1', 'sample_overload_config2']
you can for example write
mock_overload[:] = ['sample_overload_config1', 'sample_overload_config2']
For clarification, here is a simplified version of the original code:
>>> mock_overload = []
>>> id(mock_overload)
1477793866440
>>> mock_overload = [5, 6]
>>> id(mock_overload)
1477791015560 <- changed id, no longer pointing to the mock
Now the same with the fixed code:
>>> mock_overload = []
>>> id(mock_overload)
140732764763024
>>> mock_overload[:] = [5, 6]
>>> id(mock_overload)
140732764763024 <- unchanged id, still points to the mock
Note that mock_overload[:] = [5, 6] is basically a shortcut for:
mock_overload.clear()         # empty the existing list in place
mock_overload.extend([5, 6])  # then refill that same object
The answer was already provided in the comment section by @MrBean Bremen. Here's the unit test after making the suggested changes.
# new_callable=list patches overload_config with a real (empty) list, so the
# slice assignment below actually stores the sample values. With the default
# MagicMock the assignment is only recorded, the mock still iterates as empty,
# and both sides of the final assert collapse to ''.
@patch("scripts.test.overload_config", new_callable=list)
@patch("scripts.test.get_auth")
@patch("scripts.test.databaseproxy.execute_query")
def test_running_config_from_database(self, mock_data, mock_cred, mock_overload):
    """The OSPF placeholder is replaced and the overload config is appended."""
    ret = MagicMock()
    ret.fetchall.return_value = [{'Hostname': 'devA', 'Config': 'set groups ospf_test secret $PASS$\r\n'}]
    mock_data.return_value = ret
    mock_cred.return_value = 'xyz'
    # Change the patched object in place; rebinding the name would leave the
    # patched object untouched.
    mock_overload[:] = ['sample_overload_config1', 'sample_overload_config2']
    expected = ['set groups ospf_test secret xyz', '']
    out = expected + mock_overload
    data = '\r\n'.join(out)
    status, out1 = tests.test_scripts.running_config_from_database('devA')
    assert status and out1 == data
from bs4 import BeautifulSoup
import requests , sys ,os
import pandas as pd
# Scrape the Vault top-100 law-firm ranking page of each year into one CSV.
URL = r"https://www.vault.com/best-companies-to-work-for/law/top-100-law-firms-rankings/year/"
My_list = [str(year) for year in range(2007, 2021)]
Year = []
CompanyName = []
Rank = []
Score = []
print('\n>>Process started please wait\n\n')
for Page in My_list:
    url = URL + Page
    print('\nData fetching from : ', url)
    Res = requests.get(url)
    soup = BeautifulSoup(Res.content, 'html.parser')
    data = soup.find('section', {'class': 'search-result CompanyWorkfor RankingMain FindSchools school-results contrastSection d-flex justify-content-center min-height Rankings CompRank'})
    # Test the section itself: len(soup) only says the document is not empty,
    # and a missing section made data.find_all fail with AttributeError.
    if data is None:
        print("Please Check url :", url)
        continue
    print("\n>>Getting page source for :", url)
    for item in data.find_all("div", {"class": "RankItem"}):
        # The year is the page being scraped; the icon element looked up here
        # before put a tag object (or None) in the Year column.
        Year.append(Page)
        CompanyName.append(item.find("h3", {"class": "MainLink"}).get_text().strip())
        Rank.append(item.find("div", {"class": "RankNumber"}).get_text().strip())
        Score.append(item.find("div", {"class": "score"}).get_text().strip())

Data = pd.DataFrame({"Year": Year, "CompanyName": CompanyName, "Rank": Rank, "Score": Score})
# Keep the part after the space in Score and the part after '#' in Rank; the
# leading pieces land in helper columns that are dropped again.
Data[['First', 'Score']] = Data.Score.str.split(" ", expand=True)
Data[['hash', 'Rank']] = Data.Rank.str.split("#", expand=True)
Data.drop(columns=['hash', 'First'], inplace=True)
Data.to_csv('Vault_scrap.csv', index=False)
For each URL, the expected output (year, rank, title and score) is 100 rows, but I'm getting only 10 rows.
You can iterate through the year and pages like this.
import requests
import pandas as pd
url = 'https://www.vault.com/vault/api/Rankings/LoadMoreCompanyRanksJSON'
def page_loop(year, url):
    """Download every ranking page of ``year`` and return them as one DataFrame.

    Pages 1 to 100 are requested from ``url`` until one comes back as an empty
    JSON list.

    Args:
        year: ranking year sent as the 'year' query parameter.
        url: endpoint that returns one page of rankings as JSON.

    Returns:
        A DataFrame with the rows of all pages, a fresh 0..n-1 index and the
        columns in sorted order; an empty DataFrame when no page had data.
    """
    frames = []
    for page in range(1, 101):
        payload = {
            'rank': '2',
            'year': year,
            'category': 'LBACCompany',
            'pg': page}
        jsonData = requests.get(url, params=payload).json()
        if jsonData == []:
            break
        print('page: %s' % page)
        frames.append(pd.DataFrame(jsonData))
    if not frames:
        return pd.DataFrame()
    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # sort_index keeps the sorted column order that append(..., sort=True) gave.
    return pd.concat(frames, ignore_index=True).sort_index(axis=1)
# Collect one frame per year and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0.
yearly_frames = []
for year in range(2007, 2021):
    print("\n>>Getting page source for :", year)
    yearly = page_loop(year, url)
    if not yearly.empty:
        yearly_frames.append(yearly)
if yearly_frames:
    # sort_index keeps the sorted column order that append(..., sort=True) gave.
    results = pd.concat(yearly_frames, ignore_index=True).sort_index(axis=1)
else:
    results = pd.DataFrame()
This is my first Python project; I'm trying to scrape restaurant inspections. One site has summaries that provide the keys to the detailed reports I want to scrape. I'm stumped at looping through the keyed list of URLs to get the details.
import pandas as pd
import bs4
import datetime
import re
import lxml
from urllib.request import urlopen
from urllib.error import HTTPError
# Question script: load the inspection summary CSV, keep recent Alachua County
# inspections with at least one high-priority violation, then fetch the detail
# page of each one.
try:
insp = pd.read_csv("ftp://dbprftp.state.fl.us/pub/llweb/5fdinspi.csv",
usecols=[2,14,18,80,81])
except IOError:
print("The file is not accessible.")
insp.columns = ["CountyName", "InspectDate",
"NumHighVio", "LicenseID", "VisitID"]
# filter for alachua county restaurants
alachua = insp[insp.CountyName == 'Alachua']
# filter for restaurants that had at least one serious violation
alachua = alachua[alachua.NumHighVio > 0]
# change date string to date object
alachua['InspectDate'] = pd.to_datetime(alachua['InspectDate'])
# sort most recent
alachua = alachua.sort_values('InspectDate', ascending=False)
# prefer to have user set timedelta below:
today = pd.to_datetime('today')
startDay = datetime.date.today() - datetime.timedelta(days=30)
alachua = alachua[(alachua['InspectDate'] > startDay) &
(alachua['InspectDate'] < today)]
# takes LicenseID and VisitID, passes it into the urls for detailed reports
# NOTE(review): `urls` is rebound to one string per row, so it never becomes a
# list of URLs.
for index, rows in alachua.iterrows():
visitID = rows['VisitID']
licID = rows['LicenseID']
urls = "https://www.myfloridalicense.com/inspectionDetail.asp?InspVisitID=
%s &licid= %s" % (visitID, licID)
urls = urls.replace(' ', '')
print(urls)
## here's my problem:
# NOTE(review): `urls` is a str here, so this loop visits it one character at a
# time rather than one URL at a time.
for url in urls:
def get_inspect_detail():
html = urlopen(url)
soup = bs4.BeautifulSoup(html.read(), 'lxml')
details = soup.find_all('font', {'face':'verdana'})[10:]
# NOTE(review): the loop variable `detail` is never used; every pass reads the
# same fixed positions of `details`.
for detail in details:
siteName = details[0].text
licNum = details[2].text
siteRank = details[4].text
expDate = details[6].text
primeStatus = details[8].text
secStatus = details[10].text
siteAddress = details[12].text
inspectResult = details[20].text
# NOTE(review): get_text is referenced without being called, so this stores the
# method object rather than the text.
observed1 = details[34].get_text
observed2 = details[36].text
observed3 = details[38].text
observed4 = details[40].text
observed5 = details[42].text
observed6 = details[44].text
observed7 = details[46].text
observed8 = details[48].text
observed9 = details[50].text
observed10 = details[52].text
detailsLib = {
'Restaurant': siteName,
'License': licNum,
'Rank': siteRank,
'Expires': expDate,
'Primary': primeStatus,
'Secondary': secStatus,
'Address': siteAddress,
'Result': inspectResult,
'Observed1': observed1,
'Observed2': observed2,
'Observed3': observed3,
'Observed4': observed4,
'Observed5': observed5,
'Observed6': observed6,
'Observed7': observed7,
'Observed8': observed8,
'Observed9': observed9,
'Observed10': observed10
}
# get_inspect_detail has no return statement and the repr() result is
# discarded, so this line neither prints nor stores anything.
repr(get_inspect_detail())
This is probably an obvious mistake or a lack of knowledge on my part, but I can get the unscrubbed data for one URL, though not for all of them.
I don't see a reason to define your function inside the loop; you would end up with a lot of redundant definitions that way. Second, you could just define a result list and accumulate the detailsLib objects in it.
# (output key, position in the list of 'verdana' font cells after the first 10).
_DETAIL_FIELDS = (
    ('Restaurant', 0),
    ('License', 2),
    ('Rank', 4),
    ('Expires', 6),
    ('Primary', 8),
    ('Secondary', 10),
    ('Address', 12),
    ('Result', 20),
) + tuple((f'Observed{n}', 32 + 2 * n) for n in range(1, 11))  # cells 34..52


def get_inspect_detail(url):
    """Scrape one inspection-detail page into a dict of its fields.

    Args:
        url: address of the inspection detail page.

    Returns:
        A list holding one dict with the keys 'Restaurant', 'License', 'Rank',
        'Expires', 'Primary', 'Secondary', 'Address', 'Result' and
        'Observed1'..'Observed10'. A field whose cell is missing from the page
        is None. The list is empty when the page has no detail cells at all.
    """
    # The context manager closes the HTTP response once it has been read.
    with urlopen(url) as html:
        soup = bs4.BeautifulSoup(html.read(), 'lxml')
    details = soup.find_all('font', {'face': 'verdana'})[10:]
    if not details:
        return []
    # Each field sits at a fixed position, so the page is read once; looping
    # over `details` only produced the same dict once per cell.
    return [{
        key: details[position].get_text() if position < len(details) else None
        for key, position in _DETAIL_FIELDS
    }]
# `urls` must be a list of URL strings; iterating over a single string would
# visit it one character at a time.
for url in urls:
    # Print the result: a bare repr(...) call discards its value.
    print(get_inspect_detail(url))