Python 3 code stops at HTTP error and I can't figure out how to handle it - python-3.x

I'm trying to scrape links from the website https://www.usyouthsoccer.org/clubs/club-directory/. Initially the code broke at the 30th link, so I tried to handle the exception with urllib's HTTPError. Now the script just stops running at the 30th link. I checked that specific URL and it is a bad link. I just want to move past it in the loop, but I'm having trouble with the workaround. Any suggestions would be greatly appreciated.
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser
import pandas as pd
from urllib.request import Request, urlopen
from urllib.error import HTTPError

executable_path = {"executable_path": "chromedriver"}
browser = Browser("chrome", **executable_path, headless=True)

url = 'https://www.usyouthsoccer.org/clubs/club-directory/'
zipcode_input = 'CT_Main_0$txtLocation'
search_button = '//*[@id="CT_Main_0_btnSearch"]'
dropdown = '//*[@id="CT_Main_0_drpMiles"]/option[5]'
zip_codes = [64015]

team_df = pd.DataFrame()
for x in zip_codes:
    try:
        print(f'\n{x}\n')
        url = 'https://www.usyouthsoccer.org/clubs/club-directory/'
        browser.visit(url)
        browser.fill(zipcode_input, x)
        browser.find_by_xpath(dropdown).click()
        browser.find_by_xpath(search_button).click()
        html = browser.html
        soup = bs(html, 'html.parser')
        dallas_urls = soup.find_all(class_="more")

        counter = 1
        for url in dallas_urls:
            print(f'Link {counter} of {len(dallas_urls)}')
            counter += 1
            back_url = url['href']
            front_url = 'https://www.usyouthsoccer.org'
            total_url = front_url + back_url
            browser.visit(total_url)
            my_html = pd.read_html(total_url)
            details_pd = pd.DataFrame(my_html[0])
            details_pd.columns = ['Cols', 'Vals']
            df = details_pd.T
            df.columns = df.iloc[0]
            df.drop('Cols', inplace=True)
            contacts_pd = pd.DataFrame(my_html[1])
            if len(contacts_pd.index) == 1:
                df['Contact_Title'] = contacts_pd.iloc[0, 0]
                df['Contact_Name'] = contacts_pd.iloc[0, 1]
                df['Contact_Email'] = contacts_pd.iloc[0, 2]
            elif len(contacts_pd.index) == 2:
                df['Contact_Title'] = contacts_pd.iloc[0, 0]
                df['Contact_Name'] = contacts_pd.iloc[0, 1]
                df['Contact_Email'] = contacts_pd.iloc[0, 2]
                df['Contact_Title2'] = contacts_pd.iloc[1, 0]
                df['Contact_Name2'] = contacts_pd.iloc[1, 1]
                df['Contact_Email2'] = contacts_pd.iloc[1, 2]
            elif len(contacts_pd.index) == 3:
                df['Contact_Title'] = contacts_pd.iloc[0, 0]
                df['Contact_Name'] = contacts_pd.iloc[0, 1]
                df['Contact_Email'] = contacts_pd.iloc[0, 2]
                df['Contact_Title2'] = contacts_pd.iloc[1, 0]
                df['Contact_Name2'] = contacts_pd.iloc[1, 1]
                df['Contact_Email2'] = contacts_pd.iloc[1, 2]
                df['Contact_Title3'] = contacts_pd.iloc[2, 0]
                df['Contact_Name3'] = contacts_pd.iloc[2, 1]
                df['Contact_Email3'] = contacts_pd.iloc[2, 2]
            team_df = pd.concat([team_df, df])
    except HTTPError as err:
        continue

Put your try statement inside the nested for loop. Right now, if an HTTPError is raised, it aborts the entire inner loop instead of just skipping the failing link and continuing with the rest.
import urllib.request
import urllib.error

for url in dallas_urls:
    try:
        print(f'Link {counter} of {len(dallas_urls)}')
        counter += 1
        back_url = url['href']
        front_url = 'https://www.usyouthsoccer.org'
        total_url = front_url + back_url
        urllib.request.urlretrieve(total_url)
    except urllib.error.HTTPError:
        print('Error')
        continue
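Following that advice, here is a minimal sketch of how the per-link work in the original script could be wrapped so a dead club URL is skipped instead of aborting the loop. fetch_tables is my name, not something from the original code, and this is only one way to arrange it:

from urllib.error import HTTPError
import pandas as pd

def fetch_tables(total_url):
    # Return the parsed tables for one club page, or None if the link is dead.
    try:
        return pd.read_html(total_url)
    except (HTTPError, ValueError):  # ValueError covers pages with no <table> at all
        print('Skipping bad link: ' + total_url)
        return None

# inside the inner "for url in dallas_urls:" loop:
#     my_html = fetch_tables(total_url)
#     if my_html is None:
#         continue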

Related

Python Program ValueError: invalid literal for int() with base 10:

I am a Python beginner, trying to write a program for "Corona Virus Live Updates for India – Using Python".
I am getting this error after running the code:
performance.append(int(row[2]) + int(row[3]))
ValueError: invalid literal for int() with base 10:
What can I do to fix this problem?
The Code:
extract_contents = lambda row: [x.text.replace('\n', '') for x in row]

URL = 'https://www.mohfw.gov.in/'
SHORT_HEADERS = ['SNo', 'State', 'Indian-Confirmed',
                 'Foreign-Confirmed', 'Cured', 'Death']

response = requests.get(URL).content
soup = BeautifulSoup(response, 'html.parser')
header = extract_contents(soup.tr.find_all('th'))

stats = []
all_rows = soup.find_all('tr')
for row in all_rows:
    stat = extract_contents(row.find_all('td'))
    if stat:
        if len(stat) == 5:
            # last row
            stat = ['', *stat]
            stats.append(stat)
        elif len(stat) == 6:
            stats.append(stat)

stats[-1][1] = "Total Cases"
stats.remove(stats[-1])

# Step #3:
objects = []
for row in stats:
    objects.append(row[1])
y_pos = np.arange(len(objects))

performance = []
for row in stats:
    performance.append(int(row[2]) + int(row[3]))

table = tabulate(stats, headers=SHORT_HEADERS)
print(table)
I just changed the line performance.append(int(row[2]) + int(row[3])) to performance.append(row[2] + str(int(float(row[3])))).
Full Code:
import requests
from bs4 import BeautifulSoup
import numpy as np
from tabulate import tabulate

extract_contents = lambda row: [x.text.replace('\n', '') for x in row]

URL = 'https://www.mohfw.gov.in/'
SHORT_HEADERS = ['SNo', 'State', 'Indian-Confirmed', 'Foreign-Confirmed', 'Cured', 'Death']

response = requests.get(URL).content
soup = BeautifulSoup(response, 'html.parser')
header = extract_contents(soup.tr.find_all('th'))

stats = []
all_rows = soup.find_all('tr')
for row in all_rows:
    stat = extract_contents(row.find_all('td'))
    if stat:
        if len(stat) == 5:
            # last row
            stat = ['', *stat]
            stats.append(stat)
        elif len(stat) == 6:
            stats.append(stat)

stats[-1][1] = "Total Cases"
stats.remove(stats[-1])

# Step #3:
objects = [row[1] for row in stats]
y_pos = np.arange(len(objects))
performance = [row[2] + str(int(float(row[3]))) for row in stats]

table = tabulate(stats, headers=SHORT_HEADERS)
print(table)
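If you still want the numeric sum from the original line rather than string concatenation, a defensive conversion is another option. The empty literal in the traceback suggests a blank or malformed cell; to_int below is a hypothetical helper that treats such cells as 0, which may or may not be what you want:

def to_int(value):
    # Treat blank or non-numeric cells as 0 instead of raising ValueError.
    try:
        return int(float(value))
    except ValueError:
        return 0

performance = [to_int(row[2]) + to_int(row[3]) for row in stats]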

How to scrape data that cannot be inspected and sits inside the <svg> tag

I'm unable to scrape some of the data from the Partywise Result webpage. I want to scrape the party-wise {vote%, vote count} from that page.
The code I have tried so far:
import urllib
import urllib.request
from bs4 import BeautifulSoup
import os

def soup(url):
    thepage = urllib.request.urlopen(url)
    soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata

# chhattisgarh
edatas = ""
edata1 = ""
codes = ["S26"]
for code in codes:
    soup3 = "http://eciresults.nic.in/PartyWiseResult" + code + ".htm"  # 2018
    #soup3 = "https://web.archive.org/web/20140613012440/http://eciresults.nic.in/PartyWiseResult" + code + ".htm"  # 2014
    soup2 = soup(soup3)
    for records2 in soup2.findAll("div", {"id": "piecharts26"}):
        print(records2.table)
        for records in records2.findAll("table"):
            print(records)
            edata = ""
            for data in records.findAll('td'):
                edata = edata + "," + data.text
            edatas = edatas + "\n" + edata[1:] + "," + code

header = "Party,Won,Leading,Total,State code"
file = open(os.path.expanduser("per2014_result.csv"), "wb")  # 2018
#file = open(os.path.expanduser("per2014_result.csv"), "wb")  # 2014
file.write(bytes(header, encoding="ascii", errors="ignore"))
file.write(bytes(edatas, encoding="ascii", errors="ignore"))
file.write(bytes(edata1, encoding="ascii", errors="ignore"))
The result I am expecting is the % vote share. I want the output in CSV format like this:
INC,43.0%,6144192
and so on, for the whole of page one and page two.
The data is loaded by JavaScript directly inside your div:
if(document.getElementById('piecharts26')!=null)
So you either have to drive a real browser, for example with Selenium, or pull the values out with a regex:
import urllib
import urllib.request
from bs4 import BeautifulSoup
import os
import re
import json

def get_data(html_page):
    s = str(html_page)
    r = re.compile(r'data.addRows\((.*?)\);')
    m = r.search(s)
    if m:
        result = m.group(1)
        return json.loads(result.replace("'", '"'))

def soup(url):
    thepage = urllib.request.urlopen(url)
    soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata

# chhattisgarh
edatas = ""
edata1 = ""
codes = ["S26"]
for code in codes:
    soup3 = "http://eciresults.nic.in/PartyWiseResult" + code + ".htm"  # 2018
    #soup3 = "https://web.archive.org/web/20140613012440/http://eciresults.nic.in/PartyWiseResult" + code + ".htm"  # 2014
    soup2 = soup(soup3)
    result = get_data(soup2)
    print(result)

header = "Party,Won,Leading,Total,State code"
file = open(os.path.expanduser("per2014_result.csv"), "wb")  # 2018
#file = open(os.path.expanduser("per2014_result.csv"), "wb")  # 2014
file.write(bytes(header, encoding="ascii", errors="ignore"))
file.write(bytes(edatas, encoding="ascii", errors="ignore"))
file.write(bytes(edata1, encoding="ascii", errors="ignore"))
OUTPUT:
[['INC {43.0%,6144192}', 6144192],
['BJP {33.0%,4707141}', 4707141],
['JCCJ {7.6%,1086581}', 1086581],
['IND {5.9%,839053}', 839053],
['BSP {3.9%,552313}', 552313],
['GGP {1.7%,247459}', 247459],
['AAAP {0.9%,123526}', 123526],
['CPI {0.3%,48255}', 48255],
['APoI {0.3%,42013}', 42013],
['SHS {0.2%,34678}', 34678],
['NCP {0.2%,28983}', 28983],
['SP {0.2%,21969}', 21969],
['BYPP {0.1%,8425}', 8425],
['CPM {0.1%,8348}', 8348],
['JD(U) {0.1%,8285}', 8285],
['CSM {0.1%,7783}', 7783],
['BMUP {0.1%,7419}', 7419],
['BSCP {0.0%,5546}', 5546],
['BTP {0.0%,5498}', 5498],
['RJsbhP {0.0%,5141}', 5141],
['RGOP {0.0%,5040}', 5040],
['IPBP {0.0%,4982}', 4982],
['NINSHAD {0.0%,4586}', 4586],
['PSPU {0.0%,4309}', 4309],
['BHBHP {0.0%,3780}', 3780],
['RPI(A) {0.0%,3257}', 3257],
['JAC {0.0%,3034}', 3034],
['CPIM {0.0%,3017}', 3017],
['NDPF {0.0%,2912}', 2912],
['AASPP {0.0%,2474}', 2474],
['BBC {0.0%,2089}', 2089],
['SWAP {0.0%,2023}', 2023],
['cvgrp {0.0%,1582}', 1582],
['bhmm {0.0%,1474}', 1474],
['AVVP {0.0%,1407}', 1407],
['LSWP {0.0%,1399}', 1399],
['CSP {0.0%,1232}', 1232],
['BPSGKD {0.0%,1093}', 1093],
['BKNP {0.0%,1085}', 1085],
['CGVP {0.0%,1053}', 1053],
['SUCI {0.0%,1048}', 1048],
['SUSP {0.0%,988}', 988],
['DPI {0.0%,970}', 970],
['RJBP {0.0%,717}', 717],
['ASSP {0.0%,701}', 701],
['BLRP {0.0%,570}', 570],
['BSHSP {0.0%,562}', 562],
['ABHM {0.0%,549}', 549],
['SSBD {0.0%,468}', 468],
['ABSSP {0.0%,436}', 436],
['BRSP {0.0%,429}', 429],
['ABSKP {0.0%,389}', 389],
['BSSP {0.0%,279}', 279],
['BNIP {0.0%,267}', 267],
['RMGP {0.0%,258}', 258],
['KMSP {0.0%,241}', 241],
['BHBP {0.0%,224}', 224],
['RP(K) {0.0%,202}', 202],
['CMM {0.0%,192}', 192],
['CHSJP {0.0%,183}', 183],
['RSSM {0.0%,72}', 72],
['AnAP {0.0%,66}', 66],
['NOTA {2.0%,282744}', 282744]]
Then you can loop over the result and save it to a CSV file.
EDIT:
Here is how to save the result to a CSV file:
import urllib
import urllib.request
from bs4 import BeautifulSoup
import os
import re
import json
import csv

def get_data(html_page):
    s = str(html_page)
    r = re.compile(r'data.addRows\((.*?)\);')
    m = r.search(s)
    if m:
        result = m.group(1)
        return json.loads(result.replace("'", '"'))

def soup(url):
    thepage = urllib.request.urlopen(url)
    soupdata = BeautifulSoup(thepage, "html.parser")
    return soupdata

codes = ["S26"]
for code in codes:
    soup3 = "http://eciresults.nic.in/PartyWiseResult" + code + ".htm"  # 2018
    #soup3 = "https://web.archive.org/web/20140613012440/http://eciresults.nic.in/PartyWiseResult" + code + ".htm"  # 2014
    soup2 = soup(soup3)
    result = get_data(soup2)

header = ["Party", "Vote%", "Count", "State code"]
results_export = []
results_export.append(header)
for r in result:
    export = []
    party = r[0].split(' {')[0]
    percent = r[0].split(' {')[1].split(',')[0]
    count = r[1]
    export.append(str(party))
    export.append(str(percent))
    export.append(str(count))
    export.append(code)
    results_export.append(export)

file = open(os.path.expanduser("per2014_result.csv"), "w")  # 2018
writer = csv.writer(file)
writer.writerows(results_export)
EDIT2:
def get_data(html_page):
    s = str(html_page)
    r = re.compile(r'data.addRows\((.*?)\);')
    ms = r.findall(s)
    result = '[]'
    if ms:
        for m in ms:
            if m != '[]':
                result = m
    return json.loads(result.replace("'", '"'))
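A small illustrative check of the EDIT2 behaviour. The sample string below is made up to mimic the page's inline script; judging by the m != '[]' check, the page apparently contains an empty data.addRows([]) call as well, and the revised get_data keeps the last non-empty match:

sample = "data.addRows([]); data.addRows([['INC {43.0%,6144192}', 6144192]]);"
print(get_data(sample))
# [['INC {43.0%,6144192}', 6144192]]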

adding "na" text to an array within a loop

I've gotten all the data I wanted from scraping this Metacritic URL (see below); however, I can't seem to put in a value for when I don't find the associated value for a list (missing values).
I would like all the lists to be the same length (so I can write them to a .csv).
Here is the code I have so far:
from requests import get
from bs4 import BeautifulSoup
from urllib.request import Request, urlopen
import pandas as pd

# Define year
year_number = 2018

# Define the URL
i = range(0, 1)
names = []
metascores = []
userscores = []
userscoresNew = []
release_dates = []
release_datesNew = []
publishers = []
ratings = []
genres = []
genresNew = []

for element in i:
    url = "http://www.metacritic.com/browse/games/score/metascore/year/pc/filtered?view=detailed&sort=desc&year_selected=" + format(year_number)
    print(url)
    year_number -= 1

    # not sure about this but it works (I was getting blocked by something and this the way I found around it)
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    web_byte = urlopen(req).read()
    webpage = web_byte.decode('utf-8')

    # this grabs the all the text from the page
    html_soup = BeautifulSoup(webpage, 'html5lib')

    # this is for selecting all the games in from 1 to 100 (the list of them)
    game_names = html_soup.find_all("div", class_="main_stats")
    game_metas = html_soup.find_all("a", class_="basic_stat product_score")
    game_users = html_soup.find_all("li", class_='stat product_avguserscore')
    game_releases = html_soup.find_all("ul", class_='more_stats')
    game_publishers = html_soup.find_all("li", class_='stat publisher')
    game_ratings = html_soup.find_all("li", class_='stat maturity_rating')
    game_genres = html_soup.find_all("li", class_='stat genre')

    # Extract data from each game
    for games in game_names:
        name = games.find()
        names.append(name.text.strip())

    for games2 in game_metas:
        metascore = games2.find()
        metascores.append(metascore.text.strip())

    for games3 in game_releases:
        release_date = games3.find()
        release_dates.append(release_date.text.strip())

    for games4 in game_users:
        userscore = games4.find('span', class_="data textscore textscore_favorable") or games4.find('span', class_="data textscore textscore_mixed")
        if userscore:
            userscores.append(userscore.text)

    for games5 in game_publishers:
        publisher = games5.find("span", class_="data")
        if publisher:
            publishers.append(publisher.text)

    for games6 in game_ratings:
        rating = games6.find("span", class_="data")

    for games7 in game_genres:
        genre = games7.find("span", class_="data")
        if genre:
            genres.append(genre.text)

for x in release_dates:
    temp = str(x)
    temp2 = temp.replace("Release Date:\n ", "")
    release_datesNew.append(temp2)

for z in genres:
    temp3 = str(z)
    temp4 = temp3.strip()
    temp5 = temp4.replace(" ", "")
    genresNew.append(temp5)

df = pd.DataFrame({'Games:': names})
I'm not sure how I would work that into this code. From what I understand, it takes all the data it can find, but if there is a blank it doesn't know about it. Can someone advise the best solution for this situation? Any help would be great. Thanks.
Just add else's for the existing conditions...
if userscore:
    userscores.append(userscore.text)
else:
    userscores.append('na')
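The same fallback extends to the other optional fields. A sketch based on the loops above (note that the original ratings loop never appended anything, so an append is added there too):

for games5 in game_publishers:
    publisher = games5.find("span", class_="data")
    publishers.append(publisher.text if publisher else 'na')

for games6 in game_ratings:
    rating = games6.find("span", class_="data")
    ratings.append(rating.text if rating else 'na')

for games7 in game_genres:
    genre = games7.find("span", class_="data")
    genres.append(genre.text if genre else 'na')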

I'm stumped at looping through a returned list of URLs

This is my first Python project. I'm trying to scrape restaurant inspections. One site has summaries that offer keys to the detailed reports I want to scrape, and I'm stumped at looping through the keyed list of URLs to get the details.
import pandas as pd
import bs4
import datetime
import re
import lxml
from urllib.request import urlopen
from urllib.error import HTTPError

try:
    insp = pd.read_csv("ftp://dbprftp.state.fl.us/pub/llweb/5fdinspi.csv",
                       usecols=[2, 14, 18, 80, 81])
except IOError:
    print("The file is not accessible.")

insp.columns = ["CountyName", "InspectDate",
                "NumHighVio", "LicenseID", "VisitID"]

# filter for alachua county restaurants
alachua = insp[insp.CountyName == 'Alachua']

# filter for restaurants that had at least one serious violation
alachua = alachua[alachua.NumHighVio > 0]

# change date string to date object
alachua['InspectDate'] = pd.to_datetime(alachua['InspectDate'])

# sort most recent
alachua = alachua.sort_values('InspectDate', ascending=False)

# prefer to have user set timedelta below:
today = pd.to_datetime('today')
startDay = datetime.date.today() - datetime.timedelta(days=30)
alachua = alachua[(alachua['InspectDate'] > startDay) &
                  (alachua['InspectDate'] < today)]

# takes LicenseID and VisitID, passes it into the urls for detailed reports
for index, rows in alachua.iterrows():
    visitID = rows['VisitID']
    licID = rows['LicenseID']
    urls = "https://www.myfloridalicense.com/inspectionDetail.asp?InspVisitID= %s &licid= %s" % (visitID, licID)
    urls = urls.replace(' ', '')
    print(urls)

## here's my problem:
for url in urls:
    def get_inspect_detail():
        html = urlopen(url)
        soup = bs4.BeautifulSoup(html.read(), 'lxml')
        details = soup.find_all('font', {'face': 'verdana'})[10:]
        for detail in details:
            siteName = details[0].text
            licNum = details[2].text
            siteRank = details[4].text
            expDate = details[6].text
            primeStatus = details[8].text
            secStatus = details[10].text
            siteAddress = details[12].text
            inspectResult = details[20].text
            observed1 = details[34].get_text
            observed2 = details[36].text
            observed3 = details[38].text
            observed4 = details[40].text
            observed5 = details[42].text
            observed6 = details[44].text
            observed7 = details[46].text
            observed8 = details[48].text
            observed9 = details[50].text
            observed10 = details[52].text
            detailsLib = {
                'Restaurant': siteName,
                'License': licNum,
                'Rank': siteRank,
                'Expires': expDate,
                'Primary': primeStatus,
                'Secondary': secStatus,
                'Address': siteAddress,
                'Result': inspectResult,
                'Observed1': observed1,
                'Observed2': observed2,
                'Observed3': observed3,
                'Observed4': observed4,
                'Observed5': observed5,
                'Observed6': observed6,
                'Observed7': observed7,
                'Observed8': observed8,
                'Observed9': observed9,
                'Observed10': observed10
            }
    repr(get_inspect_detail())
Probably an obvious mistake or lack of knowledge, but I can get the unscrubbed data for one url, but not for all.
I don't see a reason to define your function inside the loop; you would end up with a lot of redundant definitions that way. Second, you could just define a result list and accumulate the detailsLib objects inside it.
def get_inspect_detail(url):
    html = urlopen(url)
    soup = bs4.BeautifulSoup(html.read(), 'lxml')
    details = soup.find_all('font', {'face': 'verdana'})[10:]
    result = []
    for detail in details:
        siteName = details[0].text
        licNum = details[2].text
        siteRank = details[4].text
        expDate = details[6].text
        primeStatus = details[8].text
        secStatus = details[10].text
        siteAddress = details[12].text
        inspectResult = details[20].text
        observed1 = details[34].get_text
        observed2 = details[36].text
        observed3 = details[38].text
        observed4 = details[40].text
        observed5 = details[42].text
        observed6 = details[44].text
        observed7 = details[46].text
        observed8 = details[48].text
        observed9 = details[50].text
        observed10 = details[52].text
        detailsLib = {
            'Restaurant': siteName,
            'License': licNum,
            'Rank': siteRank,
            'Expires': expDate,
            'Primary': primeStatus,
            'Secondary': secStatus,
            'Address': siteAddress,
            'Result': inspectResult,
            'Observed1': observed1,
            'Observed2': observed2,
            'Observed3': observed3,
            'Observed4': observed4,
            'Observed5': observed5,
            'Observed6': observed6,
            'Observed7': observed7,
            'Observed8': observed8,
            'Observed9': observed9,
            'Observed10': observed10
        }
        result.append(detailsLib)
    return result

for url in urls:
    repr(get_inspect_detail(url))
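One hedged way to drive that function, sketched against the alachua DataFrame built earlier (url_list and all_details are my names): collect the report URLs into an actual list inside the iterrows loop, then call get_inspect_detail on each. Iterating over a single URL string with for url in urls would loop over its characters, which is not what you want.

url_list = []
for index, rows in alachua.iterrows():
    link = ("https://www.myfloridalicense.com/inspectionDetail.asp"
            "?InspVisitID=%s&licid=%s" % (rows['VisitID'], rows['LicenseID']))
    url_list.append(link)

all_details = []
for url in url_list:
    all_details.extend(get_inspect_detail(url))  # each call returns a list of dicts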

AttributeError: 'str' object has no attribute 'text' python 2.7

I know there are many questions like this, but the answers are all specific and only fix the problem for that person's particular script.
I am currently trying to print a bunch of info from supremenewyork.com, from the UK site. This script can successfully print all the info I want from Supreme US, but when I added the proxy part I started to get a lot of errors.
I know the proxy setup works because I tested it on a small script, and it was able to pull info that was on Supreme UK and didn't exist on Supreme US.
Here is my script:
import requests
from bs4 import BeautifulSoup

UK_Proxy1 = raw_input('UK http Proxy1: ')
UK_Proxy2 = raw_input('UK http Proxy2: ')

proxies = {
    'http': 'http://' + UK_Proxy1 + '',
    'https': 'http://' + UK_Proxy2 + '',
}

categorys = ['jackets', 'shirts', 'tops_sweaters', 'sweatshirts', 'pants', 'shorts', 't-shirts', 'hats', 'hats', 'bags', 'accessories', 'shoes', 'skate']
catNumb = 0
altArray = []
nameArray = []
styleArray = []

for cat in categorys:
    catStr = str(categorys[catNumb])
    cUrl = 'http://www.supremenewyork.com/shop/all/' + catStr
    proxy_script = requests.get((cUrl.text), proxies=proxies)
    bSoup = BeautifulSoup(proxy_script, 'lxml')
    print('\n*******************"' + catStr.upper() + '"*******************\n')
    catNumb += 1
    for item in bSoup.find_all('div', class_='inner-article'):
        url = item.a['href']
        alt = item.find('img')['alt']
        req = requests.get('http://www.supremenewyork.com' + url)
        item_soup = BeautifulSoup(req.text, 'lxml')
        name = item_soup.find('h1', itemprop='name').text
        style = item_soup.find('p', itemprop='model').text
        print alt + (' --- ') + name + (' --- ') + style
        altArray.append(alt)
        nameArray.append(name)
        styleArray.append(style)

print altArray
print nameArray
print styleArray
I am getting this error when I execute the script:
AttributeError: 'str' object has no attribute 'text'
with the error pointing at the line proxy_script = requests.get((cUrl.text), proxies=proxies).
I recently added the change below, which sort of fixed it: it was able to print the categories, but none of the info between them (which I NEED). It just printed ****************jackets**************, ****shirts******, etc. Here is what I changed:
import requests
from bs4 import BeautifulSoup

# make sure proxy is http and port 8080
UK_Proxy1 = raw_input('UK http Proxy1: ')
UK_Proxy2 = raw_input('UK http Proxy2: ')

proxies = {
    'http': 'http://' + UK_Proxy1 + '',
    'https': 'http://' + UK_Proxy2 + '',
}

categorys = ['jackets', 'shirts', 'tops_sweaters', 'sweatshirts', 'pants', 'shorts', 't-shirts', 'hats', 'bags', 'accessories', 'shoes', 'skate']
catNumb = 0
altArray = []
nameArray = []
styleArray = []

for cat in categorys:
    catStr = str(categorys[catNumb])
    cUrl = 'http://www.supremenewyork.com/shop/all/' + catStr
    proxy_script = requests.get(cUrl, proxies=proxies).text
    bSoup = BeautifulSoup(proxy_script, 'lxml')
    print('\n*******************"' + catStr.upper() + '"*******************\n')
    catNumb += 1
    for item in bSoup.find_all('div', class_='inner-article'):
        url = item.a['href']
        alt = item.find('img')['alt']
        req = requests.get('http://www.supremenewyork.com' + url)
        item_soup = BeautifulSoup(req.text, 'lxml')
        name = item_soup.find('h1', itemprop='name').text
        style = item_soup.find('p', itemprop='model').text
        print alt + (' --- ') + name + (' --- ') + style
        altArray.append(alt)
        nameArray.append(name)
        styleArray.append(style)

print altArray
print nameArray
print styleArray
I put .text at the end and it sort of worked... How do I fix it so it prints the info I want?
I think you are missing something: your cUrl is a string, not a Response object. I guess you want:
proxy_script = requests.get(cUrl, proxies=proxies).text
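For context, a minimal sketch of the corrected fetch using the same names as the script above (resp is my name, purely illustrative): requests.get returns a Response object, and .text is an attribute of that response, not of the URL string.

resp = requests.get(cUrl, proxies=proxies)   # Response object
bSoup = BeautifulSoup(resp.text, 'lxml')     # .text lives on the Response, not on the URL string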
