I want to download this website's PDF file using Python 3: https://qingarchives.npm.edu.tw/index.php?act=Display/image/207469Zh18QEz#74l
This might accomplish what you're trying to achieve:
import requests

# URL to be downloaded
url = "https://cfm.ehu.es/ricardo/docs/python/Learning_Python.pdf"


def download_pdf(url, file_name):
    """Download the PDF at *url* and write it to the local file *file_name*.

    Raises requests.HTTPError if the server answers with an error status,
    so we never silently save an HTML error page as a ".pdf".
    """
    # Send GET request; a timeout keeps us from hanging on a dead server.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # fail loudly on 4xx/5xx instead of saving garbage
    # Save the PDF bytes to disk
    with open(file_name, "wb") as f:
        f.write(response.content)


download_pdf(url, 'myDownloadedFile.pdf')
from urllib import request

# urlretrieve fetches the URL and streams it straight into the given local
# file; it returns a (filename, headers) tuple.
response = request.urlretrieve(
    "https://cfm.ehu.es/ricardo/docs/python/Learning_Python.pdf",
    "learing_python.pdf",
)
or
import wget

# The wget package mirrors the command-line tool: give it a source URL and a
# destination path, and it downloads the file in one call.
URL = "https://cfm.ehu.es/ricardo/docs/python/Learning_Python.pdf"
response = wget.download(URL, ".learing_python.pdf")
Related
I am trying to take a screenshot of a URL, but it captures the gateway instead because of restricted entry. I tried adding an ID and password to open the link, but it does not work for some reason — could you help?
import requests
import urllib.parse
import getpass

# Screenshot service; size, format and zoom can be tweaked in the path.
BASE = 'https://mini.s-shot.ru/1024x0/JPEG/1024/Z100/?'

url = 'https://mail.google.com/mail/'  # or whatever link you need
url = urllib.parse.quote_plus(url)  # percent-encode so it survives as a query value
print(url)

Id = "XXXXXX"
key = getpass.getpass('Password :: ')

path = 'target1.jpg'
# BUG FIX: the original referenced an undefined name `Password` (NameError);
# the password was read into `key` above, so that is what must be sent.
response = requests.get(BASE + url + Id + key, stream=True)
if response.status_code == 200:
    # Stream the JPEG to disk chunk by chunk.
    with open(path, 'wb') as file:
        for chunk in response:
            file.write(chunk)
Thanks!
Based on the code from here, I'm able to crawl the URL for each transaction and save them into an Excel file, which can be downloaded here.
Now I would like to go further and click the url link:
For each url, I will need to open and save pdf format files:
How could I do that in Python? Any help would be greatly appreciated.
Code for references:
import shutil
from bs4 import BeautifulSoup
import requests
import os
from urllib.parse import urlparse

url = 'xxx'

# Walk the first six listing pages and save every linked PDF.
for page in range(6):
    listing = requests.get(url.format(page))
    soup = BeautifulSoup(listing.content, "html.parser")
    # Each card title anchor points at one downloadable document.
    for anchor in soup.select("h3[class='sv-card-title']>a"):
        download = requests.get(anchor.get("href"), stream=True)
        download.decode_content = True
        # Name the local file after the link text.
        with open('./files/' + anchor.text + '.pdf', 'wb') as out:
            shutil.copyfileobj(download.raw, out)
An example of download a pdf file in your uploaded excel file.
from bs4 import BeautifulSoup
import requests

# Let's assume there is only one page.If you need to download many files, save them in a list.
url = 'http://xinsanban.eastmoney.com/Article/NoticeContent?id=AN201909041348533085'

page = requests.get(url)
soup = BeautifulSoup(page.content, "html.parser")

# The "look more" anchor holds the actual PDF href; the article headline
# becomes the local file name.
link = soup.select_one(".lookmore")
title = soup.select_one(".newsContent").select_one("h1").text
print(title.strip() + '.pdf')

pdf_bytes = requests.get(link.get("href")).content
# file name shouldn't contain ':', so I replace it to "-"
safe_name = title.strip().replace(":", "-") + '.pdf'
with open(safe_name, "wb+") as f:
    f.write(pdf_bytes)
And download successfully:
Here's bit different approach. You don't have to open those urls from the excel file as you can build the .pdf file source urls yourself.
For example:
import requests

urls = [
    "http://data.eastmoney.com/notices/detail/871792/AN201909041348533085,JWU2JWEwJTk2JWU5JTljJTllJWU3JTg5JWE5JWU0JWI4JTlh.html",
    "http://data.eastmoney.com/notices/detail/872955/AN201912101371726768,JWU0JWI4JWFkJWU5JTgzJWJkJWU3JTg5JWE5JWU0JWI4JTlh.html",
    "http://data.eastmoney.com/notices/detail/832816/AN202008171399155565,JWU3JWI0JWEyJWU1JTg1JThiJWU3JTg5JWE5JWU0JWI4JTlh.html",
    "http://data.eastmoney.com/notices/detail/831971/AN201505220009713696,JWU1JWJjJTgwJWU1JTg1JTgzJWU3JTg5JWE5JWU0JWI4JTlh.html",
]

for notice_url in urls:
    # The announcement id is the part of the last path segment before the comma;
    # the PDF source URL can be rebuilt from it directly.
    last_segment = notice_url.split('/')[-1]
    file_id, _ = last_segment.split(',')
    pdf_file_url = f"http://pdf.dfcfw.com/pdf/H2_{file_id}_1.pdf"
    print(f"Fetching {pdf_file_url}...")
    with open(f"{file_id}.pdf", "wb") as f:
        f.write(requests.get(pdf_file_url).content)
I have a problem with my code created in python.
I would like the URL API telegram to open with a change so that the downloaded item from the site is sent to chat.
# Import libraries
import requests
import urllib.request
import urllib.parse
import time
import sys
from bs4 import BeautifulSoup

# Redirect stdout into a log file (keep the original handle to restore later).
stdoutOrigin = sys.stdout
sys.stdout = open("log.txt", "w")

# Set the URL you want to webscrape from
url = 'https://31asdasdasdasdasd.com/'

# Connect to the URL
response = requests.get(url)

# Parse HTML and save to BeautifulSoup object
soup = BeautifulSoup(response.text, "html.parser")

zapisane = ''
row = soup.find('strong')
print(">> Ilosc opinii ktora przeszla:")
# BUG FIX: print() returns None, so the original `send = print(...)` never
# captured the text. Grab the text first, then print it.
send = row.get_text()
print(send)

# BUG FIX: the scraped value was never appended to the API call (the query
# ended with an empty `text=`). Percent-encode it so spaces and non-ASCII
# characters survive in the query string.
u = urllib.request.urlopen(
    "https://api.telegram.org/botid:ts/sendMessage?chat_id=-3channel1&text="
    + urllib.parse.quote_plus(send)
)
You likely want to use a string format with a variable in your last line of code shown here. Here's a helpful resource for string formatting: https://www.geeksforgeeks.org/python-format-function/
I have a stupid issue. I have an address that generates a csv file immediately when I copy that to the browser. But I need to do it with python code, so I tried to do something like that:
import urllib.request

# BUG FIX: the original URL literal was missing its closing quote, which is a
# SyntaxError before any request is ever made. The string is split across
# lines here purely for readability; adjacent literals concatenate.
url = ('https://www.quandl.com/api/v3/datasets/WSE/TSGAMES.csv'
       '?column_index=4&start_date=2018-01-01&end_date=2018-12-31'
       '&collapse=monthly&transform=rdiff&api_key=AZ964MpikzEYAyLGfJD2Y')

# Renamed from `csv` so the local doesn't shadow the stdlib csv module name.
csv_bytes = urllib.request.urlopen(url).read()
with open('file.csv', 'wb') as fx:  # bytes, hence mode 'wb'
    fx.write(csv_bytes)
But I got an error: raise HTTPError(req.full_url, code, msg, hdrs, fp)
HTTPError: Bad Request
Do you know the reason and could you help ?
Thanks for any help !
Edit: I should state that your link did not work for me, and my Quandl API key is different than yours.
This is pretty easy to do with the requests module:
import requests

filename = 'test_file.csv'
link = 'your link here'

data = requests.get(link)  # request the link, response 200 = success
data.raise_for_status()  # actually verify success rather than just commenting on it
with open(filename, 'wb') as f:
    f.write(data.content)  # write content of request to file
    # NOTE: the explicit f.close() was removed — the `with` block already
    # closes the file on exit, so calling close() again was redundant.
That link doesn't work for me. Try it like this (generic example).
from urllib.request import urlopen
from io import StringIO
import csv

# Fetch the remote CSV as text, dropping any bytes that aren't ASCII.
data = urlopen("http://pythonscraping.com/files/MontyPythonAlbums.csv").read().decode('ascii', 'ignore')
dataFile = StringIO(data)
csvReader = csv.reader(dataFile)

# BUG FIX: csv files must be opened with newline='' (per the csv module docs);
# without it, Windows inserts a blank line after every row.
with open('C:/Users/Excel/Desktop/example.csv', 'w', newline='') as myFile:
    writer = csv.writer(myFile)
    writer.writerows(csvReader)
I am not able to download JavaScript-generated images and store them on my local machine. The code below does not give any errors, but there are no images in my folder. I have already tried many approaches; here is one of them:
import os
import requests

path = "http://my.site.com/page/oo?_b=9V2FG34519CV2N56SLK567943N25J82V"

# Create the destination directory and write the image into it —
# BUG FIX: the original created 'C:/Images' but then wrote into 'logos'.
out_dir = 'C:/Images'
os.makedirs(out_dir, exist_ok=True)
print('Dowload images %s...' % path)

# BUG FIX: the module used here is `requests` (it provides raise_for_status
# and iter_content); the original called the non-existent `request.get`.
res = requests.get(path)
res.raise_for_status()

# BUG FIX: basename must come from the URL string, not from the Response
# object itself; strip the query string first so the name is filesystem-safe.
file_name = os.path.basename(path.split('?')[0]) or 'image'
with open(os.path.join(out_dir, file_name), 'wb') as imageFile:
    for chunk in res.iter_content(100000):
        imageFile.write(chunk)
I have been trying for two days to solve this problem, so I would be grateful if somebody could help me!
Once you've got the image url, you can use requests and shutil to save the image to a given directory (working for me for the given url):
import shutil
import requests
import os

url = 'http://epub.hpo.hu/e-kutatas/aa?_p=A554F6BCDBCEA51EFF1E0E17E777F3AC'

# BUG FIX: `out_file_path` was never defined in the original snippet (NameError).
out_file_path = os.path.join(os.getcwd(), 'downloaded_image.jpg')

response = requests.get(url, stream=True)
response.raise_for_status()
# Undo any transport compression before copying the raw stream, otherwise the
# saved file may contain gzipped bytes instead of the image.
response.raw.decode_content = True
with open(out_file_path, 'wb') as out_file:
    shutil.copyfileobj(response.raw, out_file)