I wrote a function to fetch the full source of a Korean dictionary page and then cut out the URL of the mp3. It throws an error, which I think is caused by the Korean text.
How do I fix it?
import requests
from bs4 import BeautifulSoup

def cut_to_get_mp3_url(word):
    if word == None:
        return None
    link = 'https://krdict.korean.go.kr/vie/dicSearch/search?nation=vie&nationCode=2&ParaWordNo=&mainSearchWord=' + word
    x = requests.get(link)
    soup = BeautifulSoup(x.content, "html.parser")
    url = ''
    for link in soup.find_all('img'):
        str_onClick = link.get('onclick')
        if str_onClick != None:
            if str_onClick.endswith(".mp3');"):
                url = str_onClick[len("javascript:fnSoundPlay('"): len(str_onClick)-len("');")]
                print(url)
    return url

cut_to_get_mp3_url('오')
The error:
Traceback (most recent call last):
File "/home/linh/Desktop/python/in_link.py", line 36, in <module>
save_file(cut_to_get_mp3_url(korean_word), str(count))
File "/home/linh/Desktop/python/in_link.py", line 24, in save_file
x = requests.get(mp3_url)
File "/usr/lib/python3.7/site-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/lib/python3.7/site-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python3.7/site-packages/requests/sessions.py", line 519, in request
prep = self.prepare_request(req)
File "/usr/lib/python3.7/site-packages/requests/sessions.py", line 462, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks),
File "/usr/lib/python3.7/site-packages/requests/models.py", line 313, in prepare
self.prepare_url(url, params)
File "/usr/lib/python3.7/site-packages/requests/models.py", line 387, in prepare_url
raise MissingSchema(error)
requests.exceptions.MissingSchema: Invalid URL '': No schema supplied. Perhaps you meant http://?
I found a site related to my question here:
https://www.reddit.com/r/Korean/comments/60fzyq/download_sound_files_from_naver_dictionary/
I applied it to my case and it works :D
import urllib.request
import urllib.parse  # explicit import; urllib.request pulls this in, but being explicit is clearer
import re

#koreanWords = input('Enter words: ').split()
koreanWords = ['한국']

for x in range(0, len(koreanWords)):
    url = ('https://krdict.korean.go.kr/vie/dicSearch/search?nation=vie&nationCode=2&ParaWordNo=&mainSearchWord'
           + urllib.parse.urlencode({'': koreanWords[x], 'kind': 'keyword'}))
    print(url)
    response = urllib.request.urlopen(url)
    html = response.read()
    html = html.decode("utf8")
    response.close()
    regexSearch = re.search('mySound = soundManager.createSound\({url:\'(.*?)(\'|$)', html)
    mp3Page = regexSearch.group(1)
    print(mp3Page)
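For what it's worth, here is a minimal sketch of the original requests/BeautifulSoup approach with two small changes, assuming the search page still exposes the sound link in an img onclick attribute: the Korean word is passed through the params argument so requests handles the percent-encoding, and the function returns None instead of an empty string when no mp3 link is found, so the caller can skip the download rather than hit the MissingSchema error.

import requests
from bs4 import BeautifulSoup

def cut_to_get_mp3_url(word):
    # Sketch only: the parameter names mirror the query string used above.
    if word is None:
        return None
    params = {
        'nation': 'vie',
        'nationCode': '2',
        'ParaWordNo': '',
        'mainSearchWord': word,   # requests percent-encodes the Korean text
    }
    resp = requests.get('https://krdict.korean.go.kr/vie/dicSearch/search', params=params)
    soup = BeautifulSoup(resp.content, 'html.parser')
    for img in soup.find_all('img'):
        onclick = img.get('onclick')
        if onclick and onclick.endswith(".mp3');"):
            return onclick[len("javascript:fnSoundPlay('"):-len("');")]
    return None  # caller should check for None before requesting the mp3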
I have a small Python script that used to connect to Discord and print the author and their comment from the channels I am a member of. It now raises an error when it tries to connect, and I am not sure how to track down the connection problem.
from fileinput import close
from locale import format_string
import websocket
import json
import threading
import time
import csv
import string


def send_json_request(ws, request):
    ws.send(json.dumps(request))


def recieve_json_response(ws):
    response = ws.recv()
    if response:
        return json.loads(response)


def heartbeat(interval, ws):
    print("Heartbeat Begin")
    while True:
        time.sleep(interval)
        heartbeatJSON = {
            "op": 1,
            "d": "null"
        }
        send_json_request(ws, heartbeatJSON)
        print("Heartbeat Sent")


ws = websocket.WebSocket()
ws.connect("wss://gateway.discord.gg/?v=6&encording=json")
event = recieve_json_response(ws)
heartbeat_interval = event["d"]["heartbeat_interval"] / 1000
threading._start_new_thread(heartbeat, (heartbeat_interval, ws))

token = "token"
payload = {"op": 2, "d": {"token": token, "properties": {"$os": "Linux", "$browser": "Discord", "$device": "Pi"}}}
send_json_request(ws, payload)

while True:
    event = recieve_json_response(ws)
    try:
        author = event['d']['author']['username']
        content = event['d']['content']
        rows = [[author, content]]
        if author in ["pebbles", "runner", "snapper"]:
            print({event['d']['author']['username']}, {event['d']['content']})
            with open('file.csv', 'a', newline='', encoding='UTF8') as csvfile:
                writer = csv.writer(csvfile)
                auth = author.casefold()
                writer.writerow(rows)
        op_code = event('op')
        if op_code == 11:
            print("Heartbeat received")
    except:
        pass
I get the traceback below when I run it. I hope someone can point me in the right direction.
Traceback (most recent call last):
File "c:\Users\seanj\Documents\Trading\main.py", line 52, in
event = recieve_json_response(ws)
File "c:\Users\seanj\Documents\Trading\main.py", line 17, in recieve_json_response
response = ws.recv()
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_core.py", line 359, in recv
opcode, data = self.recv_data()
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_core.py", line 382, in recv_data
opcode, frame = self.recv_data_frame(control_frame)
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_core.py", line 403, in recv_data_frame
frame = self.recv_frame()
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_core.py", line 442, in recv_frame
return self.frame_buffer.recv_frame()
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_abnf.py", line 338, in recv_frame
self.recv_header()
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_abnf.py", line 294, in recv_header
header = self.recv_strict(2)
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_abnf.py", line 373, in recv_strict
bytes_ = self.recv(min(16384, shortage))
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_core.py", line 526, in _recv
return recv(self.sock, bufsize)
File "C:\Users\seanj\AppData\Local\Programs\Python\Python310\lib\site-packages\websocket_socket.py", line 122, in recv
raise WebSocketConnectionClosedException(
websocket._exceptions.WebSocketConnectionClosedException: Connection to remote host was lost.
I am using Chrome 90 and Python 3.9. All the imports are fully up to date, as I only just installed them.
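One way to at least surface why the socket drops, rather than crashing inside recv(), is to catch the exceptions that websocket-client raises. This is a hedged sketch of that idea, not a fix for the disconnect itself; note it also spells the query parameter encoding=json, whereas the original URL has encording.

import time
import websocket  # websocket-client

GATEWAY = "wss://gateway.discord.gg/?v=6&encoding=json"

def connect_with_retry(max_attempts=5):
    # Retry the handshake a few times and report the failure reason each time.
    for attempt in range(1, max_attempts + 1):
        ws = websocket.WebSocket()
        try:
            ws.connect(GATEWAY)
            return ws
        except (websocket.WebSocketException, ConnectionError) as exc:
            print(f"connect attempt {attempt} failed: {exc}")
            time.sleep(2 * attempt)
    raise RuntimeError("could not reach the Discord gateway")

ws = connect_with_retry()
try:
    hello = ws.recv()
    print("gateway hello:", hello)
except websocket.WebSocketConnectionClosedException as exc:
    # The server closed the socket; the reason (if any) is in the exception text.
    print("connection lost:", exc)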
Because I have a bad ISP, I made this script to copy novels from the internet into text files for offline reading while my connection is down. The script mostly works until a recursion error pops up, at which point I have to go in manually and set the next chapter by hand. What I want is for the code to run until the novel is completely copied (from chapter 1 to the last chapter) into text files, no matter how many chapters there are.
I always get a recursion error after 499 or 500 chapters have been copied. I am not sure why the limit is that low, or how it even hits the error; I have read that recursion errors usually appear after about 999 iterations.
Error (the first two lines repeat for quite a while):
File "C:\Users\james\Documents\Novels\PEERLESS MARTIAL GOD\novel.py", line 42, in CopyChapter
NextChapter()
File "C:\Users\james\Documents\Novels\PEERLESS MARTIAL GOD\novel.py", line 49, in NextChapter
link = driver.find_element_by_link_text(cLink)
File "C:\Program Files\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 428, in find_element_by_link_text
return self.find_element(by=By.LINK_TEXT, value=link_text)
File "C:\Program Files\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 976, in find_element
return self.execute(Command.FIND_ELEMENT, {
File "C:\Program Files\Python39\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 319, in execute
response = self.command_executor.execute(driver_command, params)
File "C:\Program Files\Python39\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 374, in execute
return self._request(command_info[0], url, body=data)
File "C:\Program Files\Python39\lib\site-packages\selenium\webdriver\remote\remote_connection.py", line 397, in _request
resp = self._conn.request(method, url, body=body, headers=headers)
File "C:\Program Files\Python39\lib\site-packages\urllib3\request.py", line 78, in request
return self.request_encode_body(
File "C:\Program Files\Python39\lib\site-packages\urllib3\request.py", line 170, in request_encode_body
return self.urlopen(method, url, **extra_kw)
File "C:\Program Files\Python39\lib\site-packages\urllib3\poolmanager.py", line 375, in urlopen
response = conn.urlopen(method, u.request_uri, **kw)
File "C:\Program Files\Python39\lib\site-packages\urllib3\connectionpool.py", line 699, in urlopen
httplib_response = self._make_request(
File "C:\Program Files\Python39\lib\site-packages\urllib3\connectionpool.py", line 445, in _make_request
six.raise_from(e, None)
File "<string>", line 3, in raise_from
File "C:\Program Files\Python39\lib\site-packages\urllib3\connectionpool.py", line 440, in _make_request
httplib_response = conn.getresponse()
File "C:\Program Files\Python39\lib\http\client.py", line 1347, in getresponse
response.begin()
File "C:\Program Files\Python39\lib\http\client.py", line 331, in begin
self.headers = self.msg = parse_headers(self.fp)
File "C:\Program Files\Python39\lib\http\client.py", line 225, in parse_headers
return email.parser.Parser(_class=_class).parsestr(hstring)
File "C:\Program Files\Python39\lib\email\parser.py", line 67, in parsestr
return self.parse(StringIO(text), headersonly=headersonly)
File "C:\Program Files\Python39\lib\email\parser.py", line 56, in parse
feedparser.feed(data)
File "C:\Program Files\Python39\lib\email\feedparser.py", line 176, in feed
self._call_parse()
File "C:\Program Files\Python39\lib\email\feedparser.py", line 180, in _call_parse
self._parse()
File "C:\Program Files\Python39\lib\email\feedparser.py", line 295, in _parsegen
if self._cur.get_content_maintype() == 'message':
File "C:\Program Files\Python39\lib\email\message.py", line 594, in get_content_maintype
ctype = self.get_content_type()
File "C:\Program Files\Python39\lib\email\message.py", line 578, in get_content_type
value = self.get('content-type', missing)
File "C:\Program Files\Python39\lib\email\message.py", line 471, in get
return self.policy.header_fetch_parse(k, v)
File "C:\Program Files\Python39\lib\email\_policybase.py", line 316, in header_fetch_parse
return self._sanitize_header(name, value)
File "C:\Program Files\Python39\lib\email\_policybase.py", line 287, in _sanitize_header
if _has_surrogates(value):
File "C:\Program Files\Python39\lib\email\utils.py", line 57, in _has_surrogates
s.encode()
RecursionError: maximum recursion depth exceeded while calling a Python object
Code:
#! python3
import requests
import bs4 as BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.options import Options
from unidecode import unidecode

CHROMEDRIVER_PATH = 'C:\Program Files\Python39\chromedriver.exe'

NovelChapter = 'peerless-martial-god/chapter-1-spirit-awakening.html'
BaseURL = 'https://novelfull.com'
url = '%(U)s/%(N)s' % {'U': BaseURL, "N": NovelChapter}

options = Options()
options.add_argument("--headless")  # Runs Chrome in headless mode.
driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
driver.get(url)


def Close():
    driver.stop_client()
    driver.close()
    driver.quit()


# start copy of chapter and add to a file
def CopyChapter():
    global soup
    soup = BeautifulSoup.BeautifulSoup(driver.page_source, 'html.parser')
    readables = soup.find(id='chapter-content')
    name = driver.title
    filename = name.replace('<',' ').replace('"',' ').replace('>',' ').replace('/',' ').replace("|",' ').replace("?",' ').replace("*",' ').replace(":", ' -').replace('Read ',"").replace(' online free from your Mobile, Table, PC... Novel Updates Daily ',"").replace(' online free - Novel Full',"")
    file_name = (filename + '.txt')
    print(file_name)
    data = ''
    for data in soup.find_all("p"):
        myfile = open(file_name, 'a+')
        myfile.write(unidecode(data.get_text()) + '\n' + '\n')
        myfile.close()
    global lastURL
    lastURL = driver.current_url
    print('**********Chapter Copied!**********')
    NextChapter()
# end copy of chapter and add to a file


# start goto next chapter if exists then return to copy chapter else Close()
def NextChapter():
    bLink = soup.find(id="next_chap")
    cLink = 'Next Chapter'
    link = driver.find_element_by_link_text(cLink)
    link.click()
    global currentURL
    currentURL = driver.current_url
    if currentURL != lastURL:
        CopyChapter()
    else:
        print('Finished!!!')
        Close()
# end goto next chapter if exists then return to copy chapter else Close()


CopyChapter()
#EOF
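As an aside on why the crash appears near 500 rather than 999: CopyChapter calls NextChapter, which calls CopyChapter again, and none of those calls return until the very end, so every chapter adds two frames to the call stack and Python's default limit of about 1000 frames is reached after roughly 500 chapters. A tiny stand-alone sketch (hypothetical copy_chapter/next_chapter names, no Selenium) reproduces the same behaviour:

import sys

def copy_chapter(n, total):
    # Stand-in for CopyChapter(): "copy" chapter n, then recurse onward.
    if n > total:
        return
    next_chapter(n, total)

def next_chapter(n, total):
    # Stand-in for NextChapter(): move to the next chapter by recursing again.
    copy_chapter(n + 1, total)

print(sys.getrecursionlimit())  # usually 1000
copy_chapter(1, 450)            # works: roughly 900 stacked frames
# copy_chapter(1, 520)          # RecursionError, just like the scraper near chapter 500

The usual cure is to replace the mutual recursion with a loop that handles one chapter per iteration, which is exactly what the rewrite below does with its while currentURL != lastURL: loop.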
It doesn't look as nice as the version with defs, but it works perfectly for what I need. I added a few things, such as making folders for the text files and starting from the chapter list page. There is probably plenty that could be optimized, but it works, and that is all that matters to me.
#! python3
import os
import requests
import bs4 as BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.chrome.options import Options
from unidecode import unidecode

CHROMEDRIVER_PATH = 'C:\Program Files\Python39\chromedriver.exe'


def Close():
    driver.stop_client()
    driver.close()
    driver.quit()


global NovelName
NovelName = ['']
global DIR
global baseDIR
baseDIR = "C:/Users/james/Documents/Novels"

while NovelName:
    NN = NovelName.pop(-1)
    NNx = NN.replace('.html', '').replace('-', ' ').upper()
    DIR = '%(B)s/%(N)s' % {'B': baseDIR, "N": NNx}
    os.mkdir(DIR)

    BaseURL = 'https://novelfull.com'
    url = '%(U)s/%(N)s' % {'U': BaseURL, "N": NN}

    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
    driver.get(url)
    print(url)

    global currentURL
    currentURL = driver.current_url
    global lastURL
    lastURL = ''

    soupx = BeautifulSoup.BeautifulSoup(driver.page_source, 'html.parser')
    ChapterList = soupx.find(id='list-chapter')
    CL = []
    for i in ChapterList.find_all("li"):
        CL.append(i)
    NovelChapter1Raw = CL[0]
    xx = []
    for i in NovelChapter1Raw.find_all("a"):
        for x in i.find_all("span"):
            xx.append(x)
    ChapterTextX = ' '.join(map(str, xx))
    ChapterText = ChapterTextX.replace('<span class="chapter-text">', '').replace('</span>', '')

    BaseURL = 'https://novelfull.com'
    link = driver.find_element_by_link_text(ChapterText)
    url = '%(U)s/%(N)s' % {'U': BaseURL, "N": link}
    link.click()
    currentURL = driver.current_url

    while currentURL != lastURL:
        global soup
        soup = BeautifulSoup.BeautifulSoup(driver.page_source, 'html.parser')
        readables = soup.find(id='chapter-content')
        name = driver.title
        filename = name.replace('<',' ').replace('"',' ').replace('>',' ').replace('/',' ').replace("|",' ').replace("?",' ').replace("*",' ').replace(":", ' -').replace('Read ',"").replace(' online free from your Mobile, Table, PC... Novel Updates Daily ',"").replace(' online free - Novel Full',"")
        file_name = (filename + '.txt')
        print(file_name)
        data = ''
        for data in soup.find_all("p"):
            myfile = open(DIR + '/' + file_name, 'a+')
            myfile.write(unidecode(data.get_text()) + '\n' + '\n')
            myfile.close()
        lastURL = driver.current_url
        print('**********Chapter Copied!**********')
        bLink = soup.find(id="next_chap")
        cLink = 'Next Chapter'
        link = driver.find_element_by_link_text(cLink)
        link.click()
        currentURL = driver.current_url

    print('Finished!!!')
    Close()

print('Finished!!!')
Close()  # <- throws a bunch of errors but makes sure everything closes.
#EOF
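One small aside, not from the original post: Windows paths like the CHROMEDRIVER_PATH above are safer written as raw strings, since sequences such as \P and \c only happen to survive because they are not recognized escape characters.

CHROMEDRIVER_PATH = r'C:\Program Files\Python39\chromedriver.exe'  # raw string avoids escape surprises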
Somewhere along the way I have tangled myself up: running this code gives me "weird" errors, and it looks as if I am missing a module, but I can't get it to work even after reading the error messages many times.
Does anyone have a clue what is wrong here?
Happy new year and thanks in advance!
import requests
from bs4 import BeautifulSoup
import csv


def get_page(url):
    response = requests.get(url)
    if not response.ok:
        print('Server responded:', response.status_code)
    else:
        soup = BeautifulSoup(response.text, 'lxml')
        return soup


def get_detail_data(soup):
    try:
        product = soup.find('span', {'class': 'a-size-large product-title-word-break'}).text
    except:
        product = ''
    try:
        price = soup.find('span', {'class': 'a-size-medium a-color-price priceBlockBuyingPriceString'}).text.strip()
        currency, price = p.split(' ')
    except:
        currency = ''
        price = ''
    try:
        amount = soup.find('span', class_='a-size-medium a-color-state').find('a').text.strip()
    except:
        amount = ''
    data = {
        'product': product,
        'price': price,
        'currency': currency,
        'amount': amount,
    }
    return data


def get_index_data(soup):
    try:
        links = soup.find_all('a', class_='a-link-normal a-text-normal')
    except:
        links = []
    urls = [item.get('href') for item in links]
    return urls


def write_csv(data, url):
    with open('hardware.csv', 'a') as csvfile:
        writer = csv.writer(csvfile)
        row = [data['title'], data['price'], data['currency'], data['amount'], url]
        writer.writerow(row)


def main():
    url = 'https://www.amazon.se/s?k=grafikkort&page=1'
    products = get_index_data(get_page(url))
    for link in products:
        data = get_detail_data(get_page(link))
        write_csv(data, link)


if __name__ == '__main__':
    main()
And the error message:
Traceback (most recent call last):
File "scrp.py", line 75, in <module>
main()
File "scrp.py", line 71, in main
data = get_detail_data(get_page(link))
File "scrp.py", line 7, in get_page
response = requests.get(url)
File "/usr/lib/python3/dist-packages/requests/api.py", line 75, in get
return request('get', url, params=params, **kwargs)
File "/usr/lib/python3/dist-packages/requests/api.py", line 60, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 519, in request
prep = self.prepare_request(req)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 452, in prepare_request
p.prepare(
File "/usr/lib/python3/dist-packages/requests/models.py", line 313, in prepare
self.prepare_url(url, params)
File "/usr/lib/python3/dist-packages/requests/models.py", line 387, in prepare_url
raise MissingSchema(error)
requests.exceptions.MissingSchema: Invalid URL '/ASUS-NVIDIA-GeForce-grafikkort-kylning/dp/B07489XSJP?dchild=1': No schema supplied. Perhaps you meant http:///ASUS-NVIDIA-GeForce-grafikkort-kylning/dp/B07489XSJP?dchild=1?
What is happening here is that you are only getting the URL suffixes from your products, as seen for instance with /ASUS-NVIDIA-GeForce-grafikkort-kylning.
A quick solution is to prepend 'https://www.amazon.se' to all your URLs:
def main():
    url = 'https://www.amazon.se/s?k=grafikkort&page=1'
    products = get_index_data(get_page(url))
    for link in products:
        data = get_detail_data(get_page('https://www.amazon.se' + link))
        write_csv(data, link)
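A slightly more general variant, offered as a sketch rather than part of the fix above, builds the absolute URL with urllib.parse.urljoin, which leaves already-absolute hrefs untouched:

from urllib.parse import urljoin

def main():
    base = 'https://www.amazon.se'
    url = base + '/s?k=grafikkort&page=1'
    products = get_index_data(get_page(url))
    for link in products:
        full_link = urljoin(base, link)  # works for both relative and absolute hrefs
        data = get_detail_data(get_page(full_link))
        write_csv(data, full_link)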
I'm new to Python, and I am having problems using BeautifulSoup to scrape multiple URLs, whether from a text file or hard-coded into the program. Here is an example of my code.
import requests
from bs4 import BeautifulSoup
import re

url = 'https://0.0.0.0/directory/'
r = requests.get(url)
soup = BeautifulSoup(r.content, 'html5lib')

with open("1.txt", "w") as f:
    for name, date in zip(
        soup.find_all("a", {"class": "name"}), soup.find_all("span", {"class": "date"})
    ):
        f.write(name.text.strip() + " ")
        f.write(date.text.strip() + "\n")
This works great for one URL, but it fails when I add a second one. It also fails when I try to load a list from a text file. I have about 25 URLs in a file that I would like the program to run through and collect from daily.
The failing multiple-URL code:
url = ['https://0.0.0.0/directory/', 'https://0.0.0.0/directory/']
Error message:
┌──(c4㉿ib)-[~/Desktop/dev]
└─$ python3 test.py
Traceback (most recent call last):
File "crime.py", line 9, in <module>
r = requests.get(url)
File "/usr/lib/python3/dist-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/usr/lib/python3/dist-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 637, in send
adapter = self.get_adapter(url=request.url)
File "/usr/lib/python3/dist-packages/requests/sessions.py", line 728, in get_adapter
raise InvalidSchema("No connection adapters were found for {!r}".format(url))
requests.exceptions.InvalidSchema: No connection adapters were found for "['https://0.0.0.0/directory/', 'https://0.0.0.0/directory/']"
┌──(c4㉿ib)-[~/Desktop/dev]
└─$
Clearly I am not scraping 0.0.0.0; I renamed the domain for the question. Any advice on what I am doing wrong would be helpful. I would rather read from a list so my code doesn't have 25 URLs stuffed into it. Thank you.
Try looping through the URLs and requesting each one separately:
import requests
from bs4 import BeautifulSoup

urls = ['https://0.0.0.0/directory/', 'https://0.0.0.0/directory/']

with open("output.txt", "w") as f:
    for url in urls:
        print(url)
        resp = requests.get(url).content
        soup = BeautifulSoup(resp, "html.parser")
        for name, date in zip(
            soup.find_all("a", {"class": "name"}), soup.find_all("span", {"class": "date"})
        ):
            f.write(name.text.strip() + " ")
            f.write(date.text.strip() + "\n")
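Since the question mentions keeping about 25 URLs in a text file, a small hedged addition (assuming a file called urls.txt with one URL per line) can replace the hard-coded list:

# Assumption: urls.txt sits next to the script and holds one URL per line.
with open("urls.txt") as url_file:
    urls = [line.strip() for line in url_file if line.strip()]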
I am trying to download the thumbnails from the Digital Commonwealth website in order to make an ImageJ visualization. Everything prints fine up until the JSON dump. I have code written by a friend to download the images, but I need a JSON file of the URLs before I can continue. At the end it gives me the error "Object of type Tag is not JSON serializable".
Sorry for the spacing; I'm new to Stack Overflow, and it gets messed up when I copy and paste from Sublime.
from bs4 import BeautifulSoup
import requests
import re
import json

all_my_data = []

url = "https://www.digitalcommonwealth.org/search?f%5Bcollection_name_ssim%5D%5B%5D=Produce+Crate+Labels&f%5Binstitution_name_ssim%5D%5B%5D=Boston+Public+Library&per_page=50"
results_page = requests.get(url)
page_html = results_page.text
soup = BeautifulSoup(page_html, "html.parser")

all_labels = soup.find_all("div", attrs = {'class': 'document'})
for items in all_labels:
    my_data = {
        "caption": None,
        "url": None,
        "image url": None,
    }

    item_link = items.find('a')
    abs_url = "https://www.digitalcommonwealth.org/search?f%5Bcollection_name_ssim%5D%5B%5D=Produce+Crate+Labels&f%5Binstitution_name_ssim%5D%5B%5D=Boston+Public+Library&per_page=50" + item_link["href"]
    my_data["url"] = abs_url
    #print(abs_url)

    item_request = requests.get(abs_url)
    item_html = item_request.text
    item_soup = BeautifulSoup(item_html, "html.parser")

    all_field_divs = item_soup.find_all("div", attrs={'class': 'caption'})
    for field in all_field_divs:
        caption = field.find("a")
        cpation = caption.text
        my_data["caption"] = caption
        #print(caption)

    all_photo_urls = item_soup.find_all("div", attrs={'class': 'thumbnail'})
    for photo_url in all_photo_urls:
        photo = photo_url.find('img')
        photo_abs_url = "https://www.digitalcommonwealth.org/search?f%5Bcollection_name_ssim%5D%5B%5D=Produce+Crate+Labels&f%5Binstitution_name_ssim%5D%5B%5D=Boston+Public+Library&per_page=50" + photo['src']
        my_data['image url'] = photo_abs_url
        #print(photo_abs_url)

    all_my_data.append(my_data)
    #print(all_my_data)

with open('fruit_crate_labels.json', 'w') as file_object:
    json.dump(all_my_data, file_object, indent=2)

print('Your file is now ready')
It prints this:
Traceback (most recent call last):
File "dh.py", line 54, in
json.dump(all_my_data, file_object, indent=2)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/init.py", line 179, in dump
for chunk in iterable:
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/encoder.py", line 429, in _iterencode
yield from _iterencode_list(o, _current_indent_level)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/encoder.py", line 325, in _iterencode_list
yield from chunks
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type Tag is not JSON serializable
Thanks for the help!
The following code on line 35:
cpation = caption.text
should be:
caption = caption.text
Then your code appears to work as you intended.
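For a bit more context (my reading of the traceback, not part of the original answer): because of the typo, my_data["caption"] keeps the BeautifulSoup Tag object rather than its text, and json.dump does not know how to serialize a Tag. With the fix applied, the loop reads:

for field in all_field_divs:
    caption = field.find("a")
    caption = caption.text      # was: cpation = caption.text
    my_data["caption"] = caption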