I am currently developing a program to try an xml file. However I encounter a problem that has been blocking me for a few days now and I can rack my brains my beginner level in python and my many research does not help me to get over it.
I first try to retrieve 1 element from my xml file. I can do it well only the graphics window that should normally display it does not generate and I have the following error message:
The error :
window.fenetre_text(Acte_D) NameError: name 'Acte_D' is not defined`
well my code is :
import tkinter as tk
from tkinter import *
from tkinter import messagebox
from tkinter.filedialog import askopenfilename
from xml.dom import minidom`
global file
global Acte_D
global element_A
global lines
class fenetreM(Tk):
#GUI
def __init__(self, master=None,*args, **kwargs):
Tk.__init__(self,master, *args, **kwargs)
self.menu_bar()
self.title("EssaiV1.0.1")
self.geometry("800x400")
self.minsize(380,140)
self.config(background='#F0F0F2')
#New_frame=Frame(fenetreM, bg='#abd7f4')`
def menu_bar(self):
menu_bar = Menu(self)
menu_file = Menu(menu_bar, tearoff=0)
menu_file.add_command(label="Open", underline=0, command=self.open_file)
menu_file.add_command(label="Affichage",underline=0,command=self.do_something)
menu_file.add_separator()
menu_file.add_command(label="Exit",underline=1,command=self.quit)
menu_bar.add_cascade(label="File",underline=0, menu=menu_file)
menu_help = Menu(menu_bar, tearoff=0)
menu_help.add_command(label="About", command=self.do_about)
menu_bar.add_cascade(label="Help",underline=0, menu=menu_help)
self.config(menu=menu_bar)
valid = False
return(valid)
def open_file(self):
types = [("All Files",".*")]
file = askopenfilename(title="File to open : ", filetypes=types)
print("Copy")
doc = minidom.parse(file)
print(doc.nodeName)
print(doc.firstChild.tagName)
Acte_D = doc.getElementsByTagName("ActeDescription")
print("\n")
element_A = (f" we have {Acte_D.length} element \n Version : ")
for i in Acte_D:
screen_1=(i.getAttribute("version"))
print(screen_1)
print("\n")
return (Acte_D)
def fenetre_text(self,Acte_D):
tk=Text(fenetreM, height = 10, width =100)
Fenetre_A=tk.inset(END,textvariable=self.element_A+Acte_D)
Fenetre_A.pack(side="top", fill="x", expand=True)
return(Acte_D)
window = fenetreM()
window.fenetre_text(Acte_D)
window.mainloop()
Can someone explain my mistake to me?
Thanks
I tried to graphically display the result of Acte_D in my window defined in my method fenetre_text.
But I can't.
I can't seem to figure out how to read the text inside this link which is a pdf document. I do not get any errors nor do I get back any type of text.
from io import BytesIO
from urllib.request import urlopen
import PyPDF2
def read_pdf_from_url(url):
try:
response = urlopen(url)
pdf_file = BytesIO(response.read())
reader = PyPDF2.PdfReader(pdf_file)
text = ""
for page_num in range(len(reader.pages)):
page = reader.pages[page_num]
text += page.extract_text()
return text
except Exception as e:
print(f"An error occurred: {e}")
url = 'https://probaterecords.shelbyal.com/shelby/search.do?indexName=shelbyimages&lq=Instrument%3A19890308000066440&page=1&view=FitV&scrollbar=0&navpanes=0&statusbar=0&messages=0?iframe=true&width=50%25&height=95%25'
text = read_pdf_from_url(url)
print(text)
I started working on a small image scraping terminal program that is supposed to save images to a specified file within the program hierarchy. This comes from a basic tutorial I found online. However, whenever I enter in a search term into the terminal to start scraping bing.com (yeah, i know), the program crashes. The errors i get seem to focus on either the image file type not being recognized or the file path where the images will be saved is not being recognized:
from bs4 import BeautifulSoup
import requests
from PIL import Image
from io import BytesIO
search = input("Search for:")
params = {"q": search}
r = requests.get("http://www.bing.com/images/search", params=params)
soup = BeautifulSoup(r.text, "html.parser")
links = soup.findAll("a", {"class": "thumb"})
for item in links:
img_obj = requests.get(item.attrs["href"])
print("Getting", item.attrs["href"])
title = item.attrs["href"].split("/")[-1]
img = Image.open(BytesIO(img_obj.content))
img.save("./scraped_images/" + title, img.format)
Error thrown: Exception has occurred: FileNotFoundError
[Errno 2] No such file or directory: './scraped_images/3849747391_4a7dc3f19e_b.jpg'
I've tried adding a file path variable (using pathlib) and concatenating that with with the other necessary variables:
from bs4 import BeautifulSoup
import requests
from PIL import Image
from io import BytesIO
from pathlib import Path
image_folder = Path("./scraped_images/")
search = input("Search for:")
params = {"q": search}
r = requests.get("http://www.bing.com/images/search", params=params)
soup = BeautifulSoup(r.text, "html.parser")
links = soup.findAll("a", {"class": "thumb"})
for item in links:
img_obj = requests.get(item.attrs["href"])
print("Getting", item.attrs["href"])
title = item.attrs["href"].split("/")[-1]
img = Image.open(BytesIO(img_obj.content))
img.save(image_folder + title, img.format)
Error thrown: Exception has occurred: TypeError
unsupported operand type(s) for +: 'WindowsPath' and 'str'
I've checked the documentation for PIL, BeautifulSoup, etc. to see if any updates may have been screwing me up, i've checked the elements on bing to see if the classes are correct, and even tried searching by different class and nothing worked. I'm at a loss. Any thoughts or guidance is appreciated. Thanks!
I have changed your code a bit:
from bs4 import BeautifulSoup
import requests
from pathlib import Path
import os
image_folder = Path("./scraped_images/")
if not os.path.isdir(image_folder):
print('Making %s'%(image_folder))
os.mkdir(image_folder)
search = input("Search for:")
params = {"q": search}
r = requests.get("http://www.bing.com/images/search", params=params)
soup = BeautifulSoup(r.text, "html.parser")
links = soup.findAll("a", {"class": "thumb"})
for item in links:
img_obj = requests.get(item.attrs["href"])
print("Getting", item.attrs["href"])
title = item.attrs["href"].split("/")[-1]
if img_obj.ok:
with open('%s/%s'%(image_folder, title), 'wb') as file:
file.write(img_obj.content)
You can use PIL but in this case you do not need it.
I also improved the code with PIL to work better:
from bs4 import BeautifulSoup
import requests
from PIL import Image
from io import BytesIO
from pathlib import Path
s = requests.Session()
image_folder = Path("./scraped_images/")
search = input("Search for:")
params = {"q": search}
r = s.get("http://www.bing.com/images/search", params=params)
soup = BeautifulSoup(r.text, "html.parser")
links = soup.findAll("a", {"class": "thumb"})
for item in links:
try:
img_obj = s.get(item.attrs["href"], headers={'User-Agent': 'User-Agent: Mozilla/5.0'})
if img_obj.ok:
print("Getting", item.attrs["href"])
title = item.attrs["href"].split("/")[-1]
if '?' in title:
title = title.split('?')[0]
img = Image.open(BytesIO(img_obj.content))
img.save(str(image_folder) + '/' + title, img.format)
else:
continue
except OSError:
print('\nError downloading %s try to visit'
'\n%s\n'
'manually and try to get the image manually.\n' %(title, item.attrs["href"]))
I use a requests session and added try and except if PIL can't make the image. I also only make try to make a image if the request get a 200 response from the site.
I have the following code :
from tkinter import *
class GUI:
def __init__(self,master):
self.ip_word = Label(master,text="Input Path")
self.ip_word.grid(row=0,sticky=E)
self.ip_path_field = Entry(master)
self.ip_path_field.grid(row=0,column=1,sticky=W)
self.op_word = Label(master,text="Output Path")
self.op_word.grid(row=2,sticky=E)
self.op_path_field = Entry(master)
self.op_path_field.grid(row=2,column=1,sticky=W)
self.filename_word=Label(master,text="Output Filename ")
self.filename_word.grid(row=4,sticky=E)
self.filename =Entry(master)
self.filename.grid(row=4,column=1,sticky=W)
self.Submit = Button(master,text="Submit",fg="black",bg="white",command=self.Scraper(ip_path_field,op_path_field,filename) )
self.Submit.grid(row=5,columnspan=2)
"""
def printMessage(self):
str1=ip_path_field
str2=op_path_field
str3=filename
Scraper(str1,str2,str3)"""
def Scraper(self,ip_path_field,op_path_field,filename):
import pandas as pd
import os
# "C:/Users/chowdhuryr/Desktop/first automation/MAIN RESEARCH DATA.xlsx"
user_input =ip_path_field#input("Enter the input file path of your file: ")
if os.path.exists(user_input):
df = pd.read_excel(user_input, sheetname='Sheet1')
print("File Found and We are Processing !")
else:
print ("Input Directory does not exists.")
#"C:/Users/chowdhuryr/Desktop/first automation/OUTPUT DATA.xlsx"
user_output =op_path_field#input("Enter the output file path of your file: ")
#if os.path.exists(user_input):
#df = pd.read_excel(user_input, sheetname='Sheet1')
#--------------------------------------------------------------------------------------------------------------------------------------
#setting up the path
import os
os.chdir('C:/Users/chowdhuryr/Desktop/first automation')
df=df[0:5]
#--------------------------------------------------------------------------------------------------------------------------------------
#importing necessary packages
from selenium import webdriver
#from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
#---------------------------------------------------------------------------------------------------------------------------------------
#Setting up Chrome webdriver
#chrome_options = webdriver.ChromeOptions()
options = webdriver.ChromeOptions()
options.add_argument("--headless") #making the window work in the background
options.add_argument('window-size=1200x850')
#declaring a list to store the messages
Message=list()
Tier=list()
Wirecentre=list()
#---------------------------------------------------------------------------------------------------------------------------------------
#iteration to access the url and and retriving the Tier Locations
for i in range(0,df.shape[0]): #(0,df.shape[0]):
driver = webdriver.Chrome(executable_path='C:/Users/chowdhuryr/Desktop/first automation/chromedriver', chrome_options=options) #openning chrome
#driver.maximize_window() #maximizing the window
driver.get('https://clec.att.com/facilitiescheck/facilities_chk.cfm') #openning the url
street_address=driver.find_element_by_xpath('/html/body/form/table/tbody/tr[2]/td[2]/table/tbody/tr[1]/td[2]/input') #accessing the street address field
street_address.send_keys(df['CIRCUIT_LOC_ADDR'][i].split(',')[0]) #passing the values to street_address location
city=driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[2]/td[2]/input') #accessing the city to street address field
city.send_keys(df['CIRCUIT_LOC_ADDR'][i].split(',')[1]) #passing the values to the city location
state=driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[4]/td[2]/select') #accessing the state field
state.send_keys(df['CIRCUIT_LOC_ADDR'][i].split(',')[2]) #passing the values to the state field
checkbox=driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[8]/td[1]/input') #accessing the checkbox
checkbox.click() #clicking on the check box
search_button=driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[8]/td[1]/input') #accessing the submit button
search_button.submit() #clicking the submit button
#try-except block in case if radio button appears
try:
Address=driver.find_element_by_xpath('/html/body/form/table[2]/tbody/tr[1]/td[2]/b') #taking the xpath of the address block
if (Address): #checking if it contains any radio button or not
Radio_button=driver.find_element_by_xpath('/html/body/form/table[2]/tbody/tr[2]/td[1]/input') #getting the xpath of radio button
Radio_button.click() #clicking the radio button
submit_button=driver.find_element_by_xpath('/html/body/form/table[3]/tbody/tr[2]/td/input') #getting the submit button
submit_button.submit()
except NoSuchElementException:
print('no such element found')
message_body= driver.find_element_by_xpath('//*[#id="msg"]/table/tbody/tr/td').text #Extracting the Message from the message box
Message.append(message_body[14:]) #putting the message into a text
str = message_body.split() #splitting the message
if any ("Tier"in s for s in str):
j=str.index('Tier')
Tier.append(str[j+1])
else:
Tier.append("NULL")
if any ("AT&T"in s for s in str):
j=str.index('AT&T')
Wirecentre.append(str[j+1])
else:
Wirecentre.append("NULL")
#saving the screenshot
str=df['STRIP_EC_CIRCUIT_ID'][i]
filename="C:\\Users\\chowdhuryr\\Desktop\\first automation\\"+str+".png" #Taking the circuit id name
driver.get_screenshot_as_file(filename)
driver.close() #closiing the driver
#------------------------------------------------------------------------------------------------------------------------------------------
#putting the back thenew columns into the dataframe and storing it into an excel sheet
df['Tier']=Tier #putting the Tier column back into the dataset
df['Wirecentre']=Wirecentre #putting the Wirecentre column back into the dataset
df['Message']=Message #putting the Message column back into the dataset
if os.path.exists(user_output):
user_output="user_output"+filename+".xlsx"
writer = pd.ExcelWriter(user_output) #writing the dataframe down into a new excel file
df.to_excel(writer,'sheet1',index=False) #to_excel(writer,'Sheet1')
writer.save()
else:
print ("Output Directory does not exists.")
#-------------------------------------------------------------------------------------------------------------------------------------------
#Generating pop up window at the end of the process
popup_driver=webdriver.Chrome()
popup_driver.maximize_window()
popup_driver.execute_script(" window.alert('Process is Completed');") #generating the pop up """
root =Tk()
b=GUI(root)
#b.Scraper(ip_path_field,op_path_field,filename)
root.mainloop()
Now what I want to do is this :
I want to pass the variables ip_path_field,op_path_field,filename as arguments to the function named scraper . Now ip_path_field,op_path_field,filename are all user inputs and not hard coded strings. Now whenever I run the following code, I get the GUI opened and whenever I provide my inputs in the required edit boxes and press the submit button I get the following error name 'ip_path_field' is not defined. My purpose of this code is to pass the user defined file paths to the function called scraper() as defined in the code above.
You need to create a callback that gets the values, and then passes them to the function that does the work.
Example:
class GUI:
def __init__(self,master):
...
self.Submit = Button(..., command=self.handle_submit)
...
def handle_submit(self):
ip_path = self.ip_path_field.get()
op_path = self.op_path_field.get()
filename = self.filename.get()
self.Scrapper(ip_path_field,op_path_field,filename)
The main problem you are having is related to how you built your method. Instead you should have put the get() calls inside that method. This way you can simple run the command to call scraper without needing to pass arguments. I have taken your code and re-written it to more closely follow the PEP8 standard with the method correction included.
You only need to apply the self. prefix to class attributes/methods. Things like labels/buttons that are not going to be modified later in the code should be left as normal widgets that are not assigned as attributes.
Next I moved all your imports to the top of the code. You only need to import a library one time and it should all be listed at the top of your code. On imports keep in mind it is better to import tkinter as tk instead of using * to prevent any accidental overrides occurring.
I have changed your string concatenation to use the format() method as + is deprecated.
Here is your code with some basic clean up as well.
import tkinter as tk
import pandas as pd
import os
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
class GUI:
def __init__(self,master):
tk.Label(master, text="Input Path").grid(row=0, sticky="e")
tk.Label(master, text="Output Path").grid(row=2, sticky="e")
tk.Label(master, text="Output Filename").grid(row=4, sticky="e")
self.ip_path_field = tk.Entry(master)
self.op_path_field = tk.Entry(master)
self.filename = tk.Entry(master)
self.ip_path_field.grid(row=0, column=1, sticky="w")
self.op_path_field.grid(row=2, column=1, sticky="w")
self.filename.grid(row=4, column=1, sticky="w")
tk.Button(master, text="Submit", fg="black", bg="white",
command=self.scrapper).grid(row=5,columnspan=2)
def scrapper(self): # changed function to a method.
user_input = self.ip_path_field.get(0, "end")
user_output = self.op_path_field.get(0, "end")
filename = self.filename.get(0, "end")
if os.path.exists(user_input):
df = pd.read_excel(user_input, sheetname='Sheet1')
print("File Found and We are Processing !")
else:
print ("Input Directory does not exists.")
os.chdir('C:/Users/chowdhuryr/Desktop/first automation')
df = df[0:5]
options = webdriver.ChromeOptions()
options.add_argument("--headless")
options.add_argument('window-size=1200x850')
message = list()
tier = list()
wirecentre = list()
for i in range(0,df.shape[0]):
driver = webdriver.Chrome(executable_path='C:/Users/chowdhuryr/Desktop/first automation/chromedriver', chrome_options=options)
driver.get('https://clec.att.com/facilitiescheck/facilities_chk.cfm')
street_address = driver.find_element_by_xpath('/html/body/form/table/tbody/tr[2]/td[2]/table/tbody/tr[1]/td[2]/input')
street_address.send_keys(df['CIRCUIT_LOC_ADDR'][i].split(',')[0])
city=driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[2]/td[2]/input')
city.send_keys(df['CIRCUIT_LOC_ADDR'][i].split(',')[1])
state = driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[4]/td[2]/select')
state.send_keys(df['CIRCUIT_LOC_ADDR'][i].split(',')[2])
checkbox = driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[8]/td[1]/input')
checkbox.click()
search_button = driver.find_element_by_xpath('/html/body/form/table[1]/tbody/tr[2]/td[2]/table/tbody/tr[8]/td[1]/input')
search_button.submit()
try:
address = driver.find_element_by_xpath('/html/body/form/table[2]/tbody/tr[1]/td[2]/b')
if (address):
radio_button = driver.find_element_by_xpath('/html/body/form/table[2]/tbody/tr[2]/td[1]/input')
radio_button.click()
submit_button = driver.find_element_by_xpath('/html/body/form/table[3]/tbody/tr[2]/td/input')
submit_button.submit()
except NoSuchElementException:
print('no such element found')
message_body = driver.find_element_by_xpath('//*[#id="msg"]/table/tbody/tr/td').text
message.append(message_body[14:])
strx = message_body.split()
if any ("Tier"in s for s in strx):
j = strx.index('Tier')
tier.append(strx[j+1])
else:
tier.append("NULL")
if any ("AT&T"in s for s in strx):
j = strx.index('AT&T')
wirecentre.append(strx[j+1])
else:
wirecentre.append("NULL")
strx = df['STRIP_EC_CIRCUIT_ID'][i]
filename = "C:\\Users\\chowdhuryr\\Desktop\\first automation\\{}.png".format(strx)
driver.get_screenshot_as_file(filename)
driver.close()
df['Tier'] = tier
df['Wirecentre'] = wirecentre
df['Message'] = message
if os.path.exists(user_output):
user_output="user_output{}.xlsx".format(filename)
writer = pd.ExcelWriter(user_output)
df.to_excel(writer, 'sheet1', index=False)
writer.save()
else:
print ("Output Directory does not exists.")
popup_driver = webdriver.Chrome()
popup_driver.maximize_window()
popup_driver.execute_script(" window.alert('Process is Completed');")
root = tk.Tk()
b = GUI(root)
root.mainloop()
All that said you should try to use a minimal code next time. It is easier to fix issues when you narrow it down to exactly the problem. For example based on your question a Minimal, Complete, and Verifiable example would look something like this.
from tkinter import *
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
class GUI:
def __init__(self,master):
self.ip_path_field = Entry(master)
self.ip_path_field.grid(row=0,column=1,sticky=W)
self.op_path_field = Entry(master)
self.op_path_field.grid(row=2,column=1,sticky=W)
self.filename =Entry(master)
self.filename.grid(row=4,column=1,sticky=W)
self.Submit = Button(master, text="Submit",
command=self.Scrapper(ip_path_field,op_path_field,filename) )
self.Submit.grid(row=5,columnspan=2)
def Scrapper(self,ip_path_field,op_path_field,filename):
user_input = ip_path_field
user_output = op_path_field
filename = filename
root = Tk()
b = GUI(root)
root.mainloop()
I'm trying to use tkinter on python 3.2 to display an image from the Internet.
I'm getting TypeError: 'HTTPResponse' object is not callable.I know that this means I'm referencing a variable or function incorrectly.
I have read urllib "module object is not callable" but I'm still not sure how to fix this problem. Your help is appreciated.
import tkinter as tk
from urllib.request import urlopen
from PIL import ImageTk, Image
#http://docs.activestate.com/activepython/3.1/diveintopython3/html/porting-code- to-python-3-with-2to3.html
import io
img_file = urlopen('https://www.google.com/images/srpr/logo4w.png')
im = io.FileIO(img_file())<<<<<<<<<<<<<this line is throwing the error
resized_image = Image.open(im)
im.show()
root = tk.Tk()
img = ImageTk.PhotoImage(resized_image)
panel = tk.Label(root, image = img)
panel.pack(side = "bottom", fill = "both", expand = "yes")
root.mainloop()
You can create a StringIO object instead:
from StringIO import StringIO
...
im = StringIO(img_file.read())
It behaves like a file, but it's not a file.