Login into a Website with data from csv file with python selenium - python-3.x

I am trying to create a script that automatically logs in different accounts to a website, the login data should be taken from a CSV file. Unfortunately I get no result, maybe someone has a solution for me?
import pandas as pd
import os
from selenium import webdriver
from twocaptcha import TwoCaptcha
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
loginlist = pd.read_csv('/Volumes/GoogleDrive/Meine Ablage/Privat/py_scripts/login.csv', header=None, skiprows=\[0\], sep =',')
print(loginlist)
driver = webdriver.Chrome()
driver.get("https://freebitco.in/signup/?op=s")
time.sleep(6)
webdriver.ActionChains(driver).send_keys(Keys.ESCAPE).perform()
time.sleep(1)
login = driver.find_element(By.CLASS_NAME, "login_menu_button")
login.click()
time.sleep(3)
def fulfill_form(email, password):
input_email = driver.find_element(By.ID, 'login_form_btc_address')
input_password = driver.find_element(By.ID, 'login_form_password')
input_email.send_keys(email)
time.sleep(1)
input_password.send_keys(password)
time.sleep(5)
failed_attempts = \[\]
for customer in loginlist:
try:
fulfill_form(str(loginlist\[0\]), str(loginlist\[1\]))
except:
failed_attempts.append(loginlist\[0\])
pass
if len(failed_attempts) \> 0:
print("{} cases have failed".format(len(failed_attempts)))
print("Procedure concluded")
I tried several older solutions from other posts, unfortunately they led nowhere.

Related

Unable to scrape texts from URLs

I have been strugling to scrape the contents/text of news articles from each URLs. The extraction of URLs works fine, but scraping the texts from each URLs has been challenging. Below is my code:
from selenium.webdriver import ActionChains, Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
import sys, time
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Initialize drivver and navigate
driver = webdriver.Chrome()
driver.maximize_window()
url = 'https://www.iol.co.za/news/south-africa/eastern-cape'
wait = WebDriverWait(driver, 5)
driver.get(url)
time.sleep(3)
# take the articles
articles = wait.until(EC.presence_of_all_elements_located((By.XPATH,
f"//article//*[(name() = 'h1' or name()='h2' or name()='h3' or name()='h4' or name()='h5' or name()='h6' or name()='h7') and string-length(text()) > 0]/ancestor::article")))
article_link = []
full_text = []
# For every article we take what we want
for article in articles:
link = article.find_element(By.XPATH, f".//a")
news_link = link.get_attribute('href')
article_link.append(news_link)
for j in article_link:
news_response = requests.get(j)
news_data = news_response.content
news_soup = BeautifulSoup(news_data, 'html.parser')
art_cont = news_soup.find('div', 'Article__StyledArticleContent-sc-uw4nkg-0')
full_text.append(art_cont.text)
print(article_link)
print(full_text)
I tried to use beautifulsoup, but it doesn't seem to work. I will be grateful for any help.
First off you should probably unindent the second for loop, it shouldn't be running inside of the first loop (you will be doubling and getting all of the information countless extra times).
Second. The requests that you send are returning a webpage that has content blocked (I could not figure out a way around this with inserting headers into the request). What you could do is use the driver to load each of the links and grab the text from there, here is how you could do that.
for link in article_link:
driver.get(link)
news_data = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'Article__StyledArticleContent-sc-uw4nkg-0')))
full_text.append(news_data[0].get_attribute('textContent'))
The full script would look like this:
from selenium.webdriver import ActionChains, Keys
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
import sys, time
from bs4 import BeautifulSoup
import requests
import pandas as pd
# Initialize drivver and navigate
driver = webdriver.Chrome()
driver.maximize_window()
url = 'https://www.iol.co.za/news/south-africa/eastern-cape'
wait = WebDriverWait(driver, 5)
driver.get(url)
time.sleep(3)
# take the articles
articles = wait.until(EC.presence_of_all_elements_located((By.XPATH,
f"//article//*[(name() = 'h1' or name()='h2' or name()='h3' or name()='h4' or name()='h5' or name()='h6' or name()='h7') and string-length(text()) > 0]/ancestor::article")))
article_link = []
full_text = []
# For every article we take what we want
for article in articles:
link = article.find_element(By.XPATH, f".//a")
news_link = link.get_attribute('href')
article_link.append(news_link)
for link in article_link[:5]:
driver.get(link)
news_data = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'Article__StyledArticleContent-sc-uw4nkg-0')))
full_text.append(news_data[0].get_attribute('textContent'))
print(article_link)
print(full_text)
The best course of action is to utilize selenium throughout as the site's content is cloudflare secured. Although #Andrew Ryan has already addressed the issue, I thought I'd come up with a shorter version of it since this answer was already halfway through at the time of his posting.
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium import webdriver
link = 'https://www.iol.co.za/news/south-africa/eastern-cape'
def get_links_and_texts(driver,url):
driver.get(url)
for article_link in [i.get_attribute('href') for i in WebDriverWait(driver,10).until(EC.presence_of_all_elements_located((By.XPATH,"//article/a[starts-with(#class,'Link__StyledLink')]")))]:
driver.get(article_link)
art_content = WebDriverWait(driver,10).until(EC.presence_of_element_located((By.CSS_SELECTOR,".article-content"))).text
yield {"Link":article_link,"article_content":art_content}
if __name__ == '__main__':
with webdriver.Chrome() as driver:
for item in get_links_and_texts(driver,link):
print(item)

How to click the next span value which has same class name

import urllib3
import certifi
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import requests
from bs4 import BeautifulSoup
import time
import ssl
http = urllib3.PoolManager(ca_certs=certifi.where())
chrome_options = Options()
chrome_options.add_argument("--incognito")
driver = webdriver.Chrome(options=chrome_options, executable_path="D:\\python works\\driver\\chromedriver.exe")
URL= "https://physicians.wustl.edu/"
driver.get(URL)
time.sleep(5)
driver.find_element_by_link_text("Find a Doctor").click()
find_doc = driver.current_url
print(find_doc)
driver.get(find_doc)
# content = driver.page_source
# print(content)
response = http.request('GET', find_doc)
url_text = response.data #text
time.sleep(10)
count = len(driver.find_elements_by_xpath("//span[#class='entry-title-link']"))
print(count)
s = driver.find_element_by_css_selector("span[class='entry-title-link']") #firstpage click
s.click()
urls = []
provider = []
print(driver.current_url)
urls.append(driver.current_url)
name = driver.find_element_by_css_selector("h1[class='washu-ppi-name entry-title']").text
print(name)
provider.append(name)
specialization = driver.find_element_by_css_selector("ul[class='wuphys-specialties']").text
print(specialization)
location= driver.find_element_by_css_selector("a[class='wuphys-addr name']").text
print(location)
time.sleep(5)
driver.find_element_by_css_selector("a[href='https://physicians.wustl.edu/find-a-doctor/']").click()
time.sleep(10)
I have same classname of span but I need to loop the same class name but the div is different. In the url there is doctors name with details after click I get details and I need to move to next doctor which has same class name
I think you are looking for something of this kind (to loop through all the doctor links and get info from there). Here I have written a basic action which you can scale to add more data related to each doctor.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver import ActionChains
from selenium.webdriver.common.keys import Keys
import time
driver = webdriver.Chrome(options=chrome_options, executable_path="D:\\python works\\driver\\chromedriver.exe")
driver.maximize_window()
driver.get("https://physicians.wustl.edu/")
WebDriverWait(driver, 20).until(EC.visibility_of_element_located((By.LINK_TEXT, "Find a Doctor"))).click()
print(driver.current_url)
doc_cnt = WebDriverWait(driver, 10).until(EC.visibility_of_all_elements_located((By.XPATH, "//span[#class='entry-title-link']")))
print(len(doc_cnt))
doc_list=[] # to append all the doctor urls into the list for further processing, if required.
for doc in doc_cnt:
ActionChains(driver).key_down(Keys.CONTROL).click(doc).key_up(Keys.CONTROL).perform()
driver.switch_to.window(driver.window_handles[1])
doc_list.append(driver.current_url)
# ... you could include any code of yours related to each doctor here...
# After this one the tab terminates and a new doctor link would open
driver.close()
driver.switch_to.window(driver.window_handles[0])
time.sleep(1)
print(doc_list)

Python Selenium Element not found and Not Intractable

I'm trying to scrape from the moneycontrol.com. When I tried to send value in the search box I keep getting the same error in except block as "Element not Found".
I tried using XPath id as well as using the full XPath but in both cases, it doesn't work.
WITHOUT MAXIMIZING THE WINDOW
XPath id - //*[#id="search_str"]
Full XPath - /html/body/div[1]/header/div[1]/div[1]/div/div/div[2]/div/div/form/input[5]
Attaching the full code below:
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
def search_stock():
driver = webdriver.Chrome(
r'./chromedriver')
driver.get('https://www.moneycontrol.com/')
time.sleep(5)
search_icon = driver.find_element_by_xpath(
'//*[#id="fixedheader"]/div[4]/span')
search_icon.click()
time.sleep(2)
try:
search_box = driver.find_element_by_xpath('//*[#id="search_str"]')
print("Element is visible? " + str(search_box.is_displayed()))
time.sleep(10)
if search_box.is_displayed():
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
except NoSuchElementException:
print("Element not found")
driver.close()
search_stock()
Sometimes, it started working but most of the time it throwing exceptions and errors. Struggling since 3 days but none of the solutions working.
web scraping like that seems quite inefficient it is prob better to use requests and bs4. However if you want to do it like this you could try using action chains. found here Or you can do driver.get('https://www.moneycontrol.com/india/stockpricequote/consumer-food/zomato/Z') from the start instead of typing it in.
You may wanna try the below code :
def search_stock():
driver = webdriver.Chrome(r'./chromedriver')
driver.maximize_window()
driver.implicitly_wait(30)
driver.get('https://www.moneycontrol.com/')
wait = WebDriverWait(driver, 10)
time.sleep(5)
try:
ActionChains(driver).move_to_element(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[id='search_str']")))).perform()
search_box = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[id='search_str']")))
print("Element is visible? ", search_box.is_displayed())
time.sleep(10)
if search_box.is_displayed():
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
except NoSuchElementException:
print("Element not found")
Imports :
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
Try clicking on search_box and only after that sending text there.
search_box = driver.find_element_by_xpath('//form[#id="form_topsearch"]//input[#id="search_str"]')
search_box.click()
time.sleep(0.1)
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
Additionally I would advise you using explicit waits of expected conditions instead of hardcoded sleeps.
With it your code will be faster and more reliable.
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def search_stock():
driver = webdriver.Chrome(r'./chromedriver')
wait = WebDriverWait(driver, 20)
driver.get('https://www.moneycontrol.com/')
wait.until(EC.element_to_be_clickable((By.XPATH, '//*[#id="fixedheader"]/div[4]/span')).click()
search_box = wait.until(EC.element_to_be_clickable((By.XPATH, '//form[#id="form_topsearch"]//input[#id="search_str"]')))
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
#I'm not sure you should close the driver immediately after involving searching....
#driver.close()
search_stock()
UPD
Let's try this
import time
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
def search_stock():
driver = webdriver.Chrome(r'./chromedriver')
wait = WebDriverWait(driver, 20)
actions = ActionChains(driver)
driver.get('https://www.moneycontrol.com/')
search_icon = wait.until(EC.presence_of_element_located((By.XPATH, '//*[#id="fixedheader"]/div[4]/span')).click()
time.sleep(0.5)
driver.execute_script("arguments[0].scrollIntoView();", search_icon)
driver.execute_script("arguments[0].click();", search_icon)
search_box = wait.until(EC.presence_of_element_located((By.XPATH, '//form[#id="form_topsearch"]//input[#id="search_str"]')))
driver.execute_script("arguments[0].scrollIntoView();", search_icon)
driver.execute_script("arguments[0].click();", search_icon)
time.sleep(0.5)
search_box.send_keys('Zomato')
search_box.send_keys(Keys.RETURN)
#I'm not sure you should close the driver immediately after involving searching....
#driver.close()
search_stock()
If the above solution is still not working instead of
actions.move_to_element(search_box).click().perform()
try
driver.execute_script("arguments[0].click();", search_box)

selenium in python is skipping articles while trying to scrape the data

Im trying to extract data from articles using selenium in python, the code is identifying the articles but while running the loop a few articles are skipped randomly. Any help resolving this issue will be appreciated.
#Importing libraries
import requests
import os
import json
from selenium import webdriver
import pandas as pd
from bs4 import BeautifulSoup
import time
import requests
from time import sleep
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import traceback
from webdriver_manager.chrome import ChromeDriverManager
#opening a chrome instance
options = webdriver.ChromeOptions()
options.add_argument("start-maximized")
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option('useAutomationExtension', False)
driver = webdriver.Chrome(options=options, executable_path=r"C:/selenium/chromedriver.exe")
#getting into the website
driver.get('https://academic.oup.com/rof/issue/2/2')
#getting the articles
articles = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.XPATH, '/html/body/div[3]/main/section/div/div/div[1]/div/div[3]/div[2]/div[3]/div/div/div/div/h5')))
#loop to get in and out of articles
for article in articles:
try:
ActionChains(driver).key_down(Keys.CONTROL).click(article).key_up(Keys.CONTROL).perform()
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
window1 = driver.window_handles[1]
driver.switch_to_window(window1)
driver.close()
driver.switch_to_window(window0)
except:
print("couldnt get the article")
First, for collect all article element, you can use this css selector:
articles = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, '.customLink.item-title a')))
Second, This is wrong method:
driver.switch_to_window(window1)
It's should:
driver.switch_to.window(window1)
See the difference between _ and . above.
Third, you forgot to initialize the window0 variable:
window0 = driver.window_handles[0]
And finally, try the following code:
#getting into the website
driver.get('https://academic.oup.com/rof/issue/2/2')
#getting the articles
articles = WebDriverWait(driver, 30).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, '.customLink.item-title a')))
#loop to get in and out of articles
for article in articles:
try:
ActionChains(driver).key_down(Keys.CONTROL).click(article).key_up(Keys.CONTROL).perform()
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))
window1 = driver.window_handles[1]
driver.switch_to.window(window1)
driver.close()
window0 = driver.window_handles[0]
driver.switch_to.window(window0)
except:
print("couldnt get the article")
driver.quit()

PhantomJS python issue

My python selenium tests are working on firefox dirver (GUI) without any issue. But i wanted to run my tests in headless mode. When i try to run the same script with headless mode (with few modifications). it gives wierd errors.
Ex:
selenium.common.exceptions.NoSuchElementException: Message{"errorMessage":"Unable to find element with id 'ext-gen1499
python script :
import os
import time
from selenium.webdriver.common.proxy import *
from selenium.webdriver.common.by import By
phantomjs_path=r"/home/xxxx/nodejs-installs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs"
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
service_args = ['--proxy=x.x.x.x:80','--proxy-type=https']
driver = webdriver.PhantomJS(executable_path=r'/home/xxxx/nodejs-installs/phantomjs-2.1.1-linux-x86_64/bin/phantomjs',service_args=service_args)
os.environ['MOZ_HEADLESS'] = '1'
driver.get("https://aaaaa.com")
def Login():
try:
driver.find_element_by_id("username").send_keys("test#aaaa.com")
driver.find_element_by_id("password").send_keys("xxxxxxx")
driver.find_element_by_id("Submit").click()
login_flag=1
except:
print("Error Loading Login page")
login_flag=0
finally:
return login_flag
def CreateMail():
try:
element = WebDriverWait(driver, 3).until(EC.presence_of_element_located((By.ID, "button-1143-btnInnerEl")))
driver.find_element_by_id("button-1143-btnInnerEl").click()
except TimeoutException:
print ("Loading took too much time - Create New Mail")
driver.find_element_by_id("ext-gen1499").send_keys("test#test.com")
driver.find_element_by_id("textfield-1265-inputEl").send_keys("Automated Test Mail from Selenium")
driver.find_element_by_id("button-1252-btnIconEl").click()
Am i missing anything ?
It is a good practice to add an implicit wait of at-least 10 seconds , for allowing the target page element/s to load completely.
driver.implicitly_wait(10)

Resources