How to iterate through same x paths for different hyperlinks - python-3.x

I am having few hyperlinks(6 hyperlinks) in the ls List. I want to iterate over all the hyperlinks and for all the hyperlinks I want to iterate over whatever Xpaths(c,a,b,d mentioned below) are mentioned and the loop
should go on infinite. Below is my code.
import pandas as pd
from selenium import ebdriver
import time
driver=webdriver.Chrome(executable_path=".... ")
driver.get("hyerlink 1")
driver.maximize_window()
time.sleep(30)
df=pd.read_csv('..')
df.head
ls=df['column_name'].to_list()
for i in ls:
print(i)
driver.get(i)
i=0
while i<len(ls):
i+=1
c = driver.findElement(By.X_path, '/html/body/div[7]/div/div[11]/div[1]/div[2]/div[2]/div/div/div[1]/div/div/div/table/tbody/tr[1]/td/div/h3')
c.click()
time.sleep(30)
a = driver.findElement(By.X_path, '/html/body/div[7]/div/div[11]/div[1]/div[2]/div[2]/div/div/div[1]/div/div/div/table/tbody/tr[2]/td/div/div')
a.click()
time.sleep(30)
b=driver.findElement (By.X_path, '/html/body/div[7]/div/div[7]/div[1]/div/div/div')
b.click()
time.sleep(30) d=driver.findElement(By.X_path,'/html/body/div[7]/div/div[11]/div[1]/div[2]/div[2]/div/div/div[1]/div/div/div/div/div/div/div/div[2]/div ')
d.click()
time.sleep(30)

Related

Python Loop read the same CSV Data

I have to take the first line of the csv file and after having processed it, delete it, and then resume the line after.
I'm trying to build a login system that takes the accounts from the csv file, and logs in one by one.
the problem is that every time you start the loop it always takes the same account, how can I fix it?
import pandas as pd
import pyperclip
import selenium
import random
from selenium import webdriver
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
import time
from selenium.webdriver.common.keys import Keys
import names
df = pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv')
def instagram_login():
df2=df.at[0,'ID'] #Find the first row id
pyperclip.copy(df2) #Copy the first row id to the clipboard
print(pyperclip.paste()) #Print the first row id
#apro il sito
driver.get('https://www.instagram.com/')
driver.maximize_window() #schermo intero
time.sleep(2)
try:
consent= driver.find_element(By.XPATH,"/html/body/div[2]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/button[2]").click() #clicco il consenso
except:
pass
time.sleep(5)
put_username = driver.find_element(By.NAME,("username")).send_keys(pyperclip.paste()) #inserisco username
df2=df.at[0,'PASSWORD'] #find the password
pyperclip.copy(df2) #copy the password
put_password = driver.find_element(By.NAME,("password")).send_keys(pyperclip.paste()) #inserisco password
time.sleep(2)
login = driver.find_element(By.XPATH,"//div[contains(text(),'Accedi')]").click() #Click login
time.sleep(6)
#here is where the first row got deleted and saved on csv
df= pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv').drop(0, axis=0)
df.to_csv(r'/Users/giuseppeleonardi/Downloads/scraping2.csv', index=False)
#this is the loop that always takes the same line of the file every time even though this is canceled at the end of the operation:
for line in len(df):
instagram_login()
time.sleep(5)
driver.delete_all_cookies()
i've googled a lot but cannot figure it out, i've read that file handle will read the file once, i need the loop for reset the list everytime and take the first value, how can i do it?
sorry but i'm still learning
Google for local and global variables. You are changing df inside a function. This does not change the 'global' df. You either need to return your df from the function or declare it first as a global variable.
First option:
df = pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv')
def instagram_login():
df2=df.at[0,'ID'] #Find the first row id
.....
#here is where the first row got deleted and saved on csv
df= pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv').drop(0, axis=0)
df.to_csv(r'/Users/giuseppeleonardi/Downloads/scraping2.csv', index=False)
return df
for line in len(df):
df = instagram_login()
time.sleep(5)
driver.delete_all_cookies()
Second option:
df = pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv')
def instagram_login():
df2=df.at[0,'ID'] #Find the first row id
.....
#here is where the first row got deleted and saved on csv
global df
df = pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv').drop(0, axis=0)
df.to_csv(r'/Users/giuseppeleonardi/Downloads/scraping2.csv', index=False)
for line in len(df):
instagram_login()
time.sleep(5)
driver.delete_all_cookies()
Your definition of df inside the function doesn't change the outside df.
So you can return the df and save it to outside df.
data_frame= pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv')
def instagram_login(df):
......
#here is where the first row got deleted and saved on csv
df= pd.read_csv('/Users/giuseppeleonardi/Downloads/scraping2.csv').drop(0, axis=0)
df.to_csv(r'/Users/giuseppeleonardi/Downloads/scraping2.csv', index=False)
return df
#this is the loop that always takes the same line of the file every time even though this is canceled at the end of the operation:
for line in len(df):
data_frame = instagram_login(data_frame)
time.sleep(5)
driver.delete_all_cookies()

os.listdir() won't go after nine files

Hello i am trying to make a program that would automaticly go to imgur, enter the name that you typed and download top 10 images.Everything is working except the os library.When i try to do os.listdir() after nine files it wont show anymore files.I tried googling and found nothing if you see something that i messed up please tell me.Thanks in advance.Sorry for bad grammar.
Here is the code sample:
#! python3
import requests, os, sys
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
os.chdir('imgur/')
broswer = webdriver.Chrome(executable_path=r'C:\Users\{YOUR USERNAME}\Downloads\chromedriver.exe')
broswer.get('https://imgur.com/')
broswer.maximize_window()
search_bar = broswer.find_element_by_tag_name('input')
search_bar.send_keys('happy')
search_bar.send_keys(Keys.ENTER)
pictures = broswer.find_elements_by_tag_name('img')
for i in range(1, 11):
res = requests.get(pictures[i].get_attribute('src'))
try:
res.raise_for_status()
except:
print('Link doesnt exist')
if os.listdir() == []:
picture = open('picture1.png', 'wb')
else:
picture = open('picture' + str(int(os.listdir()[-1][7:-4]) + 1) + '.png', 'wb')
print(os.listdir())
for chunk in res.iter_content(100000):
picture.write(chunk)
picture.close()
os.listdir(".") #u missed adding address

Stock price logging function

I have this function where the price of a stock gets logged in real time every 2 seconds and save it into a csv file however I cant see anything in the csv when I open it. What am I missing from the script?
import pandas as pd
import time
import urllib
import sys
import fix_yahoo_finance as yf
def stocks():
# Enter stock symbol
stock = input("Enter stock: ")
# Name CSV file
csvy= str(stock) + time.strftime('.%A.%d.%b.%Y').replace(' ', '') + ".csv"
csvy = csvy.replace(':' , '')
with open(csvy, 'w') as f:
sys.stdout = f
while 1 > 0:
print(yf.get_live_price(stock))
time.sleep(2)
stocks()
You wrote:
print(yf.get_live_price(stock))
You want to additionally flush the buffer so your new text is immediately visible:
print(yf.get_live_price(stock), flush=True)
Alternatively, consider assigning the live price to a temp variable,
and then outputting it twice, with print() and f.write(),
rather than assigning a new value to stdout.
Then you'd be able to flush them independently according to your need,
f.flush() or sys.stdout.flush().

How to find out how long a search for files will take on python?

So I have a little app that searches for all xml files on my pc, copying the files that have 44 digits as the filename to the "output" folder.
The problem is that the final user needs an indication of the progress and remaining time of the task.
This is the module to copy files:
xml_search.py
import os
import re
from threading import Thread
from datetime import datetime
import time
import shutil
import winsound
os.system('cls')
def get_drives():
response = os.popen("wmic logicaldisk get caption")
list1 = []
t1 = datetime.now()
for line in response.readlines():
line = line.strip("\n")
line = line.strip("\r")
line = line.strip(" ")
if (line == "Caption" or line == ""):
continue
list1.append(line + '\\')
return list1
def search1(drive):
for root, dir, files in os.walk(drive):
for file in files:
if re.match("\d{44}.xml", file):
filename = os.path.join(root, file)
try:
shutil.copy(filename, os.path.join('output', file))
except Exception as e:
pass
def exec_(callback):
t1 = datetime.now()
list2 = [] # empty list is created
list1 = get_drives()
for each in list1:
process1 = Thread(target=search1, args=(each,))
process1.start()
list2.append(process1)
for t in list2:
t.join() # Terminate the threads
t2 = datetime.now()
total = str(t2-t1)
print(total, file=open('times.txt', 'a'), end="\n")
for x in range(3):
winsound.Beep(2000,100)
time.sleep(.1)
callback()
if __name__ == "__main__":
exec_()
The below code uses progressbar library and it shows
indication of the progress and remaining time of the task
import progressbar
from time import sleep
bar = progressbar.ProgressBar(maxval=1120, \
widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.ETA()])
bar.start()
for i in range(1120):
bar.update(i+1)
sleep(0.1)
bar.finish()
You would need to add the above modified code to your code.
So in your case, you would need to count the number of files and provide it as input to ProgressBar constructor's maxval argument and remove sleep call.
The suggested solution with progress bar should work with one thread. You would need to figure out how to initiate the progress bar and where to put the updates if you insist to work with multiple threads.
Try to implement a timer decorator like the following:
import time
def mytimer(func):
def wrapper():
t1 = time.time()
result = func()
t2 = time.time()
print(f"The function {func.__name__} was run {t2 - t1} seconds")
return result
return wrapper
#mytimer
def TimeConsumingFunction():
time.sleep(3)
print("Hello timers")
TimeConsumingFunction()
Output:
/usr/bin/python3.7 /home/user/Documents/python-workspace/timers/example.py
Hello timers
The function TimeConsumingFunction was run 3.002610206604004 seconds
Process finished with exit code 0

Selenium Web Scraping Id missing

I am trying to gather the data from the http://maharain.gov.in/ site. I have written the below script.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
import unittest, time, re
import csv,sys,os
from bs4 import BeautifulSoup
import time
def check_exists_by_xpath(xpath,driver):
try:
driver.find_element_by_xpath(xpath)
except NoSuchElementException:
return False
return True
chromepath= '/home/swapnil/Downloads/chromedriver'
driver = webdriver.Chrome(chromepath)
start_time =time.time()
base_url = "http://maharain.gov.in/"
driver.get(base_url)
driver.switch_to.frame('MenuFrame')
driver.find_element_by_name("QueriesCirclewise3").click()
time.sleep(3)
print("Done")
driver.find_element(By.XPATH, '//*[#id="menu"]/input[10]').click()
time.sleep(3)
print("Done")
# driver.find_element_by_name("PastQueriesCirclewise6").click()
time.sleep(3)
print("Done")
# driver.implicitly_wait(3)
driver.switch_to.default_content()
time.sleep(3)
print("Done")
driver.switch_to.frame(driver.find_element_by_name("ContentFrame"))
# dropdown_menu_year = Select(driver.find_element_by_id("selyear"))
# select_year = [option.text for option in dropdown_menu_year.options]
# select_year = [t for t in select_year if t !='Select']
# select_year = [ '2015', '2016', '2017']
time.sleep(3)
print("Done All ")
dropdown_menu_state = Select(driver.find_element_by_id("selstate"))
select_state = [option.text for option in dropdown_menu_state.options]
select_state = [t for t in select_state if t !='Select']
dropdown_menu_dist = Select(driver.find_element_by_id("seldist"))
select_dist = [option.text for option in dropdown_menu_dist.options]
select_dist = [t for t in select_dist if t !='Select']
dropdown_menu_month = Select(driver.find_element_by_id("selmonth"))
select_mon = [option.text for option in dropdown_menu_month.options]
select_mon = [t for t in select_mon if t !='Select']
i = 0
year=str(2018)
# for year in select_year:
if not os.path.exists(year):
os.makedirs(year)
for state in select_state:
for dist in select_dist:
if not os.path.exists(year+'/'+dist):
os.makedirs(year+'/'+dist)
for month in select_mon:
print (i)
# dropdown_menu_year = Select(driver.find_element_by_id("selyear"))
# dropdown_menu_year.select_by_visible_text(year)
dropdown_menu_state = Select(driver.find_element_by_id("selstate"))
dropdown_menu_state.select_by_visible_text(state)
time.sleep(1)
dropdown_menu_dist = Select(driver.find_element_by_id("seldist"))
dropdown_menu_dist.select_by_visible_text(dist)
if(dist=='Wardha' or dist=='Washim' or dist=='Yavatmal'):
# time.sleep(2)
dropdown_menu_month = Select(driver.find_element_by_id("selmonth"))
dropdown_menu_month.select_by_visible_text(month)
time.sleep(2)
driver.find_element_by_name("btnshow").click()
time.sleep(2)
print("Done")
driver.switch_to.frame(driver.find_element(By.CSS_SELECTOR, 'body > embed'))
if (check_exists_by_xpath('//*[#id="tableID"]',driver)):
tab = driver.find_element(By.XPATH,'//*[#id="tableID"]')
soup = BeautifulSoup (driver.page_source)
table = soup.select_one('table')
data = [[td.text for td in row.find_all("td")] for row in table.find_all("tr")]
file_name = year+'/'+dist+'/'+year+'_'+dist+'_'+month+'.csv'
print(file_name)
f = open(file_name,'w', newline='')
writer =csv.writer(f)
writer.writerows(data)
f.close()
i+=1
driver.switch_to.default_content()
driver.switch_to.frame(driver.find_element_by_name("ContentFrame"))
print ( time.time() - start_time)
print(i)
But each time I run the code it gets stuck at different locations with errors like missing selector id "selstate" or "body > embed" not present, which may run correctly in the next run without any changes to the code and may get stuck at the different location.
I have tried adding driver implicit wait and thread sleep with value set to 5 and less.Please point out what should be the correct measure to make it run in one go and where should be the wait or sleep statements to be added or any other changes if required.

Resources