I wrote the following to extract the first element in a class:
if var_source == "Image":
    outcsvfile = 'Image_Ids' + file + '_' + timestamp + '.csv'
    with open(outcsvfile, 'w', encoding='utf-8', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['ax', 'physical_id'])
    for i in range(len(var_ax)):
        browser.get('https://test.com' + str(mpid) + '&ax=' + var_ax[i])
        self.master.update()
        self.status.config(text=str(i+1) + "/" + str(len(var_ax)) + " Extracting AX: " + var_ax[i])
        try:
            ph_id = browser.find_element_by_xpath("//div[contains(@class, 'a-image-wrapper')]").get_attribute("alt")
            print(i+1, ': extract AX:', var_ax[i])
            with open(outcsvfile, 'a+', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                csv_writer.writerow([var_ax[i], ph_id])
        except:
            print(i+1, ': extract AX:', var_ax[i])
            with open(outcsvfile, 'a+', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                csv_writer.writerow([var_ax[i], '[missing AX]'])
I have 2 questions:
How can I extract all the physical_ids in the same cell separated by a comma (cell B2 = "physical_id1, physical_id2, physical_id3")?
How can I put the count of exported physical_ids in column C (e.g. C2 would be 3, because B2 contains 3 exported physical_ids)?
The source code:
<div alt="51d5gBEzhjL" style="width:220px;float:left;margin-left:34px;margin-bottom:10px;border:1px solid #D0D0D0" class="a-image-wrapper a-lazy-loaded MAIN GLOBAL 51d5gBEzhjL"><h1 class="a-size-medium a-spacing-mini a-spacing-top-mini a-color-information a-text-center a-text-bold">MAIN</h1><h1 class="a-size-base a-spacing-mini a-spacing-top-mini a-color-information a-text-center a-text-bold"> ou GLOBAL / Merch 1</h1></div>
<h1 class="a-size-medium a-spacing-mini a-spacing-top-mini a-color-information a-text-center a-text-bold">FACT</h1>
<h1 class="a-size-base a-spacing-mini a-spacing-top-mini a-color-information a-text-center a-text-bold"> ou GLOBAL / Merch 1</h1>
<span class="a-declarative" data-action="a-modal"><center><img class="ecx" id="51S+wTs36zL" src="https://test.com/images/I/51S+wTs36zL._AA200_.jpg" alt="51S+wTs36zL"></center></span>
<center>
<img class="ecx" id="51S+wTs36zL" src="https://test.com/images/I/51S+wTs36zL._AA200_.jpg" alt="51S+wTs36zL">
</center>
</span>
<h5 class="physical-id">51S+wTs36zL</h5>
<h1 class="a-size-medium a-spacing-mini a-spacing-top-mini a-color-information a-text-center a-text-bold" style="background:#D0D0D0">UPLOADED</h1>
<h1 class="a-size-base a-spacing-mini a-spacing-top-mini a-color-information a-text-center a-text-bold">19/Apr/2016:17:45:40</h1>
</div>
This worked for me and resolved both my questions:
if var_source == "Image":
    outcsvfile = 'Image_Ids-' + file + '_' + timestamp + '.csv'
    with open(outcsvfile, 'w', encoding='utf-8', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        csv_writer.writerow(['ax', 'physical_id', 'image_count'])
    for i in range(len(var_ax)):
        browser.get('https://test.com' + str(mpid) + '&ax=' + var_ax[i])
        self.master.update()
        self.status.config(text=str(i+1) + "/" + str(len(var_ax)) + " Extracting AX: " + var_ax[i])
        try:
            ph_id = browser.find_element_by_xpath("//div[contains(@class, 'a-image-wrapper')]").get_attribute("alt")
            ids1 = browser.find_elements_by_class_name("physical-id")
            ids1Text = []
            for a in ids1:
                ids1Text.append(a.text)
            nr = str(len(ids1))
            ax = ', '.join(ids1Text)
            print(i+1, ': extract AX:', var_ax[i])
            with open(outcsvfile, 'a+', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                csv_writer.writerow([var_ax[i], ax, nr])
        except:
            print(i+1, ': extract AX:', var_ax[i])
            with open(outcsvfile, 'a+', encoding='utf-8', newline='') as csvfile:
                csv_writer = csv.writer(csvfile)
                csv_writer.writerow([var_ax[i], '[missing AX]', 0])  # keep the 3-column layout on failures
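The two lines that actually answer my questions are the ', '.join(ids1Text) for the single cell and len(ids1) for the count. In isolation (a minimal sketch, assuming browser is an active Selenium session already on the page):

# Minimal sketch of the two key steps (assumes `browser` is already on the page).
elements = browser.find_elements_by_class_name("physical-id")  # all <h5 class="physical-id"> nodes
ids = [el.text for el in elements]

cell_value = ', '.join(ids)   # e.g. "physical_id1, physical_id2, physical_id3" -> column B
id_count = len(ids)           # e.g. 3 -> column C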
I am trying to find fuzzy string matches for university names and print a certain score (10, 5, 3) to a CSV each time, depending on which list the closest match came from.
data = [["MIT"], ["Stanford"], ...]
data1 = [['MASSACHUSETTS INSTITUTE OF TECHNOLOGY (MIT)'], ['STANFORD UNIVERSITY'], ...]
So far I have tried:
1 for uni in data:
2     hit = process.extractOne(str(uni[0]), data1, scorer = fuzz.token_set_ratio, score_cutoff = 90)
3     if float(hit[1]) < 100:
4         print("not found")
5     else:
6         print("Closest match for " + str(uni[0]) + " is " + str(hit[0]) + ", score: 10")
At this point I get TypeError: 'NoneType' object is not subscriptable on line 3.
I have checked the type of my variable:
print(type(hit)) #I was getting tuple now NoneType...
print(len(hit)) # Was getting 2 now unsubscriptable
print(float(hit[1])) # 100
As I understand it, this error comes up when a variable is not the type one thinks it is. Any idea how to resolve this issue? Many thanks.
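For context (my later understanding): fuzzywuzzy's process.extractOne returns None when no candidate reaches score_cutoff, so hit[1] then raises exactly this TypeError. A minimal sketch of the usual guard, using the same data and data1 lists as above:

from fuzzywuzzy import fuzz, process

for uni in data:
    hit = process.extractOne(str(uni[0]), data1, scorer=fuzz.token_set_ratio, score_cutoff=90)
    if hit is None:
        # nothing scored >= 90 for this university
        print("not found")
    else:
        match, score = hit[0], hit[1]  # extractOne returns a (match, score) tuple
        print("Closest match for " + str(uni[0]) + " is " + str(match) + ", score: 10")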
Thanks to @inthevortex, I was able to complete the code as follows:
for uni in data:
    hit = process.extractOne(str(uni[0]), data10, scorer = fuzz.token_set_ratio, score_cutoff = 90)
    try:
        if float(hit[1]) >= 94:
            with open(filename, mode='a', newline="") as csv_file:
                fieldnames = ['bwbnr', 'uni_name', 'match', 'points']
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=';')
                writer.writerow({'bwbnr': str(uni[0]), 'uni_name': str(uni[0]), 'match': str(hit), 'points': 10})
    except:
        hit1 = process.extractOne(str(uni[0]), data11, scorer = fuzz.token_set_ratio, score_cutoff = 90)
        try:
            if float(hit1[1]) >= 94:
                with open(filename, mode='a', newline="") as csv_file:
                    fieldnames = ['bwbnr', 'uni_name', 'match', 'points']
                    writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=';')
                    writer.writerow({'bwbnr': str(uni[0]), 'uni_name': str(uni[0]), 'match': str(hit1), 'points': 5})
... and so on... until the last except.
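The same cascade can also be written as one loop over the candidate lists and their point values, which avoids the nested try/except blocks (a sketch under the same assumptions: data10, data11, ... are my comparison lists, filename is the output CSV, the 94 threshold is passed directly as score_cutoff, and the data12 entry below is just a hypothetical third list):

import csv
from fuzzywuzzy import fuzz, process

# Each comparison list paired with the points a match in it is worth.
lists_and_points = [(data10, 10), (data11, 5), (data12, 3)]

for uni in data:
    for candidates, points in lists_and_points:
        hit = process.extractOne(str(uni[0]), candidates, scorer=fuzz.token_set_ratio, score_cutoff=94)
        if hit is None:
            continue  # no match in this list, try the next one
        with open(filename, mode='a', newline='') as csv_file:
            fieldnames = ['bwbnr', 'uni_name', 'match', 'points']
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames, delimiter=';')
            writer.writerow({'bwbnr': str(uni[0]), 'uni_name': str(uni[0]), 'match': str(hit), 'points': points})
        break  # stop at the first list that matches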
Can anyone tell me what the problem is with my own code?
I want to segment a big text into smaller texts by words, so that, for example, each segment contains 60 words.
file = r'C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\economy2.txt'
openFile = open(file, 'r', encoding='utf-8-sig')
words = openFile.read().split()
#print (words)
i = 0
for idx, w in enumerate(words, start=0):
    textNum = 1
    while textNum <= 20:
        wordAsText = []
        print("word list before:", wordAsText)
        while i < idx + 60:
            wordAsText.append(words[i])
            i += 1
        print("word list after:", wordAsText)
        textSeg = ' '.join(wordAsText)
        print(textNum, textSeg)
        files = open(r"C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\datasetEco\Eco" + str(textNum) + ".txt", "w", encoding='utf-8-sig')
        files.write(textSeg)
        files.close()
        idx += 60
        if textNum != 20:
            continue
        textNum += 1
My big file (economy2) contains more than 12K words.
EDIT:
Thanks for all the responses. I tried what I found here and it achieved what I needed.
Edited Code:
file = r'C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\economy2.txt'
openFile = open(file, 'r', encoding='utf-8-sig')
words = openFile.read().split()
#print (words)
n = 60
segments = [' '.join(words[i:i+n]) for i in range(0, len(words), n)]  # from link
i = 1
for s in segments:
    seg = open(r"C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\datasetEco\Eco" + str(i) + ".txt", "w", encoding='utf-8-sig')
    seg.write(s)
    seg.close()
    i += 1
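For reuse, the same chunking idea can be wrapped in a small function (a sketch; the chunk size and the datasetEco output folder are the ones from my script, and the function name is just illustrative):

import os

def split_into_files(words, out_dir, chunk_size=60, prefix="Eco"):
    # Write consecutive chunks of `chunk_size` words to numbered .txt files.
    for idx, start in enumerate(range(0, len(words), chunk_size), start=1):
        segment = ' '.join(words[start:start + chunk_size])
        path = os.path.join(out_dir, prefix + str(idx) + ".txt")
        with open(path, "w", encoding='utf-8-sig') as out:
            out.write(segment)

split_into_files(words, r"C:\Users\Nujou\Desktop\Master\thesis\steganalysis\dataset\datasetEco")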
I'm trying to print certain values to a CSV from a list I'm passing in. Here is my function:
current = datetime.datetime.now()  # defining datetime for use in function

def write_csv(custody_parts):
    with open((current.strftime("%m_%d_%y_%H_%M_%S")) + '.csv', 'w', newline='') as csvfile:
        csvfile = io.StringIO()
        fieldnames = ['serial', 'user', 'time']
        writer = csv.DictWriter(csvfile, extrasaction='ignore', fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(custody_parts)
        csvfile.getvalue()
        print(csvfile.getvalue())
        return csvfile.getvalue()
Then I call it with the list I'm trying to pass through:
write_csv(parts)
and it creates the file: 06_06_18_12_13_53.csv
and prints to the screen:
serial,user,time
serial1,user1,date1
serial2,user2,date2
serial3,user3,date3
but the file it creates is empty, so it isn't writing to the file it is creating.
Can someone point me in the right direction?
Thanks ~
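A likely cause, from reading the function above: rebinding csvfile to io.StringIO() inside the with block means the DictWriter writes into an in-memory buffer, never into the file that was opened, so the file stays empty. A minimal sketch that keeps the DictWriter but writes straight to the file:

import csv
import datetime

def write_csv(custody_parts):
    current = datetime.datetime.now()
    filename = current.strftime("%m_%d_%y_%H_%M_%S") + '.csv'
    with open(filename, 'w', newline='') as csvfile:
        fieldnames = ['serial', 'user', 'time']
        writer = csv.DictWriter(csvfile, extrasaction='ignore', fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(custody_parts)  # rows go to the real file, not a StringIO buffer
    return filename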
EDIT:
I ended up going with this instead:
def write_csv(custody_parts):
    current = datetime.datetime.now()
    with open((current.strftime("%m_%d_%y_%H_%M_%S")) + '.csv', 'w', newline='') as csvfile:
        custodywriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
        custodywriter.writerow(['serial', 'user', 'time'])
        for i in custody_parts:
            x = [i["serial"], i["user"], i["time"]]
            custodywriter.writerow(x)
        custodywriter.writerow(["End of Report"])
I need to find the distance between two latitude/longitude pairs. Chrome is being controlled by the driver, the latitudes and longitudes are entered into the right fields, and the page shows the distance value in the textbox, but my script is not able to retrieve that generated number. Here's the code. Kindly help.
from selenium import webdriver
import csv
import time
with open('C:/Users/Nisarg.Bhatt/Documents/lats and longs/Lat_long_cleaned/retail_first.csv', 'r') as f:
    reader = csv.reader(f.read().splitlines(), delimiter=',')
    data = [row for row in reader]

filename = 'C:/Users/Nisarg.Bhatt/Documents/lats and longs/Lat_long_cleaned/retail_first'
option= webdriver.ChromeOptions()
option.add_argument("-incognito")
path= "Y:/AppData/Local/chromedriver"
browser= webdriver.Chrome(executable_path=path)
url="https://andrew.hedges.name/experiments/haversine/"
browser.get(url)
print(browser.title)
crash = 1
results = []
new=[]
skipped = []
for i, row in enumerate(data[1:]):
    print(i)
    search = browser.find_element_by_name('lat1')
    search_term = data[i+1][5]
    search_1 = browser.find_element_by_name("lon1")
    search_term_1 = data[i+1][6]
    search_2 = browser.find_element_by_name('lat2')
    search_term_2 = data[i+2][5]
    search_3 = browser.find_element_by_name('lon2')
    search_term_3 = data[i+2][6]
    search.clear()
    search_1.clear()
    search_2.clear()
    search_3.clear()
    try:
        search.send_keys(search_term)
        search_1.send_keys(search_term_1)
        search_2.send_keys(search_term_2)
        search_3.send_keys(search_term_3)
    except:
        print('Skipped %s' % search_term)
        print(row)
        skipped.append(row)
        continue
    search.submit()
    time.sleep(1)
    try:
        lat = browser.find_elements_by_xpath("/html/body/form/p[4]/input[2]")
    except:
        alert = browser.switch_to_alert()
        alert.accept()
        browser.switch_to_default_content()
        print("Couldn't find %s" % search_term)
        print(row)
        skipped.append(row)
        continue
    lat_long = lat.text.strip('() ').split(',')
    lat_long_clean = [float(n) for n in lat]
    try:
        browser.refresh()
    except:
        with open(filename + 'recovered' + '%i' % crash + '.csv', "wb") as f:
            writer = csv.writer(f)
            writer.writerows(results)
        crash += 1
    print(lat_long_clean)
    r = row
    r.extend(lat_long_clean)
    r.insert(0, i)
    print(r)
    results.append(r)
    with open(filename + ".csv", "a") as f:
        writer = csv.writer(f)
        writer.writerow(r)

with open(filename + "comp.csv", "wb") as f:
    writer = csv.writer(f)
    writer.writerows(results)
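One likely culprit in the read-back step above (a guess from the posted XPath): find_elements_by_xpath returns a list, and the target is an <input> element, so the distance lives in its value attribute rather than in .text. A sketch of how the extraction might look instead, reusing the same XPath:

# Hypothetical fix for the read-back step (same XPath as above).
result_box = browser.find_element_by_xpath("/html/body/form/p[4]/input[2]")
distance_text = result_box.get_attribute("value")  # value attribute, since .text is empty for <input>
print(distance_text)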
I've been having trouble using variables in a new dirpath when using with open to create a CSV file.
I can build the dirpath string fine, along with the .csv filename, and join them with os.path.join.
But when I try to use the complete, correctly formatted path in the open statement, it appears to add an extra backslash next to the existing backslashes in the string? I could not find any documentation on why this might be happening.
This is the error
FileNotFoundError: [Errno 2] No such file or directory: 'Logs\\14-05-2018\\dfg.csv'
If I simply write the following, I can create the subfolders and CSVs with no problem.
with open('Logs\\1234\\asd.csv', 'w') as csvfile:
So the error happens on line 43, which is the with open(csvlogPath, 'w') as csvfile: line. I have tried 'r' instead of 'w' and get the same problem. At a loss after two days, so it's time to ask you guys. :)
import csv, os.path

#Get and create csv filename string
logFN = input("Please name your trade.\n#")
suffix = '.csv'
csvname = logFN + suffix

#Create dir name string
t = time.strftime("%d-%m-%Y-%H%M")
dirsname = os.path.join('Logs', t)
csvlogPath = os.path.join(dirsname, csvname)

with open(csvlogPath, 'w') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',', lineterminator='\n', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    filewriter.writerow(['get', 'new', 'values', 'later'])

with open(csvlogPath, 'w') as f:
    reader = csv.reader(f)
    # read file row by row
    rwsCount = 0
    for row in reader:
        print(row, rwsCount)
        rwsCount += 1
    line_number = rwsCount - 1
    print(line_number)

with open(csvlogPath, 'w') as f:
    mycsv = csv.reader(f)
    mycsv = list(mycsv)
    text = mycsv[line_number][0]
    print(text)
You need to make the dirs first; in Python you can use os.makedirs(path), like so:
csvlogPath = os.path.join(dirsname, csvname)
os.makedirs(os.path.dirname(csvlogPath), exist_ok=True)
with open(csvlogPath, 'w') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',', lineterminator='\n', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    filewriter.writerow(['get', 'new', 'values', 'later'])
With open, use 'r' to read and 'w' to write, so you need to change the next two opens to:
with open(csvlogPath, 'r') as f:
    reader = csv.reader(f)
    # read file row by row
    rwsCount = 0
    for row in reader:
        print(row, rwsCount)
        rwsCount += 1
    line_number = rwsCount - 1
    print(line_number)

with open(csvlogPath, 'r') as f:
    mycsv = csv.reader(f)
    mycsv = list(mycsv)
    text = mycsv[line_number][0]
Test this code:
import csv, os.path
import time

#Get and create csv filename string
logFN = input("Please name your trade.\n#")
suffix = '.csv'
csvname = logFN + suffix

#Create dir name string
t = time.strftime("%d-%m-%Y-%H%M")
dirsname = os.path.join('Logs', t)

###########################
if not os.path.exists(dirsname):
    os.makedirs(dirsname)
###########################

csvlogPath = os.path.join(dirsname, csvname)

with open(csvlogPath, 'w') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',', lineterminator='\n', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    filewriter.writerow(['get', 'new', 'values', 'later'])

with open(csvlogPath, 'r') as f:
    reader = csv.reader(f)
    # read file row by row
    rwsCount = 0
    for row in reader:
        print(row, rwsCount)
        rwsCount += 1
    line_number = rwsCount - 1
    print(line_number)

with open(csvlogPath, 'r') as f:
    mycsv = csv.reader(f)
    mycsv = list(mycsv)
    text = mycsv[line_number][0]
    print(text)
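For completeness, a small variant of the same fix using pathlib (an alternative sketch, reusing the t and csvname variables from the code above; this is not what either answer uses):

import csv
from pathlib import Path

csvlogPath = Path('Logs') / t / csvname
csvlogPath.parent.mkdir(parents=True, exist_ok=True)  # create Logs\<timestamp> if it does not exist

with csvlogPath.open('w', newline='') as csvfile:
    filewriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
    filewriter.writerow(['get', 'new', 'values', 'later'])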