csvwriter writing in a loop issue - python-3.x

I'm trying to write multiple rows but can't seem to do it with this code:
i = range(1, 21)
page_number = 38050
#rownumber = 0
for element in i:
    url = 'https://howlongtobeat.com/game.php?id=' + format(page_number)
    response = get(url)
    html_soup = BeautifulSoup(response.text, 'html.parser')
    page_number += 1
    game_length = html_soup.select('div.game_times li div')[-1].string
    game_developer = html_soup.find_all('strong')[1].next_sibling.string
    game_publisher = html_soup.find_all('strong')[2].next_sibling.string
    game_console = html_soup.find_all('strong')[3].next_sibling.string
    game_genres = html_soup.find_all('strong')[4].next_sibling.string
    print(url)
    print(game_name)
    print(game_length)
    print(game_developer)
    print(game_publisher)
    print(game_genres)
    print(game_console)
    with open('HLTB.csv', 'w') as f:
        thewriter = csv.writer(f)
        # thewriter.writerow(['Game Name:', 'Game Length:', 'Game Developer:', "Game Publisher:", 'Game Genre:', 'Game Console'])
        row = [game_name, game_length, game_developer, game_publisher, game_genres, game_console]
        thewriter.writerow(row)
        # rownumber += 1
I'm just worried about the CSV part for now, but it only writes the last entry (writing over the previous one). What am I doing wrong? How can I make it so there is a new row per game? I have a feeling it's to do with a variable I can add, but I'm not sure where (hence the rownumber variable above).

Here is the problem: you are reopening the file inside the loop with mode w, which truncates it and overwrites your previous content on every iteration. Use a (append) instead of w, or better, open the file only once. See:
for element in i:
    # ...
    # ---> wrong: reopens and truncates the file on every iteration
    with open('HLTB.csv', 'w') as f:
        ...
You can move with open('HLTB.csv', 'w') as f: out of the loop, and then it will not overwrite anything.
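A minimal corrected sketch of the whole loop (assuming the same imports and the i / page_number setup from the question; note the question's snippet never actually assigns game_name, so the scraping details are elided here too):

import csv

with open('HLTB.csv', 'w', newline='') as f:
    thewriter = csv.writer(f)
    # header row, written once
    thewriter.writerow(['Game Name:', 'Game Length:', 'Game Developer:',
                        'Game Publisher:', 'Game Genre:', 'Game Console'])
    for element in i:
        url = 'https://howlongtobeat.com/game.php?id=' + format(page_number)
        response = get(url)
        html_soup = BeautifulSoup(response.text, 'html.parser')
        page_number += 1
        # ... scrape game_name, game_length, game_developer, etc. as before ...
        row = [game_name, game_length, game_developer,
               game_publisher, game_genres, game_console]
        thewriter.writerow(row)  # appends one row per game; nothing is overwritten

The newline='' follows the csv module's documented recommendation and prevents blank lines between rows on Windows.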

Related

Split function doesn't work for string and for list

I'm doing one of my first web scrapes and I already have the elements I want to extract, but I cannot find a way to print them as a numbered list. The code I have for now:
import requests
from bs4 import BeautifulSoup as soup

r = requests.get('https://mmazurek.dev/category/programowanie-2/page/3/', proxies={'http': '82.119.170.106'})
page = soup(r.content, "html.parser")
contents = page.findAll(None, class_="post-title-link")
for content in contents:
    text_content = list(content.get_text())
    first_letter = str(text_content[0])
    x = "".join(first_letter)
    listToStr = "".join(map(str, text_content))
    print(listToStr)
The purpose is to have the list printed like:
P....
J...
...
Hope you don't mind that it's Polish text ;)
import requests
from bs4 import BeautifulSoup as bs

def get_html(url, useragent=None, proxy=None):
    session = requests.Session()
    request = session.get(url=url, headers=useragent, proxies=proxy)
    if request.status_code == 200:
        soup = bs(request.text, 'lxml')
        return soup
    else:
        print("Error " + str(request.status_code))
        return request.status_code

def parse(soup):
    data = []
    contents = soup.findAll(None, class_="post-title-link")
    for i, content in enumerate(contents):
        text = content.text
        href = content['href']
        data.append([
            i,
            text,
            href,
        ])
    return data

data = parse(get_html('https://mmazurek.dev/category/programowanie-2/page/3/', proxy={'http': '82.119.170.106'}))
print(data)
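Each entry in data is an [index, text, href] triple, so the numbered list the question asked for can be printed straight from it (a small usage sketch on top of the answer's code):

for i, text, href in data:
    print(str(i + 1) + ". " + text.strip())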

How do I change a text dictionary from a file into a usable dictionary

Right, so I need to make a function that saves a player's username in a dictionary, which is then saved to a text file so it can be reused later.
The problem is that on reuse I can't manage to turn the string I read from the file back into a dictionary.
Here is my code:
from ast import literal_eval

def verification(j, d):
    if j in d.keys():
        return d
    else:
        d[j] = [0, 0]
        return d

savefile = open("save.txt", "r")
'''d = dict()
for line in savefile:
    (key, val) = line.split(".")
    d[key] = val
print(d)'''
d = savefile.read()
python_dict = literal_eval(d)
savefile.close()

j = input("name? ")
result = verification(j, python_dict)

savefile = open("save.txt", "w")
'''for i in result:
    text = i + "." + str(result[i]) + " \n"
    savefile.write(text)'''
savefile.write(str(result))
savefile.close()
As you can see, I tried literal_eval from ast. I also tried .split(), but that wouldn't work either, so I'm stuck. Any ideas? It would be a great help. Thanks.
There is no need to do your own encoding/decoding from scratch when existing libraries can do it for you.
One good example is JSON, which is not Python-exclusive either, so the database you create can be used by other applications.
This can be done easily:
import json

def verification(j, d):
    if j not in d:
        d[j] = [0, 0]
    return d

with open("save.txt", "r") as savefile:
    python_dict = json.load(savefile)

j = input("name? ")
result = verification(j, python_dict)

with open("save.txt", "w") as savefile:
    json.dump(result, savefile)
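One caveat the answer does not cover: json.load will raise an error if save.txt does not exist yet or is empty. A possible guard, my addition rather than part of the answer:

import json
import os

# Fall back to an empty dict when there is no usable save file yet
if os.path.exists("save.txt") and os.path.getsize("save.txt") > 0:
    with open("save.txt", "r") as savefile:
        python_dict = json.load(savefile)
else:
    python_dict = {}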

Using Python to delete rows in a csv file that contain certain chars

I have a CSV file that I'm trying to clean up. I want to look at the first column and delete any row whose first column contains anything other than letters (for now I'm cleaning up rows where the first column has a ^ or a .). All my attempts either do nothing or nuke the whole CSV file.
Interestingly enough, I have code that identifies the problem rows, and it seems to work fine:
def FindProblemRows():
    with open('Data.csv') as csvDataFile:
        ProblemRows = []
        csvReader = csv.reader(csvDataFile)
        data = [row for row in csv.reader(csvDataFile)]
        length = len(data)
        for i in range(0, length):
            if data[i][0].find('^') != -1 or data[i][0].find('.') != -1:
                ProblemRows.append(i)
        return (ProblemRows)
Below are my latest three failed attempts. Where am I going wrong, and what should I change? Which of these comes closest?
'''
def Clean():
    with open("Data.csv", "w", newline='') as f:
        data = list(csv.reader(f))
        writer = csv.writer(f)
        Problems = FindProblemRows()
        data = list(csv.reader(f))
        length = len(data)
        for row in data:
            for i in Problems:
                for j in range(0, length):
                    if row[j] == i:
                        writer.writerow(row)
                        Problems.remove(i)

def Clean():
    Problems = FindProblemRows()
    with open('Data.csv') as csvDataFile:
        csvReader = csv.reader(csvDataFile)
        data = [row for row in csv.reader(csvDataFile)]
        length = len(data)
        width = len(data[0])
    with open("Data.csv", "r") as csvFile:
        csvReader = csv.reader(csvFile)
        with open("CleansedData.csv", "w") as csvResult:
            csvWrite = csv.writer(csvResult)
            for i in Problems:
                for j in range(0, length):
                    if data[j] == i:
                        del data[j]
            for j in range(0, length):
                csvWrite.writerow(data[j])
'''
def Clean():
    with open("Data.csv", 'r') as infile, open("CleansedData.csv", 'w') as outfile:
        data = [row for row in infile]
        for row in infile:
            for column in row:
                if "^" not in data[row][0]:
                    if "." not in data[row][0]:
                        outfile.write(data[row])
Update
Now I have:
def Clean():
    df = pd.read_csv('Data.csv')
    df = df['^' not in df.Symbol]
    df = df['.' not in df.Symbol]
but I get KeyError: True. Shouldn't that work?
You should check whether the column Symbol contains any of the characters of interest. The str.contains method takes a regular expression:
bad_rows = df.Symbol.str.contains('[.^]')
df_clean = df[~bad_rows]
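Putting the whole clean-up together (a sketch that assumes the column really is named Symbol; the na=False is my addition so rows with a missing symbol don't break the boolean mask):

import pandas as pd

df = pd.read_csv('Data.csv')
# inside [...] both . and ^ are plain characters, so this matches either one
df_clean = df[~df.Symbol.str.contains('[.^]', na=False)]
df_clean.to_csv('CleansedData.csv', index=False)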

Error writing to CSV, not allowing me to write, Python 3 and csvwriter

Having a problem with this code:
i = range(0, 51)
page_number = 1
with open('hltb data/HLTB.csv', 'w') as f:
    thewriter = csv.writer(f)
    thewriter.writerow(['Game Name:', 'Game Length:', 'Game Developer:', "Game Publisher:", 'Game Genre:', 'Game Console:', 'URL:'])
for element in i:
    url = 'https://howlongtobeat.com/game.php?id=' + format(page_number)
    response = get(url)
    html_soup = BeautifulSoup(response.text, 'html.parser')
    page_number += 1
    try:
        game_name = html_soup.select('div.profile_header')[0].text
    except:
        game_name = "Game Name not found"
    try:
        game_length = html_soup.select('div.game_times li div')[-1].string
    except:
        game_length = "Game length not found"
    try:
        game_developer = html_soup.find_all('strong', string='\nDeveloper:\n')[0].next_sibling
    except:
        game_developer = "Game developer not found"
    try:
        game_publisher = html_soup.find_all('strong', string='\nPublisher:\n')[0].next_sibling
    except:
        game_publisher = "Game Publisher not found"
    try:
        game_console = html_soup.find_all('strong', string='\nPlayable On:\n')[0].next_sibling
    except:
        game_console = "Game Playable on not found"
    try:
        game_genres = html_soup.find_all('strong', string='\nGenres:\n')[0].next_sibling
    except:
        game_genres = "Game Genres not found"
    print(url)
    print(game_name)
    print(game_length)
    print(game_developer)
    print(game_publisher)
    print(game_genres)
    print(game_console)
    row = [game_name, game_length, game_developer, game_publisher, game_genres, game_console, url]
    thewriter.writerow(row)
I'm getting this error when I run the code:
ValueError                                Traceback (most recent call last)
in ()
     46
     47 row = [game_name, game_length, game_developer, game_publisher, game_genres, game_console, url]
---> 48 thewriter.writerow(row)
ValueError: I/O operation on closed file.
I had it working before. How do I do this kind of data scraping and transfer the info into a spreadsheet so I can manipulate the data?
It looks like you are running your for loop outside of your with open() block, i.e. on an already-closed file. Move the for loop so it is contained in the with open() block:
[...]
with open('hltb data/HLTB.csv', 'w') as f:
    thewriter = csv.writer(f)
    thewriter.writerow(['Game Name:', 'Game Length:', 'Game Developer:', "Game Publisher:", 'Game Genre:', 'Game Console:', 'URL:'])
    for element in i:
        url = 'https://howlongtobeat.com/game.php?id=' + format(page_number)
        [...]
Edit: I meant with open(), not while. Fixed my post and am making some coffee...
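One small extra on top of that fix (my addition, not part of the answer): the csv module documentation recommends opening the file with newline='' when using csv.writer, otherwise rows can come out separated by blank lines on Windows:

with open('hltb data/HLTB.csv', 'w', newline='') as f:
    thewriter = csv.writer(f)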

lat_long = lat.text.strip('() ').split(','): AttributeError: 'list' object has no attribute 'text'

I need to find the distance between two latitude/longitude pairs. Chrome is being controlled by the driver, the latitudes and longitudes are entered into the right fields, and the page shows the distance value in the textbox, but the script cannot retrieve that generated string of numbers. Here's the code. Kindly help.
from selenium import webdriver
import csv
import time

with open('C:/Users/Nisarg.Bhatt/Documents/lats and longs/Lat_long_cleaned/retail_first.csv', 'r') as f:
    reader = csv.reader(f.read().splitlines(), delimiter=',')
    data = [row for row in reader]

filename = 'C:/Users/Nisarg.Bhatt/Documents/lats and longs/Lat_long_cleaned/retail_first'

option = webdriver.ChromeOptions()
option.add_argument("-incognito")
path = "Y:/AppData/Local/chromedriver"
browser = webdriver.Chrome(executable_path=path)

url = "https://andrew.hedges.name/experiments/haversine/"
browser.get(url)
print(browser.title)

crash = 1
results = []
new = []
skipped = []
for i, row in enumerate(data[1:]):
    print(i)
    search = browser.find_element_by_name('lat1')
    search_term = data[i + 1][5]
    search_1 = browser.find_element_by_name("lon1")
    search_term_1 = data[i + 1][6]
    search_2 = browser.find_element_by_name('lat2')
    search_term_2 = data[i + 2][5]
    search_3 = browser.find_element_by_name('lon2')
    search_term_3 = data[i + 2][6]
    search.clear()
    search_1.clear()
    search_2.clear()
    search_3.clear()
    try:
        search.send_keys(search_term)
        search_1.send_keys(search_term_1)
        search_2.send_keys(search_term_2)
        search_3.send_keys(search_term_3)
    except:
        print('Skipped %s' % search_term)
        print(row)
        skipped.append(row)
        continue
    search.submit()
    time.sleep(1)
    try:
        lat = browser.find_elements_by_xpath("/html/body/form/p[4]/input[2]")
    except:
        alert = browser.switch_to_alert()
        alert.accept()
        browser.switch_to_default_content()
        print('Couldnt find %s' % search_term)
        print(row)
        skipped.append(row)
        continue
    lat_long = lat.text.strip('() ').split(',')
    lat_long_clean = [float(n) for n in lat]
    try:
        browser.refresh()
    except:
        with open(filename + 'recovered' + '%i' % crash + '.csv', "wb") as f:
            writer = csv.writer(f)
            writer.writerows(results)
        crash += 1
    print(lat_long_clean)
    r = row
    r.extend(lat_long_clean)
    r.insert(0, i)
    print(r)
    results.append(r)
    with open(filename + ".csv", "a") as f:
        writer = csv.writer(f)
        writer.writerow(r)

with open(filename + "comp.csv", "wb") as f:
    writer = csv.writer(f)
    writer.writerows(results)
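The error in the title is the giveaway: find_elements_by_xpath (plural) returns a list, and a list has no .text attribute. A sketch of the likely fix, untested against this page (and note that for an input element the generated value normally lives in the value attribute rather than in .text):

lat = browser.find_element_by_xpath("/html/body/form/p[4]/input[2]")  # singular: one WebElement
lat_long = lat.get_attribute('value').strip('() ').split(',')
lat_long_clean = [float(n) for n in lat_long]  # iterate the parsed strings, not the element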
