Formatting a Python generated CSV - python-3.x

I'm making a web scraper in python.
I'd like to remove the blank rows from the generated CSV and add a header row reading "Car make", "Car Model", "Price". I would also like to remove the [] from all the names in the generated CSV.
imports go here...
source = requests.get(' website link goes here...').text
soup = bs(source, 'html.parser')
csv_file = open('pyScraper_1.3_Export', 'w')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['brand_Names', 'Prices'])
#gives us the make and model of all cars
Names = []
Prices_Cars = []
for var1 in soup.find_all('h3', class_ = 'brandModelTitle'):
car_Names = var1.text # var1.span.text
test_Split = car_Names.split("\n")
full_Names = test_Split[1:3]
#make = test_Split[1:2]
#model = test_Split[2:3]
for Prices in soup.find_all('span', class_ = 'f20 bold fieldPrice'):
Prices = Prices.span.text
Prices = re.sub("^\s+|\s+$", "",Prices, flags=re.UNICODE) # removing whitespace before the prices
csv_file = open('pyScraper_1.3_Export.csv', 'a')
csv_writer = csv.writer(csv_file)
i = 0
while i < len(Prices_Cars):
csv_writer.writerow([Names[i], Prices_Cars[i]])
i = i + 1
here is the screenshot of the generated csv

To remove additional newlines:
csv_file = open('pyScraper_1.3_Export.csv', 'a', newline='')
(From the csv module documentation: "If csvfile is a file object, it should be opened with newline=''.")
To add headers:
You are actually adding headers already, but to a file named pyScraper_1.3_Export (note the missing .csv extension), which is probably a typo. Just change the code at about line 6 to:
csv_file = open('pyScraper_1.3_Export.csv', 'w', newline='')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(["Car make", "Car Model", "Price"])
As for removing nested list, unpack Names[i] with * operator:
csv_writer.writerow([*Names[i], Prices_Cars[i]])


.csv to .arff function on Python

I'm trying to write a conversion function from csv to arff; right now I have this:
def csv2arff(csv_path, arff_path=None):
with open(csv_path, 'r') as fr:
attributes = []
if arff_path is None:
arff_path = csv_path[:-4] + '_prueba.arff' # *.arff -> *.csv
write_sw = False
with open(arff_path, 'w') as fw:
fw.write('#relation base_datos_modelo_3_limpia \n')
firstline = fr.readlines()[0].rstrip()
and that gives me:
#relation base_datos_modelo_3_limpia
So I want to put "#attribute" before each attribute and change each "," to "\n", but I don't know how to do it. I tried to write a function to replace the ",", but it didn't work. Any ideas?
Thank you guys.
Try the liac-arff library.
Here is an example for converting the UCI iris dataset from ARFF to CSV and then back to ARFF:
import csv
import arff
# arff -> csv
# Load inside a `with` block so the input file handle is closed promptly.
with open('./iris.arff', 'r') as fp:
    content = arff.load(fp)

# newline='' lets the csv module control line endings itself, which avoids
# blank rows between records on Windows.
with open('./out.csv', 'w', newline='') as fp:
    writer = csv.writer(fp)
    # Each entry of content['attributes'] is unpacked as (name, type);
    # only the name belongs in the CSV header row.
    header = []
    for n, t in content['attributes']:
        header.append(n)
    writer.writerow(header)
    writer.writerows(content['data'])

# csv -> arff
with open('./out.csv', 'r', newline='') as fp:
    reader = csv.reader(fp)
    header = None
    data = []
    for row in reader:
        if header is None:
            # The first row carries the attribute names.
            header = row
        else:
            # Every later row is a data record.
            data.append(row)

content = {}
content['relation'] = "from my csv file"
content['attributes'] = []
for n in header:
    if n == "class":
        # The nominal class column needs its allowed values listed explicitly.
        content['attributes'].append((n, ['Iris-setosa', 'Iris-versicolor', 'Iris-virginica']))
    else:
        # Every other column is declared numeric.
        content['attributes'].append((n, 'NUMERIC'))
content['data'] = data

with open('./out.arff', 'w') as fp:
    arff.dump(content, fp)
NB: For the last stage, we need to specify the nominal class values, which you could determine by scanning the data.

Printing data from the csv file

I want to ask how to print the respective numbers of dentists for the year 2012 from the CSV file.
import csv
csv_file = open('project.csv' , 'r')
data_file = csv.reader(csv_file)
def opt_a():
for row in data_file:
i = row[1:]
This is what I have tried, but with it I am only able to print the data out row by row.
Sample of data :
year,Private Dental Specialists,Private General Dental Practitioners,Public Dental Specialists,Public General Dental Practitioners
In your for loop, simply check whether the first value of the row is 2012:
import csv
csv_file = open('project.csv' , 'r')
data_file = csv.reader(csv_file)
def opt_a():
    """Print the dentist counts recorded for the year 2012.

    Iterates over the module-level ``data_file`` csv reader and, for every
    row whose first field is ``"2012"``, prints the remaining fields of
    that row as a list.

    Returns:
        None. The matching values are written to standard output.
    """
    for row in data_file:
        # csv.reader yields each field as a string, so compare with "2012";
        # the `row and` guard skips blank lines, which come back as [].
        if row and row[0] == "2012":
            print(row[1:])

Error "found unreadable content" when open excel file written by .to_excel()

I'm practicing web scraping, and try to get some info of cars.
I want one of my dataframe columns to be a hyperlink to the model image, and I succeeded in creating the formula and saving it to the dataframe.
For example:
=HYPERLINK(""; "Audi Q3")
=HYPERLINK(""; "Audi Q7")
However, when I open the file (I'm using Excel 2010), Excel reports the error "Excel found unreadable content...", and if I answer Yes, it deletes the column contents.
Here is log:
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<recoveryLog xmlns=""><logFileName>error040320_03.xml</logFileName><summary>Errors were detected in file 'D:\Users\Tamer\PycharmProjects\Udemy\suvs.xlsx'</summary><removedRecords summary="Following is a list of removed records:"><removedRecord>Removed Records: Formula from /xl/worksheets/sheet1.xml part</removedRecord></removedRecords></recoveryLog>
I tried to copy and paste the link generated by the code into the file, and it works very well.
So what is the problem?
Thank you
Here's the code:
import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup
HEADERS = {"Accept-Language": "en-US,en;q=0.5"}
URL = ''
images = []
models = []
model_years = []
msrp_ranges = []
consumer_ratings = []
combined_mpgs = []
body_styles = []
page = requests.get(URL, headers=HEADERS)
data = BeautifulSoup(page.text, 'html.parser')
cards = data.find_all('div', class_='card make-model-card suvs')
for card in cards:
model = card.p.text
image = card.img['data-image-src']
# Creating hyperlinks to save it in suvs.xlsx file
hyperlink = f'=HYPERLINK("{image}"; "{model}")'
# images.append(image)
# Get 'More Details' page URL
details_url = '' + card.div.a.attrs['href']
details_page = requests.get(details_url, headers=HEADERS)
details_data = BeautifulSoup(details_page.text, 'html.parser')
details = details_data.find_all('div', class_='expanded-list-contents')
model_year = details_data.h3.text
if details_data.find('li', class_='expanded-list-item'):
msrp_range = details_data.find('li', class_='expanded-list-item').text.replace('MSRP Range\n', '').strip()
msrp_range = ''
if details_data.find('td', class_='expanded-list-item'):
msrp_range = details_data.find('li', class_='expanded-list-item').text.replace('MSRP Range\n', '').strip()
msrp_range = ''
if details_data.find('div', class_='star-rating__consumer-review'):
consumer_rating = details_data.find('div', class_='star-rating__consumer-review').text.replace('\n', '').strip()
consumer_rating = consumer_rating[:5] + ' ' + consumer_rating[-15:]
consumer_rating = 'Not rated yet'
# Creating dataframe "homes"
suvs = pd.DataFrame({
'images': images,
'models': models,
'year': model_years,
'msrp_ranges': msrp_ranges,
'consumer_ratings': consumer_ratings,
# 'combined_mpgs': combined_mpgs,
# 'body_styles': body_styles
file_name = 'suvs.xlsx'
suvs.to_excel(file_name, index=False) # index=False to avoid creating index column
# to see where you're missing data and how much data is missing
except requests.exceptions.ConnectionError:
print("Couldn't connect to the requested page!\n"
'Please, check your internet connection.')
except PermissionError:
print(f'The {file_name} file is opened.\n'
'Please, close it, and run the script again.')

How to add headers when creating csv file in python

I am getting "raw" data from a csv file, and putting only what I need for a new csv file that will be used to auto add users to a different system...
I am unsure how to add the correct headers needed for the file.
I've tried looking at other examples of adding headers but have not figured this out yet...
The headers I need to add are as follows:
"ID Card Number","Name","E-Mail","User Level","Position","Status","Covered Under Insurance","Paid Insurance"
(and in that order)
import csv
def studentscsv():
with open('..\StudentEmails_and_StudentNumbers.csv') as csv_file:
csv_reader = csv.reader(csv_file, delimiter=',')
with open('mydirectory\student_users.csv', mode='w', newline='') as output_file:
write = csv.writer(output_file, delimiter=',', quoting=csv.QUOTE_MINIMAL)
for row in csv_reader:
a = row[0]
studentnumber = row[1]
firstname = row[2]
lastname = row[3]
grade = row[4]
studentname = firstname + " " + lastname
studentemail = firstname + "." + lastname + ""
status = "Active"
position = "Student"
covered = "Yes"
paid = "Yes"
write.writerow([studentnumber, studentname, studentemail, grade, position, status, covered, paid])
def main():
Controls the program execution
:param in_file: the name of the input file.
:return: None
if __name__ == '__main__':
The file generates fine with the way the code is written. I am just unsure what I need to change to add the headers.
Using the csv module, as you are, it's pretty straightforward. Define your headers in a list, create a DictWriter with fieldnames set to that list, and call writeheader() before writing any rows so that the header line is emitted. Reference the following code and documentation:
import csv
# newline='' lets the csv module manage line endings itself, which avoids
# blank rows between records on Windows.
with open('names.csv', 'w', newline='') as csvfile:
    fieldnames = ['first_name', 'last_name']
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    # DictWriter does not write the header on its own; writeheader() emits
    # the fieldnames as the first row of the file.
    writer.writeheader()
    writer.writerow({'first_name': 'Baked', 'last_name': 'Beans'})
    writer.writerow({'first_name': 'Lovely', 'last_name': 'Spam'})
    writer.writerow({'first_name': 'Wonderful', 'last_name': 'Spam'})
Here's the documentation:

Saving list to a .csv file

I have written code that opens a .csv file and takes user input to filter it into a new list. What I am having trouble with is saving this new list to a .csv file properly.
This is my code:
#author: Joakim
from pprint import pprint
import csv
with open ('Change_in_Unemployment_2008-2014.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
next(readCSV) #Removing header
result = []
found = False
user_input = input("Please enter a full/partial NUTS code to filter by: ")
for row in readCSV:
if row[0].startswith(user_input):
found = True
if found == False:
print("There are no registered NUTS codes containing your input.. Please try again")
if found == True:
print("\n Successfully found ", len(result), "entries!""\n")
pprint (result)
#store data in a new csv file
Stored_path = "C:\data_STORED.csv"
file = open(Stored_path, 'w')
writer = csv.writer(file)
writer.writerow(["NUTS CODE", " Municipality", " value"])
for i in range(len(result)):
new_row = result[i]
NUTS_CODE = new_row[0]
Municipality = new_row[1]
Value = new_row[2]
writer.writerow([NUTS_CODE, Municipality])
If one runs my code with an input of : PL, one gets this list:
[['PL11', 'odzkie', '2.2'],
['PL12', 'Mazowieckie', '1.2'],
['PL21', 'Maopolskie', '2.9'],
['PL22', 'Slaskie', '2'],
['PL31', 'Lubelskie', '1.1'],
['PL32', 'Podkarpackie', '5.8'],
['PL33', 'Swietokrzyskie', '2.6'],
['PL34', 'Podlaskie', '2.7'],
['PL41', 'Wielkopolskie', '1.6'],
['PL42', 'Zachodniopomorskie', '-1.1'],
['PL43', 'Lubuskie', '1.8'],
['PL51', 'Dolnoslaskie', '0'],
['PL52', 'Opolskie', '1.3'],
['PL61', 'Kujawsko-Pomorskie', '1.6'],
['PL62', 'Warminsko-Mazurskie', '2.4'],
['PL63', 'Pomorskie', '3.1']]'
Now I would like to store this neatly into a new .csv file, but when I use the code above, I only get a couple of values repeated throughout.
What is my error?
