Python CSV not writing data to file - python-3.x

I have hit a wall with this. I am new to writing CSV files with Python and have read lots of different posts on the topic, but I am stuck and could use a little help.
import csv
#headers from the read.csv file that I wan't to parse and write to the new file.
headers = ['header1', 'header5', 'header6', 'header7']
#open the write.csv file to write the data to
with open("write.csv", 'wb') as csvWriter:
writer = csv.writer(csvWriter)
#open the main data file that I want to parse data out of and write to write.csv
with open('reading.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',' )
csvList = list(readCSV)
#finds where the position of the data I want to pull out and write to write.csv
itemCode = csvList[0].index(headers[0])
vendorName = csvList[0].index(headers[1])
supplierID = csvList[0].index(headers[2])
supplierItemCode = csvList[0].index(headers[3])
for row in readCSV:
writer.writerow([row[itemCode], row[vendorName], row[supplierID], row[supplierItemCode]])
csvWriter.close()
---UPDATE---
I made the changes suggested and tried commenting out the following part of the code & changing 'wb' to 'w' and the program worked. However, I don't understand why, and how do I set this up so that I can list the header I want to pull out?
csvList = list(readCSV)
itemCode = csvList[0].index(headers[0])
vendorName = csvList[0].index(headers[1])
supplierID = csvList[0].index(headers[2])
supplierItemCode = csvList[0].index(headers[3])
Here is my updated code:
headers = ['header1', 'header5', 'header6', 'header7']
#open the write.csv file to write the data to
with open("write.csv", 'wb') as csvWriter, open('reading.csv') as csvfile:
writer = csv.writer(csvWriter)
readCSV = csv.reader(csvfile, delimiter=',' )
"""csvList = list(readCSV)
#finds where the position of the data I want to pull out and write to write.csv
itemCode = csvList[0].index(headers[0])
vendorName = csvList[0].index(headers[1])
supplierID = csvList[0].index(headers[2])
supplierItemCode = csvList[0].index(headers[3])"""
for row in readCSV:
writer.writerow([row[0], row[27], row[28], row[29]])

It looks like you want to write a subset of columns to a new file. This problem is simpler with DictReader/DictWriter. Note the correct use of open when using Python 3.x. Your attempt was using the Python 2.x way.
import csv
# headers you want in the order you want
headers = ['header1', 'header5', 'header6', 'header7']

# Python 3: open csv files in text mode with newline='' so the csv module
# controls line endings itself.
with open('write.csv', 'w', newline='') as csvWriter, \
        open('read.csv', newline='') as csvfile:
    # extrasaction='ignore' drops every input column that is not in headers
    # instead of raising ValueError.
    writer = csv.DictWriter(csvWriter, fieldnames=headers, extrasaction='ignore')
    readCSV = csv.DictReader(csvfile)
    writer.writeheader()
    for row in readCSV:
        writer.writerow(row)
Test data:
header1,header2,header3,header4,header5,header6,header7
1,2,3,4,5,6,7
11,22,33,44,55,66,77
Output:
header1,header5,header6,header7
1,5,6,7
11,55,66,77

If you want to use both the reader and the writer inside the same block, you should do something like this:
# Python 3: csv files are opened in text mode ('w', not 'wb') with newline=''.
with open("write.csv", 'w', newline='') as csvWriter, \
        open('reading.csv', newline='') as csvfile:
    writer = csv.writer(csvWriter)
    readCSV = csv.reader(csvfile, delimiter=',')
    # Consume only the header row. Calling list(readCSV) here would exhaust
    # the reader and leave the loop below with nothing to iterate over.
    headerRow = next(readCSV)
    # finds the position of each column I want to pull out and write to write.csv
    positions = [headerRow.index(name) for name in headers]
    writer.writerow(headers)
    for row in readCSV:
        writer.writerow([row[i] for i in positions])
# No explicit close(): the with statement closes both files on exit.

The with open() as csvWriter: construct handles closing of the supplied file once you exit the block. So once you get down to writer.writerow, the file is already closed.
You need to enclose the entire expression in the with open block.
# Python 3: text mode with newline='' (not 'wb') for csv output.
with open("write.csv", 'w', newline='') as csvWriter:
    ...
    # Do all writing within this block
    ...

Related

Python 3: How to combine values from a csv and append them to a new csv depending on a key value

I have a .csv table that looks like this:
original csv
I want to get a new .csv data that looks like this:
new csv
I already got to the point that I have the second csv with the unique values of the SITENAMES in the first column, but now I'm struggling to append the SPECIESNAMES into the second column.
uri = 'file:///C:/Users/t/Desktop/T/Natura/Python/20220214_Natura2000_specieslist.txt'
csvLyr = QgsVectorLayer(uri, "csvLayer", "delimitedtext")
spalten = ["SITECODE"]
sitecodes = pd.read_csv(uri, usecols=spalten)
spalten2 = ["SPECIESNAME_deutsch"]
species = pd.read_csv(uri, usecols=spalten2)
#### Schritt 2: Mithilfe von unique() die unique values der Sidecodes erhalten und als neue Spalte in eine csv schreiben
sitecodes_unique = sitecodes.SITECODE.unique()
print(sitecodes_unique)
print(len(sitecodes_unique))
path = 'C:/Users/t/Desktop/T/Natura/Python/Ergebnisse'
if not os.path.isdir(path):
os.makedirs(path)
with open('C:/Users/t/Desktop/T/Natura/Python/Ergebnisse/sitecodes_namen.csv', 'w+', newline='') as f:
wr = csv.writer(f)
for line in sitecodes_unique:
sitecodes_unique_split = line.split(',')
wr.writerow(sitecodes_unique_split)
Try this plain Python code as a viable alternative; it reads a CSV file directly instead of a .txt file. I've tried to use collections, as mentioned by @JonSG:
sitecodes = pd.read_csv('file:///C:/Users/t/Desktop/T/Natura/Python/20220214_Natura2000_specieslist.csv', index_col=False)

# Collect the species names of every site code; the dict keeps the site
# codes in the order they first appear in the file.
sitecodes_namen = defaultdict(list)
for sitecode, species in zip(sitecodes['SITECODE'], sitecodes['SPECIESNAME_deutsch']):
    sitecodes_namen[sitecode].append(species)

# One output row per site code with its species joined by commas. The two
# output columns are named explicitly so extra columns in the input file
# cannot break the DataFrame construction.
df = pd.DataFrame(
    [(sitecode, ','.join(names)) for sitecode, names in sitecodes_namen.items()],
    columns=['SITECODE', 'SPECIESNAME_deutsch'],
)
df.to_csv('C:/Users/t/Desktop/T/Natura/Python/Ergebnisse/sitecodes_namen.csv', index=False)

For loop into a pandas dataframe

I have the following piece of code; it works and prints out the data as it should. I'm trying (unsuccessfully) to put the results into a dataframe so I can export them to a CSV file.
I am looping through a json file and the results are correct, I just need two columns that print out to go into a dataframe instead of printing the results. I took out the code that was causing the error so it will run.
import json
import requests
import re
import pandas as pd
data = {}
df = pd.DataFrame(columns=['subtechnique', 'name'])
df
RE_FOR_SUB_TECHNIQUE = r"(T\d+)\.(\d+)"
r = requests.get('https://raw.githubusercontent.com/mitre/cti/master/enterprise-attack/enterprise-attack.json', verify=False)
data = r.json()
objects = data['objects']
for obj in objects:
ext_ref = obj.get('external_references',[])
revoked = obj.get('revoked') or '*****'
subtechnique = obj.get('x_mitre_is_subtechnique')
name = obj.get('name')
for ref in ext_ref:
ext_id = ref.get('external_id') or ''
if ext_id:
re_match = re.match(RE_FOR_SUB_TECHNIQUE, ext_id)
if re_match:
technique = re_match.group(1)
sub_technique = re_match.group(2)
print('{},{}'.format(technique+'.'+sub_technique, name))
Alternatively, is there an easier way to take the result of each loop iteration and append it to a CSV file?
Any help is appreciated.
Thanks
In this instance, it's likely easier to just write the csv file directly, rather than go through Pandas:
import csv

# newline="" lets the csv module write its own line endings; without it
# Windows output gets a blank line after every row.
with open("enterprise_attack.csv", "w", newline="") as f:
    my_writer = csv.writer(f)
    for obj in objects:
        name = obj.get('name')
        for ref in obj.get('external_references', []):
            # A missing or empty external_id simply fails to match below.
            ext_id = ref.get('external_id') or ''
            re_match = re.match(RE_FOR_SUB_TECHNIQUE, ext_id)
            if re_match:
                technique = re_match.group(1)
                sub_technique = re_match.group(2)
                print('{},{}'.format(technique+'.'+sub_technique, name))
                my_writer.writerow([technique+"."+sub_technique, name])
It should be noted that the above will overwrite the output of any previous runs. If you wish to keep the output of multiple runs, change the file mode to "a":
with open("enterprise_attack.csv", "a") as f:

Compare 2 CSV files (encoded = "utf8") keeping data format

I have 2 stock lists (New and Old). How can I compare it to see what items have been added and what had been removed (happy to add them to 2 different files added and removed)?
So far I have tried comparing the files row by row.
import csv
new = "new.csv"
old = "old.csv"
add_file = "add.csv"
remove_file = "remove.csv"
with open(new,encoding="utf8") as new_read, open(old,encoding="utf8") as old_read:
new_reader = csv.DictReader(new_read)
old_reader = csv.DictReader(old_read)
for new_row in new_reader :
for old_row in old_reader:
if old_row["STOCK CODE"] == new_row["STOCK CODE"]:
print("found")
This works for one item. If I add an *else:* it just keeps printing until the item is found, so it's not an accurate way of comparing the files.
I have 5k worth of rows.
There must be a better way to add the differences to the 2 different files and keep the same data structure at the same time ?
N.B. I have tried this link: Python : Compare two csv files and print out differences
2 minor issues:
1. the data structure is not kept
2. there is no reference to the change of location
You could just read the data into memory and then compare.
I used sets for the codes in this example for faster lookup.
import csv
def get_csv_data(file_name):
    """Read a UTF-8 CSV file into memory.

    Args:
        file_name: Path of the CSV file; it must have a 'STOCK CODE' column.

    Returns:
        A ``(data, codes)`` tuple: ``data`` is the list of row dicts in file
        order and ``codes`` is the set of every row's 'STOCK CODE' value.

    Raises:
        KeyError: If a row has no 'STOCK CODE' column.
    """
    # newline='' hands line endings to the csv module untouched, so newlines
    # embedded in quoted fields are read back exactly as stored.
    with open(file_name, encoding="utf8", newline='') as csv_file:
        data = list(csv.DictReader(csv_file))
    codes = {row['STOCK CODE'] for row in data}
    return data, codes
def write_csv(file_name, data, codes):
    """Write the rows of ``data`` whose stock code is not in ``codes``.

    Args:
        file_name: Path of the CSV file to create (overwritten if present).
        data: List of row dicts; the keys of the first row become the header.
        codes: Collection of 'STOCK CODE' values to leave out.

    An empty ``data`` list produces an empty file, because there is no row
    to take the header from.
    """
    with open(file_name, 'w', encoding="utf8", newline='') as csv_file:
        if not data:
            return
        writer = csv.DictWriter(csv_file, fieldnames=list(data[0]))
        writer.writeheader()
        writer.writerows(row for row in data if row['STOCK CODE'] not in codes)
# Load both stock lists fully into memory.
new_data, new_codes = get_csv_data('new.csv')
old_data, old_codes = get_csv_data('old.csv')

# Each entry: (output file, rows to scan, codes that exclude a row).
# add.csv gets the new rows whose code is absent from the old list;
# remove.csv gets the old rows whose code is absent from the new list.
for target, rows, known_codes in (
    ('add.csv', new_data, old_codes),
    ('remove.csv', old_data, new_codes),
):
    write_csv(target, rows, known_codes)

How to write into a CSV file with Python

Background:
I have a CSV (csv_dump) file with data from a MySQL table. I want to copy some of the lines that meet certain conditions (row[1] == condition_1 and row[2] == condition_2) into a temporary CSV file (csv_temp).
Code Snippet:
f_reader = open(csv_dump, 'r')
f_writer = open(csv_temp, 'w')
temp_file = csv.writer(f_writer)
lines_in_csv = csv.reader(f_reader, delimiter=',', skipinitialspace=False)
for row in lines_in_csv:
if row[1] == condition_1 and row[2] == condition_2:
temp_file.writerow(row)
f_reader.close()
f_writer.close()
Question:
How can I copy the line that is being read "as is" into the temp file with Python3?
test.csv
data1,data2,data3
120,80,200
140,50,210
170,100,250
150,70,300
180,120,280
The code goes here
import csv
# Both files are opened with newline='' so the csv module handles line
# endings itself.
with open("test.csv", 'r', newline='') as incsvfile, \
        open('tempfile.csv', 'w', newline='') as outcsvfile:
    input_csv = csv.reader(incsvfile, delimiter=',', skipinitialspace=False)
    spamwriter = csv.writer(outcsvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    # Copy the title row through unfiltered.
    first_row = next(input_csv)
    spamwriter.writerow(first_row)
    for row in input_csv:
        if int(row[1]) != 80 and int(row[2]) != 300:
            spamwriter.writerow(row)
output tempfile.csv
data1,data2,data3
140,50,210
170,100,250
180,120,280
If your file has no title row, remove these two lines:
first_row = next(input_csv)
spamwriter.writerow(first_row)
The following Python script seems to do the job... However, having said that, you should probably be using a MySQL query to do this work directly, instead of re-processing from an intermediate CSV file. But I guess there must be some good reason for wanting to do that?
mycsv.csv:
aa,1,2,5
bb,2,3,5
cc,ddd,3,3
hh,,3,1
as,hfd,3,3
readwrite.py:
import csv
# Python 3: the csv module needs text-mode files opened with newline='';
# the 'rb'/'wb' modes are the Python 2 convention and fail here.
with open('mycsv.csv', 'r', newline='') as infile, \
        open('out.csv', 'w', newline='') as outfile:
    inreader = csv.reader(infile, delimiter=',', quotechar='"')
    outwriter = csv.writer(outfile)
    for row in inreader:
        # Keep only the rows whose third and fourth fields are equal.
        if row[2] == row[3]:
            outwriter.writerow(row)
out.csv:
cc,ddd,3,3
as,hfd,3,3
With a little more work, you could change the 'csv.writer' to make use of the same delimiters and escape/quote characters as the 'csv.reader'. It's not exactly the same as writing out the raw line from the file, but I think it will be practically as fast since the lines in question have already clearly been parsed without error if we have been able to check the value of specific fields.

Can't store the scraped results in third and fourth column in a csv file

I've written a script which scrapes the address and phone number of certain shops based on Name and Lid. It takes the Name and Lid stored in column A and column B respectively of a CSV file and searches with them. After fetching the result for each search, I expected the parser to put the results in column C and column D respectively, as shown in the second image. This is where I got stuck: I don't know how to write to the third and fourth columns using a reading or writing method so that the data is placed there. This is what I'm trying now:
import csv
import requests
from lxml import html
Names, Lids = [], []
with open("mytu.csv", "r") as f:
reader = csv.DictReader(f)
for line in reader:
Names.append(line["Name"])
Lids.append(line["Lid"])
with open("mytu.csv", "r") as f:
reader = csv.DictReader(f)
for entry in reader:
Page = "https://www.yellowpages.com/los-angeles-ca/mip/{}-{}".format(entry["Name"].replace(" ","-"), entry["Lid"])
response = requests.get(Page)
tree = html.fromstring(response.text)
titles = tree.xpath('//article[contains(#class,"business-card")]')
for title in titles:
Address= title.xpath('.//p[#class="address"]/span/text()')[0]
Contact = title.xpath('.//p[#class="phone"]/text()')[0]
print(Address,Contact)
How my csv file looks like now:
My desired output is something like:
You can do it like this. Create a fresh output csv file whose header is based on the input csv, with the addition of the two columns. When you read a csv row it's available as a dictionary, in this case called entry. You can add the new values to this dictionary from the stuff you've gleaned on the 'net. Then write each newly created row out to file.
import csv
import requests
from lxml import html
with open("mytu.csv", "r", newline='') as f, open('new_mytu.csv', 'w', newline='') as g:
    reader = csv.DictReader(f)
    # Output header = input header plus the two scraped columns.
    newfieldnames = reader.fieldnames + ['Address', 'Phone']
    # Bind the DictWriter to a local name only; also assigning it to
    # csv.writer would overwrite the module's own writer function.
    writer = csv.DictWriter(g, fieldnames=newfieldnames)
    writer.writeheader()
    for entry in reader:
        Page = "https://www.yellowpages.com/los-angeles-ca/mip/{}-{}".format(entry["Name"].replace(" ","-"), entry["Lid"])
        response = requests.get(Page)
        tree = html.fromstring(response.text)
        # XPath attribute tests are written @class.
        titles = tree.xpath('//article[contains(@class,"business-card")]')
        if titles:
            # Only the first matching business card is used.
            title = titles[0]
            Address = title.xpath('.//p[@class="address"]/span/text()')[0]
            Contact = title.xpath('.//p[@class="phone"]/text()')[0]
        else:
            # No business card on the page: keep the row, leave the new columns blank.
            Address = Contact = ''
        print(Address, Contact)
        # entry is the row dict read from the input; extend it and write it out.
        entry['Address'] = Address
        entry['Phone'] = Contact
        writer.writerow(entry)

Resources