Delete multiple Column(headers) in csv in python - python-3.x

Here is my code
import csv
#import pandas as pd
f = open("stu.csv", "a+", newline="")
Smain = csv.writer(f)
Smain.writerow(["Name", "Father Name", "Class", "Admission Number"])
mainrec = []
increas = 1
class1 = open("class-1.csv", "a+", newline="")
stuclas1 = csv.writer(class1)
stuclas1.writerow(["Roll Number", "Name", "Admission Number"])
while True:
nam = input("Enter Student Name - ")
Clas = int(input("Enter Class - "))
Fname = input("Enter Father Name - ")
adm = 100 + increas
lst = [nam, Fname, Clas, adm]
mainrec.append(lst)
if Clas == 1:
stucls1list = []
a1 = 0
rollnum_cla1 = 0 + increas + a1
a1 = 0 + rollnum_cla1
lst1 = [rollnum_cla1, nam, adm]
stucls1list.append(lst1)
for i1 in stucls1list:
stuclas1.writerow(i1)
increas += 1
c = input("Input 'Y' If You Want To Record More, Otherwise Press 'N' - ")
if c == "N":
break
for i in mainrec:
Smain.writerow(i)
# load dataset
#df = pd.read_csv("stu.csv")
# select the rows
# if Name column
# has special characters
#print(df[df.Name.str.contains(r'[Name]')])
# drop the merged selected rows
f.close()
class1.close()
Can anyone tell me how i can delete duplicate header
when i run this program more than 2 time it start duplication header and i dont want to duplicate header again and again please tell me how i can delete that header(column)
stu csv when
i run this program 2 times
Name,Father Name,Class,Admission Number
xyz,xyz,1,101
qwe,qwe,1,102
N,N,1,103
Name,Father Name,Class,Admission Number # this i want to delete
test,test,1,101
you,you,1,102

Here's a way to solve for it.
f = open("stu.csv", "a+", newline="")
f.seek(0)
if len(f.read()) == 0
Smain = csv.writer(f)
Smain.writerow(["Name", "Father Name", "Class", "Admission Number"])
This will check if there is content inside the file. If it has contents, then you already have the header. If there is no content, then you dont have a header. This will determine if you need to write the header to the file.
Here's the sample output when I tried:
First run:
No header #my print statement output
Output file after first run: #the file header written to file
Name,Father Name,Class,Admission Number
Second run:
header found, skipped writing header again #my print statement output
It did not write the output to file again.
Output file on second run:
Name,Father Name,Class,Admission Number
Note here that f.write will move the pointer to the last line by default (since the file is opened in a+ mode)

Related

How can I find out which row of data from this excel sheet is duplicated the most

I am trying to find out which row (street name) has the most crimes in an excel spreadsheet. I have found the sum for the highest amount of crimes I just can't find the actual row that generated that many occurrences.
import os
import csv
def main():
#create and save the path to the file...
fileToRead = "C:/Users/zacoli4407/Documents/Intro_To_Scipting/Crime_Data_Set.csv"
highestNumberOfCrimes = 0
data = []
rowNumber = 0
count = 0
with open(fileToRead, 'r') as dataToRead:
dataToRead = open(fileToRead, 'r') # open the access to the file
reader = csv.reader(dataToRead) # gives the ability to read from the file
for row in reader:
if row[4].isnumeric():
if int(row[4]) > highestNumberOfCrimes:
highestNumberOfCrimes = int(row[4])
rowNumber = count
data.append([row[2],row[3],row[4],row[5]]) #row 3 has the street name I am trying to acquire
count += 1
print(highestNumberOfCrimes)
with open("crime.txt", "w") as outputFile:
outputFile.write("The highest number of crimes is: \n")
outputFile.write(str(highestNumberOfCrimes))
main()
You could do the following:
import csv
from collections import defaultdict
result = defaultdict(float)
with open(fileToRead, 'r') as dataToRead:
reader = csv.reader(dataToRead)
header = next(reader)
for row in reader:
result[row[3]] += float(row[4])
#Now to get the street with maximum number of crimes
mx = max(result, key = result.get)
print(mx)
#to get the maximum number of crimes
print(result[mx])

Print the last occurrence of a word in a TXT in Python

I'm working on extracting data from a .txt file, and I want to pick up the last occurrence of a certain word in the whole file. In this case, I get three occurrences of the words D / DÑA and Tfno but I want only the last one of each one and print it.
def extract(in_filename):
if not os.path.exists(in_filename):
print("ERROR: Input file does not exist ❌ ")
sys.exit()
with open(in_filename, 'r') as file:
rows = file.readlines()
for row in rows:
if re.match('D/DÑA', row):
row_parse = row.strip()
print(row_parse)
elif re.match('Tfno', row):
row_parse = row.strip()
print(row_parse)
extract('apd29.txt')
The output is:
D/DÑA: PRUEBA PRUEBITA
Tfno: 666666666
D/DÑA: PRUEBA PRUEBITA
Tfno: 666666666
D/DÑA: PRUEBA PRUEBITA <-- I need this
Tfno: 666666666 <-- I need this
I expect the output:
D/DÑA: PRUEBA PRUEBITA
Tfno: 666666666
Use reversed
Ex:
def extract(in_filename):
if not os.path.exists(in_filename):
print("ERROR: Input file does not exist ❌ ")
sys.exit()
with open(in_filename, 'r') as file:
rows = file.readlines()
for row in reversed(rows): #!Update
if re.match(r'D/DÑA', row):
row_parse = row.strip()
print(row_parse)
break
extract('apd29.txt')
Assigning a variable outside of for loop would work in this situation.
result = "not found"
with open(in_filename, 'r') as file:
rows = file.readlines()
for row in rows:
if re.match('D/DÑA', row):
row_strip = row.strip()
row_parse = row_strip.rfind('D/DÑA')
result = row_parse
print(result)

How to update the contents of a file which consists of Headers in the first line and the values corresponding to it in the corresponding lines

I have a file with below contents:
pid int| name varchar(20)| price float
1 |Giga. |10.99
2. |PowerGiga. |29.99
I want to replace Giga with Mega in the file where the column is Name
and replace price column <15 with 13.99
I have just written the contents given by the user input to a file. Its not stored with any mappings. How do you I replace the name in the file?
Expected:
pid int| name varchar(20)| price float
1 |Mega. |13.99
2. |PowerGiga. |29.99
I have tried this with python as below .
Whats happening is, my entire file content is getting erased
import sys,re
import os
mypath="/Users/rk/Documents/code/PA1/"
db_used="CS457_PA2"
def updateTable():
if os.path.isdir(mypath+db_used):
filepath= mypath+db_used+"/"+filename+".txt"
if not os.path.isfile(filepath): #check if file not exists is true
print("!Failed to insert into table "+ filename +" because it does not exist.")
else :
Column1=List[3]
Column2=List[6]
value1=List[4]
value2=List[7]
newfile=open(filepath, "r")
for w in newfile:
list= w.split('|')
if value1 in list:
print(list)
a=1
print("yes")
newfile=open(filepath, "w")
for a in list:
if value1 in list[0:]:
newfile.write(a.replace(value1,value2))
print("check the file if its updated")
else:
print("nothing")
else :
db_used == " "
print("DB is not selected")
user_says = input().strip()
if "UPDATE" in user_says.upper():
temp=user_says.strip(";")
removespecial=re.sub("\W+", " ", temp) #removes special characters but preserves space
List=removespecial.split(" ")
filename=List[1]
updateTable()
else:
print("Debug")
I tried the below code and it worked for me.
with open(filepath, "rt") as fin:
with open(out, "wt") as fout:
for line in fin:
fout.write(line.replace(value2, value1))
os.remove(filepath)
os.rename(out,filepath)

python 3 tab-delimited file adds column file.write

I'm writing string entries to a tab-delimited file in Python 3. The code that I use to save the content is:
savedir = easygui.diropenbox()
savefile = input("Please type the filename (including extension): ")
file = open(os.path.join(savedir, savefile), "w", encoding="utf-8")
file.write("Number of entities not found: " + str(missing_count) + "\n")
sep = "\t"
for entry in entities:
file.write(entry[0]+"\t")
for item in entry:
file.write(sep.join(item[0]))
file.write("\t")
file.write("\n")
file.close()
The file saves properly. There are no errors or warnings sent to the terminal. When I open the file, I find an extra column has been saved to the file.
Query | Extra | Name
Abu-Jamal, Mumia | A | Mumia Abu-Jamal
Anderson, Walter | A | Walter Inglis Anderson
Anderson, Walter | A | Walter Inglis Anderson
I've added vertical bars between the tabs for clarity; they don't normally appear there. As well, I have removed a few columns at the end. The column between the vertical bars is not supposed to be there. The document that is saved to file is longer than three lines. On each line, the extra column is the first letter of the Query column. Hence, we have A's in these three examples.
entry[0] corresponds exactly to the value in the Query column.
sep.join(item[0]) corresponds exactly to columns 3+.
Any idea why I would be getting this extra column?
Edit: I'm adding the full code for this short script.
# =============================================================================
# Code to query DBpedia for named entities.
#
# =============================================================================
import requests
import xml.etree.ElementTree as et
import csv
import os
import easygui
import re
# =============================================================================
# Default return type is XML. Others: json.
# Classes are: Resource (general), Place, Person, Work, Species, Organization
# but don't include resource as one of the
# =============================================================================
def urlBuilder(query, queryClass="unknown", returns=10):
prefix = 'http://lookup.dbpedia.org/api/search/KeywordSearch?'
#Selects the appropriate QueryClass for the url
if queryClass == 'place':
qClass = 'QueryClass=place'
elif queryClass == 'person':
qClass = 'QueryClass=person'
elif queryClass == 'org':
qClass = 'QueryClass=organization'
else:
qClass = 'QueryClass='
#Sets the QueryString
qString = "QueryString=" + str(query)
#sets the number of returns
qHits = "MaxHits=" + str(returns)
#full url
dbpURL = prefix + qClass + "&" + qString + "&" + qHits
return dbpURL
#takes a xml doc as STRING and returns an array with the name and the URI
def getdbpRecord(xmlpath):
root = et.fromstring(xmlpath)
dbpRecord = []
for child in root:
temp = []
temp.append(child[0].text)
temp.append(child[1].text)
if child[2].text is None:
temp.append("Empty")
else:
temp.append(findDates(child[2].text))
dbpRecord.append(temp)
return dbpRecord
#looks for a date with pattern: 1900-01-01 OR 01 January 1900 OR 1 January 1900
def findDates(x):
pattern = re.compile('\d{4}-\d{2}-\d{2}|\d{2}\s\w{3,9}\s\d{4}|\d{1}\s\w{3,9}\s\d{4}')
returns = pattern.findall(x)
if len(returns) > 0:
return ";".join(returns)
else:
return "None"
#%%
# =============================================================================
# Build and send get requests
# =============================================================================
print("Please select the CSV file that contains your data.")
csvfilename = easygui.fileopenbox("Please select the CSV file that contains your data.")
lookups = []
name_list = csv.reader(open(csvfilename, newline=''), delimiter=",")
for name in name_list:
lookups.append(name)
#request to get the max number of returns from the user.
temp = input("Specify the maximum number of returns desired: ")
if temp.isdigit():
maxHits = temp
else:
maxHits = 10
queries = []
print("Building queries. Please wait.")
for search in lookups:
if len(search) == 2:
queries.append([search[0], urlBuilder(query=search[0], queryClass=search[1], returns=maxHits)])
else:
queries.append([search, urlBuilder(query=search, returns=maxHits)])
responses = []
print("Gathering responses. Please wait.")
for item in queries:
response = requests.get(item[1])
data = response.content.decode("utf-8")
responses.append([item[0], data])
entities = []
missing_count = 0
for item in responses:
temp = []
if len(list(et.fromstring(item[1]))) > 0:
entities.append([item[0], getdbpRecord(item[1])])
else:
missing_count += 1
print("There are " + str(missing_count) + " entities that were not found.")
print("Please select the destination folder for the results of the VIAF lookup.")
savedir = easygui.diropenbox("Please select the destination folder for the results of the VIAF lookup.")
savefile = input("Please type the filename (including extension): ")
file = open(os.path.join(savedir, savefile), "w", encoding="utf-8")
file.write("Number of entities not found: " + str(missing_count) + "\n")
sep = "\t"
for entry in entities:
file.write(entry[0]+"\t")
for item in entry:
file.write(sep.join(item[0]))
file.write("\t")
file.write("\n")
file.close()

Saving list to a .csv file

I have made a code that opens a .csv file and takes a user input to filter it to a new list. What I am having trouble with is saving this new list to a .csv file properly.
This is my code:
#author: Joakim
from pprint import pprint
import csv
with open ('Change_in_Unemployment_2008-2014.csv') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
next(readCSV) #Removing header
result = []
found = False
user_input = input("Please enter a full/partial NUTS code to filter by: ")
for row in readCSV:
if row[0].startswith(user_input):
result.append(row)
found = True
if found == False:
print("There are no registered NUTS codes containing your input.. Please try again")
if found == True:
print("\n Successfully found ", len(result), "entries!""\n")
pprint (result)
#store data in a new csv file
Stored_path = "C:\data_STORED.csv"
file = open(Stored_path, 'w')
writer = csv.writer(file)
writer.writerow(["NUTS CODE", " Municipality", " value"])
for i in range(len(result)):
new_row = result[i]
NUTS_CODE = new_row[0]
Municipality = new_row[1]
Value = new_row[2]
writer.writerow([NUTS_CODE, Municipality])
csvfile.close()
If one runs my code with an input of : PL, one gets this list:
[['PL11', 'odzkie', '2.2'],
['PL12', 'Mazowieckie', '1.2'],
['PL21', 'Maopolskie', '2.9'],
['PL22', 'Slaskie', '2'],
['PL31', 'Lubelskie', '1.1'],
['PL32', 'Podkarpackie', '5.8'],
['PL33', 'Swietokrzyskie', '2.6'],
['PL34', 'Podlaskie', '2.7'],
['PL41', 'Wielkopolskie', '1.6'],
['PL42', 'Zachodniopomorskie', '-1.1'],
['PL43', 'Lubuskie', '1.8'],
['PL51', 'Dolnoslaskie', '0'],
['PL52', 'Opolskie', '1.3'],
['PL61', 'Kujawsko-Pomorskie', '1.6'],
['PL62', 'Warminsko-Mazurskie', '2.4'],
['PL63', 'Pomorskie', '3.1']]'
Now I would like to store this neatly into a new .csv file, but when I use the code above, I only get a couple of values repeated throughout.
What is my error?

Resources