I have a working script that iterates through all the rows in a CSV file, but I want to configure it to read only a range of rows, such as index 0 - 100, and I am stuck on how to accomplish this properly.
Here is my current code:
# NOTE(review): as pasted, this loop appears before the CSV-loading code that
# builds `usernames` and `subscriber_ids` (the `with open(...)` section further
# down) — presumably the loading code runs first in the real script; confirm.
# Walk the two lists in lockstep, one (name, subscriber) pair per CSV row.
for (name, subscriber) in zip(usernames, subscriber_ids):
#do stuff with name / subscriber in selenium driver
# `elements` is not defined in this excerpt — presumably the result of the
# elided selenium step above; TODO confirm.
if len(elements) == 0:
# logs information
else:
#logs information
# Load the CSV; the 'utf-8-sig' codec drops a leading byte-order mark if present.
with open('file', encoding='utf-8-sig') as csvfile:
readCSV = csv.reader(csvfile, delimiter=',')
# Parallel lists: usernames[i] and subscriber_ids[i] come from the same row.
usernames = []
subscriber_ids = []
# Every row is consumed here; no start/stop bound is applied.
for row in readCSV:
# Column 0 is stored as the username, column 1 as the subscriber id.
username = row[0]
subscriberid = row[1]
usernames.append(username)
subscriber_ids.append(subscriberid)
What I want to do is just read from row 0 - 100 or start at index 100-200 etc. Any assistance would be appreciated.
Try something like this, where N is the number of lines you want to read:
from itertools import islice

# Window of lines to read: START is the index of the first line, and N
# (supplied by the caller, exactly as before) is how many lines to take.
# For rows 100-199 use START = 100 with N = 100.
START = 0

with open("datafile") as myfile:
    # islice skips START lines and then yields at most N more. Unlike a run
    # of next() calls, it simply stops early when the file is shorter than
    # the requested window instead of raising StopIteration.
    head = list(islice(myfile, START, START + N))
print(head)
Related
I am trying to find out which row (street name) has the most crimes in an excel spreadsheet. I have found the sum for the highest amount of crimes I just can't find the actual row that generated that many occurrences.
import os
import csv
def main():
    """Find the largest crime count in the data set and record it.

    Reads the CSV at the hard-coded path, treats column 4 as the crime
    count, prints the largest value found and writes it to ``crime.txt``.
    Rows whose column 4 is not numeric (such as a header row) are ignored
    when looking for the maximum.
    """
    #create and save the path to the file...
    fileToRead = "C:/Users/zacoli4407/Documents/Intro_To_Scipting/Crime_Data_Set.csv"
    highestNumberOfCrimes = 0
    data = []
    rowNumber = 0
    count = 0
    # Open the file exactly once: the `with` statement owns the handle and
    # closes it. Calling open() again inside the block would rebind the name
    # to a second handle that is never closed.
    # newline='' is what the csv module expects for files handed to a reader.
    with open(fileToRead, 'r', newline='') as dataToRead:
        reader = csv.reader(dataToRead) # gives the ability to read from the file
        for row in reader:
            if row[4].isnumeric():
                crimes = int(row[4])  # convert once, reuse below
                if crimes > highestNumberOfCrimes:
                    highestNumberOfCrimes = crimes
                    # Position in `data` of the row holding the current maximum.
                    rowNumber = count
            data.append([row[2],row[3],row[4],row[5]]) #row 3 has the street name I am trying to acquire
            count += 1
    print(highestNumberOfCrimes)
    with open("crime.txt", "w") as outputFile:
        outputFile.write("The highest number of crimes is: \n")
        outputFile.write(str(highestNumberOfCrimes))
main()
You could do the following:
import csv
from collections import defaultdict
# Total crimes per street name (column 3), summed over every data row.
result = defaultdict(float)
# newline='' is what the csv module expects for files handed to a reader.
with open(fileToRead, 'r', newline='') as dataToRead:
    reader = csv.reader(dataToRead)
    # Skip the header row; the None default tolerates a completely empty file.
    header = next(reader, None)
    for row in reader:
        # Blank/short rows and non-numeric counts are skipped rather than
        # aborting the whole run with IndexError/ValueError.
        try:
            crimes = float(row[4])
        except (IndexError, ValueError):
            continue
        result[row[3]] += crimes

# max() raises ValueError on an empty mapping, so only report when at least
# one usable row was found.
if result:
    #Now to get the street with maximum number of crimes
    mx = max(result, key=result.get)
    print(mx)
    #to get the maximum number of crimes
    print(result[mx])
import csv
# Three files are open at once: the two inputs to be matched and the output.
with open('C:/Users/dkarar/Desktop/Mapping project/RC_Mapping.csv', 'r') as file1:
with open('C:/Users/dkarar/Desktop/Mapping project/Thinclient_mapping.csv', 'r') as file2:
with open('C:/Users/dkarar/Desktop/Mapping project/output.csv', 'w') as outfile:
writer = csv.writer(outfile)
reader1 = csv.reader(file1)
reader2 = csv.reader(file2)
for row in reader1:
# Skip completely empty rows (csv.reader yields [] for a blank line).
if not row:
continue
# reader2 reads from wherever file2 currently is, so file2 has to be rewound
# (the seek(0) further down) before it can be scanned again for another row.
for other_row in reader2:
if not other_row:
continue
# if we found a match, let's write it to the csv file with the id appended
# NOTE(review): row[1] / other_row[1] need at least two fields; a non-empty
# row with a single field raises IndexError on this comparison.
if row[1].lower() == other_row[1].lower():
# new_row is the same list object as other_row (no copy is made).
new_row = other_row
new_row.append(row[0])
writer.writerow(new_row)
continue
# reset file pointer to beginning of file
file2.seek(0)
You seem to be getting at least one row that contains only a single element. That's why you get an IndexError when accessing row[1]: there is only one element in the list row.
I have a csv file that I'm trying to clean up. I am trying to look at the first column and delete any rows that have anything other than chars for that row in the first column (I'm working on cleaning up rows where the first column has a ^ or . for now). It seems all my attempts either do nothing or nuke the whole csv file.
Interestingly enough, I have code that can identify the problem rows and it seems to work fine
def FindProblemRows():
    """Return the indices of rows in Data.csv whose first column is suspect.

    A row is flagged when its first field contains ``^`` or ``.``. Indices
    are 0-based positions of the rows as parsed by ``csv.reader``; blank
    lines still occupy an index but are never flagged.

    Returns:
        list: Indices of the flagged rows, in ascending order.
    """
    # newline='' is what the csv module expects for files handed to a reader.
    with open('Data.csv', newline='') as csvDataFile:
        return [
            index
            for index, row in enumerate(csv.reader(csvDataFile))
            # `row and` guards blank lines, which parse to [] and would
            # otherwise raise IndexError on row[0].
            if row and ('^' in row[0] or '.' in row[0])
        ]
Below I have my latest three failed attempts. Where am I going wrong and what should I change? Which of these comes closest?
'''
def Clean():
with open("Data.csv", "w", newline='') as f:
data = list(csv.reader(f))
writer = csv.writer(f)
Problems = FindProblemRows()
data = list(csv.reader(f))
length = len(data)
for row in data:
for i in Problems:
for j in range (0, length):
if row[j] == i:
writer.writerow(row)
Problems.remove(i)
def Clean():
Problems = FindProblemRows()
with open('Data.csv') as csvDataFile:
csvReader = csv.reader(csvDataFile)
data = [row for row in csv.reader(csvDataFile)]
length = len(data)
width = len(data[0])
with open("Data.csv","r") as csvFile:
csvReader = csv.reader( csvFile )
with open("CleansedData.csv","w") as csvResult:
csvWrite = csv.writer( csvResult )
for i in Problems:
for j in range (0, length):
if data[j] == i:
del data[j]
for j in range (0, length):
csvWrite.writerow(data[j])
'''
def Clean():
# Opens Data.csv for reading and CleansedData.csv for writing; mode 'w'
# creates the output file or truncates an existing one.
with open("Data.csv", 'r') as infile , open("CleansedData.csv", 'w') as outfile:
# Reads every line of infile into a list, which consumes the file iterator.
data = [row for row in infile]
# NOTE: infile was already exhausted by the line above, so this loop has
# nothing left to iterate and nothing is written to outfile.
for row in infile:
# `row` would be a line of text here, so `column` would be one character.
for column in row:
# NOTE(review): data[row] indexes a list with a str; if this line were
# ever reached it would raise TypeError.
if "^" not in data[row][0]:
if "." not in data[row][0]:
outfile.write(data[row])
Update
Now I have:
def Clean():
# NOTE(review): `pd` is not imported in this excerpt — presumably pandas.
df = pd.read_csv('Data.csv')
# `'^' not in df.Symbol` evaluates to one Python bool, not a per-row mask,
# so df[...] is asked for a column labelled True/False — consistent with the
# reported "KeyError: True".
df = df['^' not in df.Symbol]
df = df['.' not in df.Symbol]
but I get KeyError: True
Shouldn't that work?
You should check, row by row, whether the Symbol column contains any of the characters of interest. The `str.contains` method takes a regular expression:
# Per-row mask: True where Symbol contains '.' or '^'. Inside a character
# class both characters are literal ('^' is only special in first position).
# na=False makes a missing Symbol count as "not bad", which keeps the mask
# purely boolean so that `~` can invert it.
bad_rows = df.Symbol.str.contains('[.^]', na=False)
# Keep only the rows that were not flagged.
df_clean = df[~bad_rows]
I have been working on code that takes rows from a CSV file and converts them into lists of integers for further mathematical operations. However, if a row turns out to be empty, it causes problems. Also, the user will not know which row is empty, so the solution should be general rather than pointing at a specific row and removing it. Here is the code:
import csv
import statistics as st
def RepresentsInt(i):
    """Return True if ``int(i)`` succeeds, False otherwise.

    Values that ``int()`` rejects with ValueError (such as ``''`` or
    ``'abc'``) and values it rejects with TypeError (such as ``None``)
    both count as "not an int".
    """
    try:
        int(i)
    except (TypeError, ValueError):
        return False
    return True
# Parsed rows: each entry is a list of ints, with 0 standing in for any cell
# that RepresentsInt rejects.
l = []
with open('Test.csv', 'r') as f:
reader = csv.reader(f, delimiter=',')
for row in reader:
l.append([int(r) if RepresentsInt(r) else 0 for r in row])
for row in l:
# Rebinds the loop variable to a filtered copy; the list stored in `l` is
# not modified. Genuine 0 values are dropped together with the placeholders.
row=[x for x in row if x!=0]
row.sort()
# A row in which every cell was 0 or a placeholder is printed as [].
print(row)
I've tried l=[row for row in l if row!=[]] and ...
if row==[]:
l.remove(row)
... but both do nothing, and there is no error code for either. Here is my csv file:
1,2,5,4
2,3
43,65,34,56,7
0,5
7,8,9,6,5
33,45,65,4
If I run the code, I will get [] for row 4 and 6 (which are empty).
This worked on my machine:
import csv
def RepresentsInt(i):
    """Return True if ``int(i)`` succeeds, False otherwise.

    Values that ``int()`` rejects with ValueError (such as ``''`` or
    ``'abc'``) and values it rejects with TypeError (such as ``None``)
    both count as "not an int".
    """
    try:
        int(i)
    except (TypeError, ValueError):
        return False
    return True
# Parse each CSV record into a list of ints, ignoring every cell that
# RepresentsInt rejects.
l = []
with open('Test.csv', 'r') as f:
    l.extend(
        [int(cell) for cell in record if RepresentsInt(cell)]
        for record in csv.reader(f, delimiter=',')
    )

# Keep only the records that still hold at least one value, then show them.
rows = list(filter(None, l))
for kept in rows:
    print(kept)
It is unclear what you are doing with the statistics module, but the following program should do what you asked for. The pprint module is imported to make the generated table easier to read when displayed. If this answer solves the problem presented in your question but you are having difficulty somewhere else, make sure you open another question targeted at the new problem.
#! /usr/bin/env python3
import csv
import pprint
def _to_int(value):
    """Return ``int(value)``, or 0 when *value* is not a valid integer."""
    try:
        return int(value)
    except ValueError:
        return 0


def main():
    """Pretty-print the integer rows of Test.csv without zeros or empty rows.

    Every cell is converted with ``int()``; cells that cannot be converted
    become 0. Zeros (placeholders and genuine zeros alike) are then removed
    from each row, and rows left empty are dropped before printing.

    ``int()`` is used instead of ``str.isdigit()`` so that negative numbers
    are kept, and so that characters which count as digits but are not
    valid integers (for example a superscript two) cannot raise ValueError.
    """
    # Add rows to table.
    with open('Test.csv', newline='') as file:
        table = list(csv.reader(file))
    # Convert table cells to numbers and remove zeros from the rows.
    table = [
        [number for number in map(_to_int, row) if number]
        for row in table
    ]
    # Remove empty rows and display the table.
    table = [row for row in table if row]
    pprint.pprint(table)
if __name__ == '__main__':
main()
I am writing code which takes rows from a CSV file and converts them into lists of integers. However, if I leave some blank entries in a row, I get a "list index out of range" error. Here is the code:
import csv
with open('Test.csv', 'r') as f:
reader = csv.reader(f, delimiter=',')
# Columns 0-3 are indexed unconditionally for every row: a row with fewer
# than four fields raises IndexError ("list index out of range"), and a cell
# that is an empty string makes int() raise ValueError.
rows = [[int(row[0]), int(row[1]),int(row[2]),int(row[3])] for row in reader]
for row in rows:
print(row)
I looked up some similar questions on this website and the best idea for the solution I got was:
rows = [[int(row[0]), int(row[1]),int(row[2]),int(row[3])] for row in reader if len(row)>1]
However, it resulted in the same error.
Thanks in advance!
The problem is that if you don't have an int or it is empty the cast will fail.
The below example inserts a zero '0' in case the value is not an int or is empty. Replace it by what you want.
You can optimize the code but this should work:
Edit: Shorter version
import csv
def RepresentsInt(s):
    """Return True if ``int(s)`` succeeds, False otherwise.

    Values that ``int()`` rejects with ValueError (such as ``''`` or
    ``'abc'``) and values it rejects with TypeError (such as ``None``)
    both count as "not an int".
    """
    try:
        int(s)
    except (TypeError, ValueError):
        return False
    return True
# Rows of ints; any cell that RepresentsInt rejects is stored as 0.
l = []
with open('test.csv', 'r') as f:
    for record in csv.reader(f, delimiter=','):
        converted = []
        for cell in record:
            converted.append(int(cell) if RepresentsInt(cell) else 0)
        l.append(converted)

# Show the converted rows, one per line.
for converted_row in l:
    print(converted_row)