Reading CSV file Into a Dictionary (KeyError: '","') - python-3.x

I want to read a CSV file into a Dictionary, here is my code:
import csv
import sys
with open('/Users/m/based_final.csv',mode='r') as my_input_file:
csv_reader=csv.DictReader(my_input_file)
line_count=0
for row in csv_reader:
if line_count == 0:
print(str.format('Column names are {",".join(row)}'))
line_count += 1
print(str.format('\t{row["Indication"]} {row["Name"]} {row["Drug id"]} {row["Synonyms"]} '))
line_count += 1
print('Processed {line_count} lines.')
But I get this error:
KeyError: '","'.
Here is what the data looks like:
Indication Name Drug id Synonyms
for_treatment bivalirudin ['db00006', 'btd00076'] ['bivalirudina', 'bivalirudinum', 'hirulog']
for_alteplase a name ['db00009', 'btd00050', 'biod00050'] ['alteplase (genetical recombination)', 'alteplase, recombinant']
Any idea how this can be fixed?
Thx

Here is your code, fixed, with comments describing what I changed. However, for this code to work you also need to change the first line of the file: the CSV file's first line needs to be formatted. Each key that you want in your dictionary needs to be followed by a comma, and there must not be any spaces around the commas.
import csv
import sys
# Print the column names once, then every record of the CSV file.
# DictReader consumes the header row and uses it as the keys of each row dict.
with open('/Users/m/based_final.csv', mode='r') as my_input_file:
    csv_reader = csv.DictReader(my_input_file)
    line_count = 0
    for row in csv_reader:
        if line_count == 0:
            # The f prefix makes this an f-string, so the expression inside the
            # braces is evaluated. Without it, str.format() treated '","' as a
            # field name and raised KeyError: '","'. A space was added after
            # the comma for readability.
            print(f'Column names are {", ".join(row)}')
            line_count += 1  # account for the header line
        # An f-string is already fully formatted; wrapping it in str.format()
        # is unnecessary and would re-parse any '{' or '}' present in the data.
        print(f'\t{row["Indication"]} {row["Name"]} {row["Drug id"]} {row["Synonyms"]} ')
        line_count += 1
    print(f'Processed {line_count} lines.')

Related

cs50 Pset 6 DNA - Issue creating list

I have a code which iterates through the text, and tells me which is the maximum amount of times each dna STR is found. The only step missing to be able to match these values with the CSV file, is to store them into a list, BUT I AM NOT ABLE TO DO SO. When I run the code, the maximum values are printed independently for each STR sequence.
I have tried to "append" the values into a list, but I was not successful, thus, I cannot match it with the dna sequences of the CSV (large nor small).
Any help or advice is greatly appreciated!
Here is my code, and the results I get with using "text 1" and "small csv":
`
import cs50
import sys
import csv
import os
if len(sys.argv) != 3:
print("Usage: python dna.py data.csv sequence.txt")
csv_db = sys.argv[1]
file_seq = sys.argv[2]
with open(csv_db, newline='') as csvfile: #with open(csv_db) as csv_file
csv_reader = csv.reader(csvfile, delimiter=',')
header = next(csv_reader)
i = 1
while i < len(header):
STR = header[i]
len_STR = len(STR)
with open(file_seq, 'r') as my_file:
file_reader = my_file.read()
counter = 0
a = 0
b = len_STR
list = []
for text in file_reader:
if file_reader[a:b] != STR:
a += 1
b += 1
else:
counter += 1
a += len_STR
b += len_STR
list.append(counter)
print(list)
i += 1
`
The problem is where the variable "list" is declared. Every time you iterate over the STRs in the variable "header", you declare:
list = []
Thus, you create a brand-new list on every iteration, which stores only the value for the current STR. To build one list with the values of all STRs appended, you need to declare the variable "list" before the while loop and put the "print" call after the while loop:
list = []
while i < len(header):
<your loop code>
print(list)
This should solve your problem.
P.S. Avoid using "list" as a variable name. "list" is a Python built-in and is always available automatically. If you rebind it, you will no longer be able to use the list() function in your code.

Getting an IndexError: list index out of range in line no. = 19?

import csv
with open('C:/Users/dkarar/Desktop/Mapping project/RC_Mapping.csv', 'r') as file1:
with open('C:/Users/dkarar/Desktop/Mapping project/Thinclient_mapping.csv', 'r') as file2:
with open('C:/Users/dkarar/Desktop/Mapping project/output.csv', 'w') as outfile:
writer = csv.writer(outfile)
reader1 = csv.reader(file1)
reader2 = csv.reader(file2)
for row in reader1:
if not row:
continue
for other_row in reader2:
if not other_row:
continue
# if we found a match, let's write it to the csv file with the id appended
if row[1].lower() == other_row[1].lower():
new_row = other_row
new_row.append(row[0])
writer.writerow(new_row)
continue
# reset file pointer to beginning of file
file2.seek(0)
You seem to be getting at least one row where there is a single element. That's why when accessing row[1] you get an IndexError, there's only one element in the list row.

how to remove the quotations from the string in python?

The CSV file returns the column value in dictionary format, but I can't get a value from the dictionary using dic.get("name"). It shows an error like ['str' object has no attribute 'get']. The actual problem is that the CSV returns the dict wrapped in quotes, so Python treats it as a string. How can I remove the quotes, and how can I fix this? Please help!
with open('file.csv') as file:
reader=csv.reader(file)
count=0
for idx,row in enumerate(reader):
dic=row[5]
if(idx==0):
continue
else:
print(dic.get("name"))
filename file_size file_attributes region_count region_id region_shape_attributes region_attributes
adutta_swan.jpg -1 {"caption":"Swan in lake Geneve","public_domain":"no","image_url":"http://www.robots.ox.ac.uk/~vgg/software/via/images/swan.jpg"} 1 0 {"name":"rect","x":82,"y":105,"width":356,"height":207} {"name":"not_defined","type":"unknown","image_quality":{"good":true,"frontal":true,"good_illumination":true}}
wikimedia_death_of_socrates.jpg -1 {"caption":"The Death of Socrates by David","public_domain":"yes","image_url":"https://en.wikipedia.org/wiki/The_Death_of_Socrates#/media/File:David_-_The_Death_of_Socrates.jpg"} 3 0 {"name":"rect","x":174,"y":139,"width":108,"height":227} {"name":"Plato","type":"human","image_quality":{"good_illumination":true}}
wikimedia_death_of_socrates.jpg -1 {"caption":"The Death of Socrates by David","public_domain":"yes","image_url":"https://en.wikipedia.org/wiki/The_Death_of_Socrates#/media/File:David_-_The_Death_of_Socrates.jpg"} 3 1 {"name":"rect","x":347,"y":114,"width":91,"height":209} {"name":"Socrates","type":"human","image_quality":{"frontal":true,"good_illumination":true}}
wikimedia_death_of_socrates.jpg -1 {"caption":"The Death of Socrates by David","public_domain":"yes","image_url":"https://en.wikipedia.org/wiki/The_Death_of_Socrates#/media/File:David_-_The_Death_of_Socrates.jpg"} 3 2 {"name":"ellipse","cx":316,"cy":180,"rx":17,"ry":12} {"name":"Hemlock","type":"cup"}
Use DictReader which reads the csv as a dictionary!
import csv
import json
# Print the "name" entry of every row's region_shape_attributes cell.
with open('graph.csv') as file:
    # Read the CSV with DictReader so each row is a dict keyed by the header row
    reader = csv.DictReader(file)
    # Iterate through the rows
    for row in reader:
        # The cell holds JSON text; json.loads turns that string into a dictionary
        region_shape_attributes = json.loads(row['region_shape_attributes'])
        print(region_shape_attributes['name'])
`import csv
import ast
with open('file.csv') as file:
#Read csv as dictreader
reader=csv.DictReader(file)
count=0
#Iterate through rows
for idx,row in enumerate(reader):
#print(row)
row_5=row['region_shape_attributes']
y=ast.literal_eval(row_5)
print(y.get("name"))`
This code also works for me.

how to read data from csv to feed them to cnn?

I have the following csv structure:
image_id, class_name, color, browneyes, feature2, feature3, feature4
for example:
429759,dog,black,1,0,0,husky
352456,cat,white,0,0,0,any
how can i read the csv file so for each row it reads the image file and feeds it to the model? (the image_id is the image filename)
You haven't mentioned which language you are coding in, but since you've tagged the question with Python, I'll show you an example in Python.
Python has a nice built-in module named csv that you can use for working with CSV files.
See the code below.
CSV sample
name,department,birthday month
John Smith,Accounting,November
Erica Meyers,IT,March
Code
import csv
# Echo the header row, then one sentence per data row, then the number of lines read.
with open('employee_birthday.txt') as csv_file:
    rows_seen = 0  # stays 0 when the file has no rows at all
    for rows_seen, record in enumerate(csv.reader(csv_file, delimiter=','), start=1):
        if rows_seen == 1:
            # The first row holds the column names
            print(f'Column names are {", ".join(record)}')
            continue
        print(f'\t{record[0]} works in the {record[1]} department, and was born in {record[2]}.')
    print(f'Processed {rows_seen} lines.')
reference : https://realpython.com/python-csv/
official docs : https://docs.python.org/3/library/csv.html

Creating a dictionary to count the number of occurrences of Sequence IDs

I'm trying to write a function to count the number of each sequence ID that occurs in this file (it's a sample blast file)
The picture above is the input file I'm dealing with.
def count_seq(input):
dic1={}
count=0
for line in input:
if line.startswith('#'):
continue
if line.find('hits found'):
line=line.split('\t')
if line[1] in dic1:
dic1[line]+=1
else:
dic1[line]=1
return dic1
Above is my code which when called just returns empty brackets {}
So I'm trying to count how many times each of the sequence IDs (second element of last 13 lines) occur eg: FO203510.1 occurs 4 times.
Any help would be appreciated immensely, thanks!
Maybe this is what you're after:
def count_seq(input_file):
    """Count how many lines each sequence ID appears on in a text file.

    Args:
        input_file: Path of the file to read (e.g. a BLAST result file).

    Returns:
        A dict mapping each sequence ID (the second whitespace-separated
        field of a line) to its number of occurrences. Lines starting
        with '#' and lines with fewer than two fields, such as blank
        lines, are ignored.
    """
    dic1 = {}
    with open(input_file, "r") as f:
        for line in f:
            line = line.strip()
            if line.startswith('#'):
                continue
            fields = line.split()
            # A blank or truncated line has no second column; indexing
            # fields[1] on it would raise IndexError.
            if len(fields) < 2:
                continue
            seq_id = fields[1]
            dic1[seq_id] = dic1.get(seq_id, 0) + 1
    return dic1
print(count_seq("blast_file"))
This is a fitting case for collections.defaultdict. Let f be the file object. Assuming the sequences are in the second column, it's only a few lines of code as shown.
from collections import defaultdict
d = defaultdict(int)
seqs = (line.split()[1] for line in f if not line.strip().startswith("#"))
for seq in seqs:
d[seq] += 1
See if it works!

Resources