Python 3 ~ How to obtain same result using CSV module - python-3.x

I have this in my code:
# Fragment from the question: `file`, `argv`, `csv` and `pd` are defined
# elsewhere in the asker's script.
with file:
    # Use a separate name for the reader; assigning it to `csv` would rebind
    # the module name and break any later `csv.` call.
    reader = csv.reader(file, delimiter=",")
df = pd.read_csv(argv[1])
# Every column except 'name', turned row by row into plain Python lists.
data = df.loc[:, df.columns != 'name'].to_numpy().tolist()
# Put the list of names in front of the numeric rows.
data.insert(0, df["name"].tolist())
and its output is this when I run print(data):
[['Alice', 'Bob', 'Charlie'], [2, 8, 3], [4, 1, 5], [3, 2, 5]]
I would love to know how I can obtain this same result using the csv module or for loops.
content:
name,AGATC,AATG,TATC
Alice,2,8,3
Bob,4,1,5
Charlie,3,2,5

import csv
# Build [names, row1, row2, ...] from file.csv: the first column of every data
# row goes into `names`, the remaining columns become one list of ints per row.
with open('file.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)  # skip headers; the default avoids StopIteration on an empty file
    names = []
    data = []
    for row in csvreader:
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise IndexError
            continue
        names.append(row[0])
        data.append(list(map(int, row[1:])))
print([names] + data)
Prints:
[['Alice', 'Bob', 'Charlie'], [2, 8, 3], [4, 1, 5], [3, 2, 5]]
EDIT:
import csv
# Same parsing as before, but the combined result is kept in `all_data` so
# individual entries can be indexed: all_data[0] is the list of names,
# all_data[1] is the first data row.
with open('file.csv', newline='') as csvfile:
    csvreader = csv.reader(csvfile)
    next(csvreader, None)  # skip headers; the default avoids StopIteration on an empty file
    names = []
    data = []
    for row in csvreader:
        if not row:
            # csv.reader yields [] for a blank line; row[0] would raise IndexError
            continue
        names.append(row[0])
        data.append(list(map(int, row[1:])))
all_data = [names] + data
print(all_data[0])
print(all_data[1])
Prints:
['Alice', 'Bob', 'Charlie']
[2, 8, 3]

Would that work for you?
import csv
from sys import argv
def csv_to_list_of_lists(csv_file):
    """Read a CSV file and return ``[names, row_1, row_2, ...]``.

    The first line is treated as a header and skipped. For every following
    line, the first field is appended to ``names`` and the remaining fields
    are converted to ``int`` and stored as one list per line.

    The result grows with the number of data rows rather than being pre-sized
    from the number of header columns, so files with any number of rows work.
    An empty or header-only file yields ``[[]]``.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list whose first element is the list of names and whose remaining
        elements are the integer rows, in file order.

    Raises:
        ValueError: If a non-name field cannot be converted to ``int``.
    """
    names = []
    rows = []
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file, newline='') as f:
        reader = csv.reader(f, delimiter=',')
        next(reader, None)  # skip the header row; tolerate an empty file
        for line in reader:
            if not line:
                # blank lines come back as [] and carry no data
                continue
            names.append(line[0])
            rows.append([int(x) for x in line[1:]])
    return [names] + rows


if __name__ == "__main__":
    # Only run the command-line part when executed as a script.
    output = csv_to_list_of_lists(argv[1])
    print(output)
result:
[['Alice', 'Bob', 'Charlie'], [2, 8, 3], [4, 1, 5], [3, 2, 5]]

Related

add_scatter and custom data

How to add fig.add_scatter with hover data that is in the hover label?
The minimal code is not working.
I need to add another set of data with the same hover template as the first one.
Many thanks
import numpy as np
import pandas as pd
# Minimal example from the question: a scatter figure built from df, plus a
# second trace added with add_scatter whose custom data does not show up in
# the hover label.
# NOTE(review): `px` is used below but no import for it is visible in this
# snippet - presumably `import plotly.express as px`; confirm.
a, b, c = [1, 2], [1, 5], [5, 6]
d, e, f = [5, 5], [4, 4], [5, 5]
s1 = ['A', 'F']
s2 = ['V', 'T']
# `d` is rebound here: it was the list [5, 5] above and is now the dict used
# for df, so d2['d'] below holds this dict rather than the list.
d = {'a': a, 'b': b, 'c': c, 's1':s1}
df = pd.DataFrame(data=d)
d2 = {'d': d, 'e': e, 'f': f, 's2':s2}
df2 = pd.DataFrame(data=d2)
fig = px.scatter(df, x='a', y='b', hover_data=['c', 's1'], color='s1', color_discrete_sequence=["green", "navy"])
# customdata below is the two literal strings 'f' and 's2', not the df2
# columns with those names.
fig.add_scatter(x=df2['d'], y=df2['e'], customdata=['f', 's2'], mode="markers", marker=dict(size=10,color='Purple'), name = 'A') # ------> these custom data are not in label, there is just %{customdata[1]}
# Set one hover template (its parts joined with <br>); no selector is passed.
fig.update_traces(
hovertemplate="<br>".join([
"<b>G:</b> %{x:.3f}",
"<b>R:</b> %{y:.6f}<extra></extra>",
"<b>D:</b> %{customdata[1]}",
"<b>E:</b> %{customdata[0]}",
])
)
fig.update_xaxes(title_font_family="Trebuchet")
# Marker size 9 for traces matching mode='markers'.
fig.update_traces(marker=dict(size=9),
selector=dict(mode='markers'))
fig.show()
There are errors in creating df2. Have assumed what you are trying to achieve. Below makes hovertext work.
import numpy as np
import pandas as pd
# Reworked example from the answer: df2 is built differently and the added
# trace receives real column values as customdata.
# NOTE(review): `px` is used below but no import for it is visible in this
# snippet - presumably `import plotly.express as px`; confirm.
a, b, c = [1, 2], [1, 5], [5, 6]
d, e, f = [5, 5], [4, 4], [5, 5]
s1 = ["A", "F"]
s2 = ["V", "T"]
# `d` is rebound from the list [5, 5] to this dict, so d2["d"] below is the
# dict, not the list.
d = {"a": a, "b": b, "c": c, "s1": s1}
df = pd.DataFrame(data=d)
d2 = {"d": d, "e": e, "f": f, "s2": s2}
# The question's construction of df2 is invalid as written:
# df2 = pd.DataFrame(data=d2)
# Instead: build a frame from `d` (columns a, b, c, s1) and join the
# remaining d2 entries (e, f, s2) onto it as extra columns.
df2 = pd.DataFrame(d).join(pd.DataFrame({k:v for k,v in d2.items() if k!="d"}))
fig = px.scatter(
df,
x="a",
y="b",
hover_data=["c", "s1"],
color="s1",
color_discrete_sequence=["green", "navy"],
)
# customdata is taken from the f and s2 columns and reshaped to
# len(df2) rows by 2 columns; x comes from column "a" because df2 has no
# "d" column in this version.
fig.add_scatter(
x=df2["a"],
y=df2["e"],
customdata=df2.loc[:,["f", "s2"]].values.reshape([len(df2),2]),
mode="markers",
marker=dict(size=10, color="Purple"),
name="A",
) # ------> in the question these custom data were not in the label, there was just %{customdata[1]}
# Set one hover template (its parts joined with <br>); no selector is passed.
fig.update_traces(
hovertemplate="<br>".join(
[
"<b>G:</b> %{x:.3f}",
"<b>R:</b> %{y:.6f}<extra></extra>",
"<b>D:</b> %{customdata[1]}",
"<b>E:</b> %{customdata[0]}",
]
)
)
fig.update_xaxes(title_font_family="Trebuchet")
# Marker size 9 for traces matching mode="markers".
fig.update_traces(marker=dict(size=9), selector=dict(mode="markers"))
fig.show()

Python 3 ~ How to take rows from a csv file and put them into a list

I would like to know how to take this file:
name,AGATC,AATG,TATC
Alice,2,8,3
Bob,4,1,5
Charlie,3,2,5
and put it in a list like the following:
[['Alice', 'Bob', 'Charlie'], [2, 8, 3], [4, 1, 5], [3, 2, 5]]
I'm fairly new to Python, so please excuse me.
my current code looks like this:
# Fragment from the question; `argv`, `csv` and `pd` come from imports that
# are not shown in this snippet.
# Read the second file completely; the `with` block closes it afterwards.
with open(argv[2], "r") as file1:
    text = file1.read()
strl = []
with open(argv[1], "r") as file:
    # Keep the reader under its own name; assigning it to `csv` would rebind
    # the module name.
    reader = csv.reader(file, delimiter=",")
    # Only the first line is needed: its fields 1..8 (an empty list if the
    # file has no lines).
    strl = next(reader, [])[1:9]
df = pd.read_csv(argv[1], header=0)
# One list per *column* of the frame - this is the column-wise result the
# question is asking about.
df = [df[col].tolist() for col in df.columns]
Ignore the strl part; it's for something else unrelated.
but it outputs like this:
[['Alice', 'Bob', 'Charlie'], [2, 4, 3], [8, 1, 2], [3, 5, 5]]
i want it to output like this:
[['Alice', 'Bob', 'Charlie'], [2, 8, 3], [4, 1, 5], [3, 2, 5]]
i would like it to output like the above sample
Using pandas
In [13]: import pandas as pd
In [14]: df = pd.read_csv("a.csv",header=None)
In [15]: df
Out[15]:
0 1 2 3
0 Alice 2 8 3
1 Bob 4 1 5
2 Charlie 3 2 5
In [16]: [df[col].tolist() for col in df.columns]
Out[16]: [['Alice', 'Bob', 'Charlie'], [2, 4, 3], [8, 1, 2], [3, 5, 5]]
Update:
In [51]: import pandas as pd
In [52]: df = pd.read_csv("a.csv",header=None)
In [53]: data = df[df.columns[1:]].to_numpy().tolist()
In [57]: data.insert(0,df[0].tolist())
In [58]: data
Out[58]: [['Alice', 'Bob', 'Charlie'], [2, 8, 3], [4, 1, 5], [3, 2, 5]]
Update:
In [51]: import pandas as pd
In [52]: df = pd.read_csv("a.csv")
In [94]: df
Out[94]:
name AGATC AATG TATC
0 Alice 2 8 3
1 Bob 4 1 5
2 Charlie 3 2 5
In [97]: data = df.loc[:, df.columns != 'name'].to_numpy().tolist()
In [98]: data.insert(0, df["name"].tolist())
In [99]: data
Out[99]: [['Alice', 'Bob', 'Charlie'], [2, 8, 3], [4, 1, 5], [3, 2, 5]]

Export data to empty cell row Openpyxl

I am trying to automate number groupings of several lists by exporting the data to ms excel using openpyxl. The output is a list of lists with two sets of numbers per element, the first set being the matched number (0 to 99), and the second is the index number where they matched.
def variable_str_to_list_pairs_overlapping(str):
    """Return every overlapping pair of adjacent characters of ``str``.

    Each element is paired with its successor and the two are joined into one
    string, e.g. ``"1234"`` gives ``["12", "23", "34"]``. Inputs with fewer
    than two elements give ``[]``.

    The parameter keeps its original name ``str`` (which hides the builtin
    inside this function only) so existing keyword callers keep working.
    """
    return [''.join(pair) for pair in zip(str[:-1], str[1:])]
# Turn both inputs into their lists of overlapping pairs.
list1 = variable_str_to_list_pairs_overlapping(list1)
list2 = variable_str_to_list_pairs_overlapping(list2)
# For every position where both lists hold the same pair, record
# [pair as int, position]. zip stops at the shorter list, so inputs of
# different length no longer raise IndexError.
lst_result = [
    [int(pair1), i]
    for i, (pair1, pair2) in enumerate(zip(list1, list2))
    if pair1 == pair2
]
print(lst_result)
Output:
[[7, 265], [8, 281], [2, 303], [8, 332], [7, 450], [1, 544], [0,
737], [9, 805], [2, 970], [4, 1103], [4, 1145], [8, 1303], [1,
1575], [4, 1592], [2, 1593], [3, 1948], [4, 2200], [5, 2419], [3,
2464], [9, 2477], [1, 2529], [6, 2785], [2, 2842], [8, 2843], [7,
2930], [3, 2991], [8, 3096], [3, 3248], [2, 3437], [7, 3438], [8,
3511], [0, 3522], [0, 3523], [5, 3590], [6, 3621], [1, 3622], [2,
3671], [6, 3835], [7, 3876]]
I'm looking to export the data to excel in such a way that the first element is assigned as the row index and the second as the value inside the cell
from openpyxl import Workbook
# Code from the question, kept with its original behaviour.
wb = Workbook()
ws = wb.active
dest_filename = 'openpyxltest.xlsx'
for x in lst_result:
    # The row is chosen from the matched number (x[0] + 1) but the column is
    # always 2, so every pair that shares the same x[0] writes to the same
    # cell and only the last value written remains.
    ws.cell(row=x[0] + 1, column=2).value = x[1]
wb.save(filename=dest_filename)
Actual Output:
Desired Output:
What do I need to change in my code? Thank you in advance for the help. you guys are awesome! :)
You are overwriting the cells - you never adjust the column you write into - so it gets overwritten by later writes...
You could solve this by using a defaultdict(list) to collect all values of one key into a list, sort it and then create the xlsx from the dictionary like so:
from collections import defaultdict

lst_result = [
    [7, 265], [8, 281], [2, 303], [8, 332], [7, 450], [1, 544], [0, 737],
    [9, 805], [2, 970], [4, 1103], [4, 1145], [8, 1303], [1, 1575],
    [4, 1592], [2, 1593], [3, 1948], [4, 2200], [5, 2419], [3, 2464],
    [9, 2477], [1, 2529], [6, 2785], [2, 2842], [8, 2843], [7, 2930],
    [3, 2991], [8, 3096], [3, 3248], [2, 3437], [7, 3438], [8, 3511],
    [0, 3522], [0, 3523], [5, 3590], [6, 3621], [1, 3622], [2, 3671],
    [6, 3835], [7, 3876],
]

# Group all datapoints by their first value: each key maps to the list of
# second values, in the order they appear in lst_result.
grpd_data = defaultdict(list)
for k, v in lst_result:
    grpd_data[k].append(v)

# Sorting the grouped values is not needed here because the inputs are
# already ordered by their second value; for unordered input, call .sort()
# on each list in grpd_data.values() at this point.
grpd_data looks like:
# defaultdict(<type 'list'>, {0: [737, 3522, 3523], 1: [544, 1575, 2529, 3622],
# 2: [303, 970, 1593, 2842, 3437, 3671], 3: [1948, 2464, 2991, 3248],
# 4: [1103, 1145, 1592, 2200], 5: [2419, 3590], 6: [2785, 3621, 3835],
# 7: [265, 450, 2930, 3438, 3876], 8: [281, 332, 1303, 2843, 3096, 3511],
# 9: [805, 2477]})
Then create the workbook:
from openpyxl import Workbook
wb = Workbook()
ws = wb.active
dest_filename = 'openpyxltest.xlsx'
# One worksheet row per key: the key itself goes into column 1 of row
# key + 1, and its grouped values fill columns 2, 3, ... of that row.
for x, data in grpd_data.items():
    ws.cell(row=x + 1, column=1).value = x
    # enumerate starts at 2 so the values land to the right of the key
    for col, d in enumerate(data, 2):
        ws.cell(row=x + 1, column=col).value = d
wb.save(filename=dest_filename)
Output:
See:
enumerate
How does collections.defaultdict work?

Making a special sort of dictionary

I have a python program in which I have a list which resembles the list below:
a = [[1,2,3], [4,2,7], [5,2,3], [7,8,5]]
Here I want to create a dictionary using the middle value of each sublist as keys which should look something like this:
b = {2:[[1,2,3], [4,2,7], [5,2,3]], 8: [[7,8,5]]}
How can I achieve this?
You can do it simply like this:
a = [[1,2,3], [4,2,7], [5,2,3], [7,8,5]]
# Map each middle value to the list of sublists that have it in the middle.
b = {}
for sub in a:
    m = sub[len(sub) // 2]  # the middle element (index len // 2)
    if m in b:
        b[m].append(sub)
    else:
        # first sublist seen with this middle value
        b[m] = [sub]
print(b)
Output:
{2: [[1, 2, 3], [4, 2, 7], [5, 2, 3]], 8: [[7, 8, 5]]}
You could also use a defaultdict to avoid the if in the loop:
from collections import defaultdict
# defaultdict(list) creates the empty list on first access, so no
# membership test is needed before appending.
b = defaultdict(list)
for sub in a:
    m = sub[len(sub) // 2]  # the middle element (index len // 2)
    b[m].append(sub)
print(b)
Output:
defaultdict(<class 'list'>, {2: [[1, 2, 3], [4, 2, 7], [5, 2, 3]], 8: [[7, 8, 5]]})
Here is a solution that uses dictionary comprehension:
from itertools import groupby
a = [[1,2,3], [4,2,7], [5,2,3], [7,8,5]]


def get_mid(x):
    """Return the element of ``x`` at index ``len(x) // 2``."""
    return x[len(x) // 2]


# The list is sorted by the same key that is passed to groupby, so all
# sublists sharing a middle value sit next to each other and form one group.
b = {key: list(val) for key, val in groupby(sorted(a, key=get_mid), get_mid)}
print(b)

Python Pandas - Update row with dictionary based on index, column

I have a dataframe with empty columns and a corresponding dictionary which I would like to update the empty columns with based on index, column:
import pandas as pd
import numpy as np
# Five rows of sample values in columns x, y, z.
dataframe = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [4, 6, 2], [3, 4, 1]])
dataframe.columns = ['x', 'y', 'z']
# Add the columns a, b, c filled with NaN; they are to be filled per row later.
additional_cols = ['a', 'b', 'c']
for col in additional_cols:
    dataframe[col] = np.nan
x y z a b c
0 1 2 3
1 4 5 6
2 7 8 9
3 4 6 2
4 3 4 1
# Sketch from the question. NOTE(review): `x` and `df` are not defined in the
# visible snippet - presumably both refer to `dataframe`; confirm.
for row, column in x.iterrows():
    # calculations to return dictionary y
    y = {"a": 5, "b": 6, "c": 7}
    # The result of .map(y) is not assigned to anything here.
    df.loc[row, :].map(y)
Basically after performing the calculations using columns x, y, z I would like to update columns a, b, c for that same row :)
I could use a function as such but as far as the pandas library and a method for the DataFrame object I am not sure...
def update_row_with_dict(dictionary, dataframe, index):
    """Write the values of ``dictionary`` into one row of ``dataframe``.

    For each key/value pair, the cell at row label ``index`` and column label
    ``key`` is set to ``value`` via ``dataframe.loc``. The frame is modified
    in place and nothing is returned.
    """
    for key, value in dictionary.items():
        dataframe.loc[index, key] = value
The above answer with correct indentation:
def update_row_with_dict(df, d, idx):
    """Set, in row ``idx`` of ``df``, each column named by a key of ``d``.

    Every key of ``d`` is used as a column label and its value is written to
    the cell ``df.loc[idx, key]``. ``df`` is modified in place; the function
    returns nothing.
    """
    for key, value in d.items():
        df.loc[idx, key] = value
A shorter version would be:
def update_row_with_dict(df, d, idx):
    """Set several cells of row ``idx`` of ``df`` in a single assignment.

    The keys of ``d`` select the columns and the values of ``d`` are written
    to them, in matching order. ``df`` is modified in place; the function
    returns nothing.
    """
    # Materialise the dict views as lists so .loc receives an ordinary
    # list of labels and an ordinary list of values.
    df.loc[idx, list(d)] = list(d.values())
For your code snippet, the syntax would be:
import pandas as pd
import numpy as np
# Five rows of sample values in columns x, y, z.
dataframe = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9], [4, 6, 2], [3, 4, 1]])
dataframe.columns = ['x', 'y', 'z']
# Add the columns a, b, c filled with NaN.
additional_cols = ['a', 'b', 'c']
for col in additional_cols:
    dataframe[col] = np.nan
# Fill a, b, c row by row; `y` stands in for the per-row calculation result.
for idx in dataframe.index:
    y = {'a':1,'b':2,'c':3}
    update_row_with_dict(dataframe, y, idx)

Resources