how to assign specific color to sunburst chart - colors

how to assign specific color to sunburst chart
I have date in excel with colored cells
I want to let sunburst chart pickup colors from the cells
`
from pathlib import Path
import pandas as pd
import plotly
import plotly.express as px
from tkinter import messagebox
import plotly.graph_objects as go
import openpyxl
# Define excel file location
excel_file = Path(__file__).parent / "data.xlsx"
# Read Data from Excel
df = pd.read_excel(excel_file)
# Assign columns to variables
G1 = df["G1"]
G2 = df["G2"]
G3 = df["G3"]
G4 = df["G4"]
G5 = df["G5"]
G6 = df["G6"]
G7 = df["G7"]
G8 = df["G8"]
G9 = df["G9"]
G10 = df["G10"]
fig= px.sunburst(df ,path=[G1,G2,G3,G4,G5,G6,G7,G8,G9,G10] )
output_path = Path(__file__).parent / "Treemap_xx.html"
plotly.offline.plot(fig, filename=str(output_path))
messagebox.showinfo("showinfo", "Done")
`
Data(https://i.stack.imgur.com/FohO8.png)
Plot(https://i.stack.imgur.com/EaJEs.png)
Please fix the code to get done

Related

Is there a method to change the number format of 100 excel files with 20 sheets each from 2 decimal to 6 decimal using Python?

import pandas as pd
import os
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile
path=os.getcwd()
files=os.listdir(path)
for i in range(len(files)):
filename = files[i]
filepath=os.path.join(path,filename)
print(filepath)
df=pd.ExcelFile(filepath) # read sheet name
sheet = df.sheet_names
print(sheet)
df=pd.read_excel(filepath, sheet_name=sheet,skiprows = 5, nrows=15, usecols = 'E:L')
print(df)
when I click on the number it show 6 digits in the header but I want to change it into the excel also
import openpyxl
wb = openpyxl.load_workbook('ds.xlsx')
ws = wb.active
for row in ws.iter_rows():
for cell in row:
# only relevant column and without header
if cell.column_letter == 'A' and cell.row > 1: # put the cell number from where to where you want to check
ws[cell.coordinate].number_format = '0.0000000' # it will change the number format
wb.save('ds2.xlsx')

Changing the values of a dict in lowercase ( values are code colors ) to be accepted as a color parametrer in plotly.graph.object

So, I'm trying to get the colors from the dictionary 'Disaster_type' to draw the markers in geoscatters depending of the type of disaster.
Basically, I want to reprensent in the graphic the natural diasasters with it's color code. eg; it's is a volcanic activity paint it 'orange'. I want to change the size of the marker as well depending of the magnitude of the disaster, but that's for another day.
here's the link of the dataset: https://www.kaggle.com/datasets/brsdincer/all-natural-disasters-19002021-eosdis
import plotly.graph_objects as go
import pandas as pd
import plotly as plt
df = pd.read_csv('1900_2021_DISASTERS - main.csv')
df.head()
df.tail()
disaster_set = {disaster for disaster in df['Disaster Type']}
disaster_type = {'Storm':'aliceblue',
'Volcanic activity':'orange',
'Flood':'royalblue',
'Mass movement (dry)':'darkorange',
'Landslide':'#C76114',
'Extreme temperature':'#FF0000',
'Animal accident':'gray55',
'Glacial lake outburst':'#7D9EC0',
'Earthquake':'#CD8C95',
'Insect infestation':'#EEE8AA',
'Wildfire':' #FFFF00',
'Fog':'#00E5EE',
'Drought':'#FFEFD5',
'Epidemic':'#00CD66 ',
'Impact':'#FF6347'}
# disaster_type_lower = {(k, v.lower()) for k, v in disaster_type.items()}
# print(disaster_type_lower)
# for values in disaster_type.values():
# disaster_type[values] = disaster_type.lowercase()
fig = go.Figure(data=go.Scattergeo(
lon = df['Longitude'],
lat = df['Latitude'],
text = df['Country'],
mode = 'markers',
marker_color = disaster_type_.values()
)
)
fig.show()
I cant figure how, I've left in comments after the dict how I tried to do that.
It changes them to lowercase, but know I dont know hot to get them...My brain is completly melted
it's a simple case of pandas map
found data that appears same as yours on kaggle so have used that
one type is unmapped Extreme temperature so used a fillna("red") to remove any errors
gray55 gave me an error so replaced it with RGB equivalent
import kaggle.cli
import sys
import pandas as pd
from zipfile import ZipFile
import urllib
import plotly.graph_objects as go
# fmt: off
# download data set
url = "https://www.kaggle.com/brsdincer/all-natural-disasters-19002021-eosdis"
sys.argv = [sys.argv[0]] + f"datasets download {urllib.parse.urlparse(url).path[1:]}".split(" ")
kaggle.cli.main()
zfile = ZipFile(f'{urllib.parse.urlparse(url).path.split("/")[-1]}.zip')
dfs = {f.filename: pd.read_csv(zfile.open(f)) for f in zfile.infolist()}
# fmt: on
df = dfs["DISASTERS/1970-2021_DISASTERS.xlsx - emdat data.csv"]
disaster_type = {
"Storm": "aliceblue",
"Volcanic activity": "orange",
"Flood": "royalblue",
"Mass movement (dry)": "darkorange",
"Landslide": "#C76114",
"Extreme temperature": "#FF0000",
"Animal accident": "#8c8c8c", # gray55
"Glacial lake outburst": "#7D9EC0",
"Earthquake": "#CD8C95",
"Insect infestation": "#EEE8AA",
"Wildfire": " #FFFF00",
"Fog": "#00E5EE",
"Drought": "#FFEFD5",
"Epidemic": "#00CD66 ",
"Impact": "#FF6347",
}
fig = go.Figure(
data=go.Scattergeo(
lon=df["Longitude"],
lat=df["Latitude"],
text=df["Country"],
mode="markers",
marker_color=df["Disaster Type"].map(disaster_type).fillna("red"),
)
)
fig.show()

Locate columns in dataframe to graph

I have an excel file with 3 columns and 60 rows. The first column is the Date but I want to put that on the x axis and plot the other 2 against it. I need help locating the 2 other columns so i can enter it in ax1.plot() and ax2.plot().
I have tried to locate it by [:,1] but that doesnt work and I have tried to locate it by the name of the column. The second column is "S&P/TSX Composite index (^GSPTSE)" and the third column is "Bitcoin CAD (BTC-CAD)"
import pandas as pd
import matplotlib.pyplot as plt
InputData = pd.read_excel('Python_assignment_InputData.xlsx')
#InputData = InputData[:15]
"""
print("a)\n", InputData,"\n")
print("b)")
InputData['Date'] = InputData.DATE.dt.year
InputData['Year'] = pd.to_datetime(InputData.Date).dt.year
"""
#ax1 = InputData.iloc[:,1]
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot()
ax2.plot()
ax1.set_ylabel("TSX", color = 'b')
ax2.set_ylabel("BTC", color = 'g')
ax1.set_xlabel("Year")
plt.title("Question 6")
plt.show()

Why is plot returning "ValueError: could not convert string to float:" when a dataframe column of floats is being passed to the plot function?

I am trying to plot a dataframe I have created from an excel spreadsheet using either matplotlib or matplotlib and pandas ie. df.plot. However, python keeps returning a cannot convert string to float error. This is confusing since when I print the column of the dataframe it appears to be all float values.
I've tried printing the values of the dataframe column and using the pandas.plot syntax. I've also tried saving the column to a new variable.
import pandas as pd
from matplotlib import pyplot as plt
import glob
import openpyxl
import math
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Border, Side, Alignment
import seaborn as sns
import itertools
directory = 'E:\some directory'
#QA_directory = directory + '**/*COPY.xlsx'
wb = openpyxl.load_workbook(directory + '\\Calcs\\' + "excel file.xlsx", data_only = 'True')
plt.figure(figsize=(16,9))
axes = plt.axes()
plt.title('Drag Amplification', fontsize = 16)
plt.xlabel('Time (s)', fontsize = 14)
plt.ylabel('Cf', fontsize = 14)
d = pd.DataFrame()
n=[]
for sheets in wb.sheetnames:
if '2_1' in sheets and '2%' not in sheets and '44%' not in sheets:
name = sheets[:8]
print(name)
ws = wb[sheets]
data = ws.values
cols = next(data)[1:]
data = list(data)
idx = [r[0] for r in data]
data = (itertools.islice(r, 1, None) for r in data)
df = pd.DataFrame(data, index=idx, columns=cols)
df = df.dropna()
#x = df['x/l']
#y = df.Cf
print(df.columns)
print(df.Cf.values)
x=df['x/l'].values
plt.plot(x, df.Cf.values)
"""x = [wb[sheets].cell(row=row,column=1).value for row in range(1,2000) if wb[sheets].cell(row=row,column=1).value]
print(x)
Cf = [wb[sheets].cell(row=row,column=6).value for row in range(1,2000) if wb[sheets].cell(row=row,column=1).value]
d[name+ 'x'] = pd.DataFrame(x)
d[name + '_Cf'] = pd.Series(Cf, index=d.index)
print(name)"""
print(df)
plt.show()
I'm expecting a plot of line graphs with the values of x/l on the x access and Cf on the 'y' with a line for each of the relevant sheets in the workbook. Any insights as to why i am getting this error would be appreciated!

How can I find out which operands are supported before looping through pandas dataframe?

I am attempting to iterate over rows in a Series within a Pandas DataFrame. I would like to take the value in each row of the column csv_df['Strike'] and plug it into variable K, which gets called in function a.
Then, I want the output a1 and a2 to be put into their own columns within the DataFrame.
I am receiving the error: TypeError: unsupported operand type(s) for *: 'int' and 'zip', and I figure that if I can find out which operands are supported, I could convert a1 and a2to that.
Am I thinking about this correctly?
Note: S is just a static number as the df is just one row, while K has many rows.
Code is below:
from scipy.stats import norm
from math import sqrt, exp, log, pi
import pandas as pd
pd.core.common.is_list_like = pd.api.types.is_list_like
import fix_yahoo_finance as yf
yf.pdr_override()
import numpy as np
import datetime
from pandas_datareader import data, wb
import matplotlib.pyplot as plt
#To get data:
start = datetime.datetime.today()
end = datetime.datetime.today()
df = data.get_data_yahoo('AAPL', start, end) #puts data into a pandas dataframe
csv_df = pd.read_csv('./AAPL_TEST.csv')
for row in csv_df.itertuples():
def a(S, K):
a1 = 100 * K
a2 = S
return a1
S = df['Adj Close'].items()
K = csv_df['strike'].items()
a1, a2 = a(S, K)
df['new'] = a1
df['new2'] = a2
It seems an alternate way of doing what you want would be to apply your method to each data frame separately, as in:
df = data.get_data_yahoo('AAPL', start, end)
csv_df = pd.read_csv('./AAPL_TEST.csv')
df['new'] = csv_df['strike'].apply(lambda x: 100 * x)
df['new2'] = df['Adj Close']
Perhaps, applying the calculation directly to the Pandas Series (a column of your data frame) is a way to avoid defining a method that is used only once.
Plus, I wouldn't define a method within a loop as you have.
Cheers
ps. I believe you have forgotten to return both values in your method.
def a(S, K):
a1 = 100 * K
a2 = S
return (a1, a2)

Resources