Locate columns in dataframe to graph - excel

I have an excel file with 3 columns and 60 rows. The first column is the Date but I want to put that on the x axis and plot the other 2 against it. I need help locating the 2 other columns so i can enter it in ax1.plot() and ax2.plot().
I have tried to locate it by [:,1] but that doesnt work and I have tried to locate it by the name of the column. The second column is "S&P/TSX Composite index (^GSPTSE)" and the third column is "Bitcoin CAD (BTC-CAD)"
import pandas as pd
import matplotlib.pyplot as plt
InputData = pd.read_excel('Python_assignment_InputData.xlsx')
#InputData = InputData[:15]
"""
print("a)\n", InputData,"\n")
print("b)")
InputData['Date'] = InputData.DATE.dt.year
InputData['Year'] = pd.to_datetime(InputData.Date).dt.year
"""
#ax1 = InputData.iloc[:,1]
fig, ax1 = plt.subplots()
ax2 = ax1.twinx()
ax1.plot()
ax2.plot()
ax1.set_ylabel("TSX", color = 'b')
ax2.set_ylabel("BTC", color = 'g')
ax1.set_xlabel("Year")
plt.title("Question 6")
plt.show()

Related

Plotly : How to enable text label in line graph for the last value?

I am trying to build a graph where the line graph should show the value of only the last element in some beautiful formating.
line graph with no text at end
Now the current method of the text shows for all elements and is a straight text that creates a lot of collisions with different lines in the same graph and looks clumsy.
Will be very nice to achieve something as mentioned in the below image.
desired line graph with text
This is now handled through:
legendgroup = d.name
Plot 1: All
Plot 2: Deselect GOOG in the legend and see that the marker disappears as well:
Complet code:
# imports
import pandas as pd
import plotly.express as px
# data
df = px.data.stocks()
df = df.drop('AMZN', axis = 1)
colors = px.colors.qualitative.T10
# plotly
fig = px.line(df,
x = 'date',
y = [c for c in df.columns if c != 'date'],
template = 'plotly_dark',
color_discrete_sequence = colors,
title = 'Stocks',
)
# move legend
fig.layout.legend.x = -0.3
# add traces for annotations and text for end of lines
for i, d in enumerate(fig.data):
fig.add_scatter(x=[d.x[-1]], y = [d.y[-1]],
mode = 'markers+text',
text = d.y[-1],
textfont = dict(color=d.line.color),
textposition='middle right',
marker = dict(color = d.line.color, size = 12),
legendgroup = d.name,
showlegend=False)
fig.show()

How to select specific number of colors to show in color bar from a big list ? - Matplotlib

I plotted some data which has 70 classes, so when I built the color bar it's very difficult to distinguish between each legend as shown below:
The code that I'm using is:
formation_colors = # 70 colors
formation_labels = # 70 labels
data = # the section of the entire dataset which only has 13 labels
data = data.sort_values(by='DEPTH_MD')
ztop=data.DEPTH_MD.min(); zbot=data.DEPTH_MD.max()
cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
cluster_f = np.repeat(np.expand_dims(data['Formations'].values,1), 100, 1)
fig = plt.figure(figsize=(2,10))
ax = fig.add_subplot()
im_f = ax.imshow(cluster_f, interpolation='none', aspect='auto', cmap = cmap_formations, vmin=0, vmax=69)
ax.set_xlabel('FORMATION')
ax.set_xticklabels(['']);
divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax = cax_f,)
cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
So far, if I just change:
1. cmap_formations = colors.ListedColormap(formation_colors[0:len(formation_colors)], 'indexed')
2. cbar_f.set_ticks(range(0,len(formation_labels))); cbar_f.set_ticklabels(formation_labels)
to:
cmap_formations = colors.ListedColormap(formation_colors[0:len(data['FORMATION'].unique())], 'indexed')
cbar_f.set_ticks(range(0,len(data['FORMATION'].unique()))); cbar_f.set_ticklabels(data['FORMATION'].unique())
I get, the corresponding colors in the cbar, however the plot is no longer correct and also the legends are out of square
Thank you so much if you have any idea how to do this.
Although not explicitly mentioned in the question, I suppose data['FORMATION'] contains indices from 0 till 69 into the lists of formation_colors and formation_labels
The main problem is that data['FORMATION'] needs to be renumbered to be new indices (with numbers 0 till 12) into the new list of unique colors. np.unique(..., return_inverse=True) returns both the list of unique numbers, and the renumbering for the values.
To be able to reindex the list of colors and of labels, it helps to convert them to numpy arrays.
To make the code easier to debug, the following test uses a simple relation between the list of colors and the list of labels.
from matplotlib import pyplot as plt
from matplotlib import colors
from mpl_toolkits.axes_grid1.axes_divider import make_axes_locatable
import numpy as np
import pandas as pd
formation_colors = np.random.choice(list(colors.CSS4_COLORS), 70, replace=False) # 70 random color names
formation_labels = ['lbl_' + c for c in formation_colors] # 70 labels
formation_colors = np.asarray(formation_colors)
formation_labels = np.asarray(formation_labels)
f = np.random.randint(0, 70, 13)
d = np.sort(np.random.randint(0, 5300, 13))
data = pd.DataFrame({'FORMATION': np.repeat(f, np.diff(np.append(0, d))),
'DEPTH_MD': np.arange(d[-1])})
data = data.sort_values(by='DEPTH_MD')
ztop = data['DEPTH_MD'].min()
zbot = data['DEPTH_MD'].max()
unique_values, formation_new_values = np.unique(data['FORMATION'], return_inverse=True)
cmap_formations = colors.ListedColormap(formation_colors[unique_values], 'indexed')
cluster_f = formation_new_values.reshape(-1, 1)
fig = plt.figure(figsize=(3, 10))
ax = fig.add_subplot()
im_f = ax.imshow(cluster_f, extent=[0, 1, zbot, ztop],
interpolation='none', aspect='auto', cmap=cmap_formations, vmin=0, vmax=len(unique_values)-1)
ax.set_xlabel('FORMATION')
ax.set_xticks([])
divider_f = make_axes_locatable(ax)
cax_f = divider_f.append_axes("right", size="20%", pad=0.05)
cbar_f = plt.colorbar(im_f, cax=cax_f)
cbar_f.set_ticks(np.linspace(0, len(unique_values)-1, 2*len(unique_values)+1)[1::2])
cbar_f.set_ticklabels(formation_labels[unique_values])
plt.subplots_adjust(left=0.2, right=0.5)
plt.show()
Here is a comparison plot:

Second title of a plot/photo is the value of a column in a CSV but only the last value is used in all photos

I have a script that makes a photo that shows a basemap and where an earthquake happened. So 1 earthquake, 1 photo. The second title of each plot should be the date of the earthquake. However, only the last value, which is "2020-04-10", is used in all photos.
from shapely.geometry import Point
from geopandas import GeoDataFrame
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import os
os.chdir(r'path')
def plotPoint():
df = pd.read_csv('earthquakes.csv')
basemap = gpd.read_file('basemap.shp')
crs = "epsg:32651"
geometry = gpd.points_from_xy(df.Longitude, df.Latitude)
gdf = GeoDataFrame(df, crs=crs, geometry=geometry)
for d in df['Date'].values:
date = d
for i in range(gdf.shape[0]):
ax = basemap.plot(figsize=(15,10))
ax.axis('off')
g = gdf.iloc[i].geometry
plt.plot(g.x, g.y, marker='o', color='red', markersize=15)
title = 'Earthquakes in the ___ from 2008 to 2020'
dateInfo = str(date)
plt.suptitle(title)
plt.title(dateInfo)
plt.savefig("earthquake_{0}.png".format(i))
plotPoint()
Get the values of "Date" column
for i in df['Date'].values:
print(i)
Result
2020-04-22
2020-04-22
2020-04-21
2020-04-18
2020-04-10
Sample CSV
Latitude,Longitude,Date,Time_UTC,Depth,Depth Type,Magnitude Type,Magnitude,Region Name,Last Update,Eqid,unknown field
13.81,121.1,2020-04-22,03:19:57,10,f,mb,4.5,MINDORO, PHILIPPINES,2020-04-28 23:17,850323
13.76,120.92,2020-04-22,02:36:19,10, , M,4.2,MINDORO, PHILIPPINES,2020-04-22 03:50,850325
10.45,125.2,2020-04-21,21:43:05,10,f,mb,4.7,LEYTE, PHILIPPINES,2020-04-21 22:55,850252
6.69,125.23,2020-04-18,15:22:16,32, , M,3.6,MINDANAO, PHILIPPINES,2020-04-18 15:35,849329
5.65,126.54,2020-04-10,18:45:49,80, ,Mw,5.2,MINDANAO, PHILIPPINES,2020-04-11 06:41,846838
Changed your code, you were using date from a different for loop and that's why it picked up only the last date, you can use the Date from gdf too I'm guessing:
# for d in df['Date'].values:
# date = d
for i in range(gdf.shape[0]):
ax = basemap.plot(figsize=(15,10))
ax.axis('off')
g = gdf.iloc[i].geometry
plt.plot(g.x, g.y, marker='o', color='red', markersize=15)
title = 'Earthquakes in the ___ from 2008 to 2020'
# Added this line
date = gdf.iloc[i]['Date']
dateInfo = str(date)
plt.suptitle(title)
# Changed this line
plt.title(dateInfo)
plt.savefig("earthquake_{0}.png".format(i))
plt.show()

Why is plot returning "ValueError: could not convert string to float:" when a dataframe column of floats is being passed to the plot function?

I am trying to plot a dataframe I have created from an excel spreadsheet using either matplotlib or matplotlib and pandas ie. df.plot. However, python keeps returning a cannot convert string to float error. This is confusing since when I print the column of the dataframe it appears to be all float values.
I've tried printing the values of the dataframe column and using the pandas.plot syntax. I've also tried saving the column to a new variable.
import pandas as pd
from matplotlib import pyplot as plt
import glob
import openpyxl
import math
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.styles import Border, Side, Alignment
import seaborn as sns
import itertools
directory = 'E:\some directory'
#QA_directory = directory + '**/*COPY.xlsx'
wb = openpyxl.load_workbook(directory + '\\Calcs\\' + "excel file.xlsx", data_only = 'True')
plt.figure(figsize=(16,9))
axes = plt.axes()
plt.title('Drag Amplification', fontsize = 16)
plt.xlabel('Time (s)', fontsize = 14)
plt.ylabel('Cf', fontsize = 14)
d = pd.DataFrame()
n=[]
for sheets in wb.sheetnames:
if '2_1' in sheets and '2%' not in sheets and '44%' not in sheets:
name = sheets[:8]
print(name)
ws = wb[sheets]
data = ws.values
cols = next(data)[1:]
data = list(data)
idx = [r[0] for r in data]
data = (itertools.islice(r, 1, None) for r in data)
df = pd.DataFrame(data, index=idx, columns=cols)
df = df.dropna()
#x = df['x/l']
#y = df.Cf
print(df.columns)
print(df.Cf.values)
x=df['x/l'].values
plt.plot(x, df.Cf.values)
"""x = [wb[sheets].cell(row=row,column=1).value for row in range(1,2000) if wb[sheets].cell(row=row,column=1).value]
print(x)
Cf = [wb[sheets].cell(row=row,column=6).value for row in range(1,2000) if wb[sheets].cell(row=row,column=1).value]
d[name+ 'x'] = pd.DataFrame(x)
d[name + '_Cf'] = pd.Series(Cf, index=d.index)
print(name)"""
print(df)
plt.show()
I'm expecting a plot of line graphs with the values of x/l on the x access and Cf on the 'y' with a line for each of the relevant sheets in the workbook. Any insights as to why i am getting this error would be appreciated!

Plotly iplot() doesnt run within a function

I am trying to use iplot() within a function within Jupyter so that i can use a filter on the graph and have it change dynamically. The code works in a cell on its own like this
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker('ABBV')))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But once this is placed within a function the graph fails to load
def graph(ticker):
# Code for put by ticker
data = []
opPriceDic = priceToArray(getPuts(getOptionPricesByTicker(ticker)))
for key, values in opPriceDic.items():
trace = go.Scatter(
x = numberOfDays,
y = values,
name = 'option',
line = dict(
width = 4)
)
data.append(trace)
# Edit the layout
layout = dict(title = 'Call prices for ' ,
xaxis = dict(title = 'Days to Expiration'),
yaxis = dict(title = 'Price '),
)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='calls For ')
But if I change the iplot() to plot() it calls the plotly API and opens a new tab with the graph displaying.
I am just wondering if anyone has noticed this before and may have come across a solution?
(if I am in the wrong area I will remove the post)
I have tried to use pandas data.reader calls to pull ticker data between a start and end date. The data.reader seems to work from within the function. In the question code, if the opPriceDic dictionary could be converted to a dataframe, then iplot() could plot it without use of layout and fig as below:
# Import libraries
import datetime
from datetime import date
import pandas as pd
import numpy as np
from plotly import __version__
%matplotlib inline
import cufflinks as cf
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
init_notebook_mode(connected=True)
cf.go_offline()
# Create function that uses data.reader and iplot()
def graph(ticker):
# create sample data set
start = datetime.datetime(2006, 1, 1)
end = datetime.datetime(2016, 1, 1)
df = data.DataReader(ticker, 'morningstar', start, end)
df = df.reset_index()
df['numberOfDays'] = df.apply(lambda x: abs((datetime.datetime.now() - x['Date']).days), axis=1)
# call iplot within the function graph()
df.iplot(kind='line', x='numberOfDays', y='Close', xTitle='Days', yTitle='Value', title='Prices', width=4)

Resources