I'm using plotly, plotly express and plotly figure factory to plot U.S data. But I have an issue with color band.
1- Can I add a different color band from the original one? like a new type which is not based on the graph?
2- I want a gradient one but it shows a discrete one.
I would appreciate if anyone can help me.
=========================
import plotly.figure_factory as ff
import numpy as np
import pandas as pd
df_sample = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/laucnty16.csv')
df_sample['State FIPS Code'] = df_sample['State FIPS Code'].apply(lambda x: str(x).zfill(2))
df_sample['County FIPS Code'] = df_sample['County FIPS Code'].apply(lambda x: str(x).zfill(3))
df_sample['FIPS'] = df_sample['State FIPS Code'] + df_sample['County FIPS Code']
colorscale = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
"#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
"#08519c","#0b4083","#08306b"]
endpts = list(np.linspace(1, 12, len(colorscale) - 1))
fips = df_sample['FIPS'].tolist()
values = df_sample['Unemployment Rate (%)'].tolist()
fig = ff.create_choropleth(
fips=fips, values=values,
binning_endpoints=endpts,
colorscale=colorscale,
show_state_data=False,
show_hover=True, centroid_marker={'opacity': 0},
asp=2.9, title='USA by Unemployment %',
legend_title='% unemployed'
)
fig.layout.template = None
fig.write_image("fig1.png", scale=20)
fig.show()
=====================================enter image description here
I recommend moving to https://plotly.com/python/mapbox-county-choropleth/
figure generation is quicker
uses a continuous color scale as you require
provides much more flexible zoom and pan
data prep
import requests
import urllib
from pathlib import Path
from zipfile import ZipFile
import geopandas as gpd
import pandas as pd
import plotly.express as px
# get geometry data as a geopandas dataframe
# fmt: off
src = [{"name": "counties", "suffix": ".shp", "url": "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_20m.zip",},]
data = {}
for s in src:
f = Path.cwd().joinpath(urllib.parse.urlparse(s["url"]).path.split("/")[-1])
if not f.exists():
r = requests.get(s["url"],stream=True,)
with open(f, "wb") as fd:
for chunk in r.iter_content(chunk_size=128): fd.write(chunk)
fz = ZipFile(f)
fz.extractall(f.parent.joinpath(f.stem))
data[s["name"]] = gpd.read_file(f.parent.joinpath(f.stem).joinpath([f.filename for f in fz.infolist() if Path(f.filename).suffix == s["suffix"]][0])).assign(source_name=s["name"])
gdf = pd.concat(data.values()).to_crs("EPSG:4326")
df_sample = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/laucnty16.csv')
df_sample['State FIPS Code'] = df_sample['State FIPS Code'].apply(lambda x: str(x).zfill(2))
df_sample['County FIPS Code'] = df_sample['County FIPS Code'].apply(lambda x: str(x).zfill(3))
df_sample['FIPS'] = df_sample['State FIPS Code'] + df_sample['County FIPS Code']
gdf2 = pd.merge(gdf, df_sample, left_on=["STATEFP","COUNTYFP"], right_on=["State FIPS Code","County FIPS Code"])
plot
colorscale = ["#f7fbff","#ebf3fb","#deebf7","#d2e3f3","#c6dbef","#b3d2e9","#9ecae1",
"#85bcdb","#6baed6","#57a0ce","#4292c6","#3082be","#2171b5","#1361a9",
"#08519c","#0b4083","#08306b"]
# build choropleth
fig = px.choropleth_mapbox(
gdf2,
geojson=gdf2.geometry,
locations=gdf2.index,
color="Unemployment Rate (%)",
color_continuous_scale=colorscale,
hover_data=["NAME", "FIPS"]
).update_layout(
mapbox={"style": "white-bg", "zoom": 1.5, "center": {"lat": 50, "lon": -120}},
margin={"l":0,"r":0,"t":30,"b":0},
title='USA by Unemployment %',
coloraxis_colorbar_title="Rate"
)
fig
Related
I am a bery beginner with plotly graphing library, that is definetely an awesome tool for vizualisation and plot beautiful maps.
On spyder 5, I am developping to plot a spatial heat map according to the pd.Dataframe that I have inferred from my raw data .
import plotly.io as pio
import plotly.express as px
#from plotly.offline import plot
import pandas as pd
from netCDF4 import Dataset, num2date
import numpy as np
import numpy.ma as ma
pio.renderers.default = "svg"
nc_filename = '20220809_dataset-sargassum.nc'
#%% Ouverture du fichier & extraction de variables
if __name__ == '__main__':
nc = Dataset(nc_filename, 'r')
latitude = nc.variables['latitude'][:]
longitude = nc.variables['longitude'][:]
fai = nc.variables['fai_anomaly_daily_mean']
fai_data = fai[:]
fill_value= fai._FillValue
time = nc.variables['time']
time = num2date(time[0], time.units)
#%% Construction du Dataframe
x, y = np.meshgrid(ma.getdata(longitude), ma.getdata(latitude))
x = x.flatten()
y = y.flatten()
fai = ma.getdata(fai).flatten()
FAI_df = pd.DataFrame({'latitude':y,
'longitude' : x,
'Fai_anomaly':fai})
FAI_df['Fai_anomaly'].replace(fill_value, np.nan, inplace=True)
FAI_df['Fai_anomaly'][FAI_df['Fai_anomaly'] < 0] = np.nan
min_FAI = FAI_df['Fai_anomaly'].min()
max_FAI = FAI_df['Fai_anomaly'].max()
fig = px.density_mapbox(FAI_df,
lat='latitude',
lon='longitude',
z ='Fai_anomaly',
hover_data={"Fai_anomaly":True},
color_continuous_scale='Turbo',
range_color=(min_FAI,max_FAI)
)
#fig.update_layout(mapbox_style="stamen-terrain")
fig.update_layout(mapbox_style="white-bg",
title =('FAI'))
fig.show()
Nevertheless, when time comes to plot , I am getting the image below with mapbox_style='white-bg'. But the worst is that I am getting mapbox_style='open-street-map/stamen-terrain/or whatever the error :
File "/home/barbiersa#ad.meteo.fr/.conda/envs/py38_spyder/lib/python3.8/site-packages/kaleido/scopes/plotly.py", line 161, in transform
raise ValueError(
ValueError: Transform failed with error code 525: Mapbox error.
May someone help me to understand what is happening, I do not really have a clue of why neither the tile nor the colorbar are not showing.
import plotly.graph_objects as go
import plotly.express as px
fig = px.histogram(df, nbins = 5, x = "numerical_col", color = "cat_1", animation_frame="date",
range_x=["10000","500000"], facet_col="cat_2")
fig.update_layout(
margin=dict(l=25, r=25, t=20, b=20))
fig.show()
How can I fix the output? I would like multiple subplots based on cat_2 where the hue is cat_1.
you have not provided sample data, so I've simulated it based on code you are using to generate figure
I have encountered one issue range_x does not work, it impacts y-axis as well. Otherwise approach fully works.
import plotly.graph_objects as go
import plotly.express as px
import numpy as np
import pandas as pd
# data not provided.... simulate some
DAYS = 5
ROWS = DAYS * 2000
df = pd.DataFrame(
{
"date_d": np.repeat(pd.date_range("1-Jan-2021", periods=DAYS), ROWS // DAYS),
"numerical_col": np.random.uniform(10000, 500000, ROWS),
"cat_1": np.random.choice(list("ABCD"), ROWS),
"cat_2": np.random.choice(list("UVWXYZ"), ROWS),
}
)
# animation frame has to be a string not a date...
df["date"] = df["date_d"].dt.strftime("%Y-%b-%d")
# always best to provide pre-sorted data to plotly
df = df.sort_values(["date", "cat_1", "cat_2"])
fig = px.histogram(
df,
nbins=5,
x="numerical_col",
color="cat_1",
animation_frame="date",
# range_x=[10000, 500000],
facet_col="cat_2",
)
fig.update_layout(margin=dict(l=25, r=25, t=20, b=20))
I have a dataframe central
Then I want to plot the pairwise relationships between the columns with sns.pairplot(central). Could you please explain why the process just runs forever? I tried on both my laptop and Colab, but the problem persists.
import urllib3
%matplotlib inline
%config InlineBackend.figure_format = 'svg' # Change the image format to svg for better quality
import networkx as nx
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
## Import dataset
http = urllib3.PoolManager()
url = 'https://raw.githubusercontent.com/leanhdung1994/WebMining/main/airports.net'
f = http.request('GET', url)
open('airports.net', 'wb').write(f.data)
G = nx.read_pajek('airports.net', encoding = 'UTF-8')
G = nx.DiGraph(G)
## Compute measures of centrality
degree_central = nx.degree_centrality(G)
closeness_central = nx.closeness_centrality(G)
eigen_central = nx.eigenvector_centrality_numpy(G, max_iter = 200)
katz_central = nx.katz_centrality_numpy(G)
between_central = nx.betweenness_centrality(G)
pagerank = nx.pagerank_numpy(G)
[hub, authority] = nx.hits(G)
## Create a dataframe using with above calculated centralities
central = pd.DataFrame([degree_central, closeness_central, eigen_central, katz_central, between_central, hub, authority]).T
central.columns = ['degree', 'closeness', 'eigen', 'katz', 'between', 'hub', 'authority']
central
## Plot the pairwise relationships between centralities
sns.pairplot(central)
For reasons unknown to me, the histplot for column eigen_central has a problem determining a reasonable number of bins. The pairplot works with kde plots in the diagonal sns.pairplot(central, diag_kind="kde"), and the histplot for column eigen_central alone also does not work as expected. You can overcome this problem by defining the bin number:
sns.pairplot(central, diag_kws = {"bins": 10})
Output:
I will upvote any answer that can provide a reason why seaborn has problems defining the bins. This problem is seaborn-specific as plt.hist(central.eigen) works as expected but not sns.histplot(central.eigen).
I want to fill the gridded map with colors based on the value of interest. A sample data is here:
import pandas as pd
df = pd.DataFrame()
df['lon'] = [100,105,110,115,120,125,130]
df['lat'] = [38,40,42,44,46,48,50]
df['value'] = [1,2,3,4,5,6,7]
Specifically, is it possible to do this with Cartopy? I found a similar question here:https://stackoverflow.com/questions/53412785/plotting-pandas-csv-data-onto-cartopy-map. But that post was to plot scattered points, I need to fill the grids with colors.
I myself tried:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
lon, lat = np.meshgrid(df['lon'], df['lat'])
fig = plt.figure(figsize=[15,15])
ax = plt.axes(projection=ccrs.PlateCarree())
ax.pcolormesh(lon,lat,df['variable'],latlon=True,cmap='jet')
plt.show()
The error is at ax.pcolormesh(...), it says "not enough values to unpack (expected 2, got 1)"
Many thanks for your help.
For discrete data you can create rectangular patches for each point. Here is a possible solution for your sample data set. Each row of data (lat, long, value) is used to create a rectangular patch. The value is normalized by dividing with max(value) to enable using colormap for coloring the patches.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import matplotlib.patches as mpatches
def make_rect(clon, clat, dlon, dlat):
lon_min = clon - dlon/2.
lat_min = clat - dlat/2.
lon_max = clon + dlon/2.
lat_max = clat + dlat/2.
# clockwise from LL
#lons = [lon_min, lon_min, lon_max, lon_max, lon_min]
#lats = [lat_min, lat_max, lat_max, lat_min, lat_min]
ll = [lon_min,lat_min]
ul = [lon_min,lat_max]
ur = [lon_max,lat_max]
lr = [lon_max,lat_min]
return [ll, ul, ur, lr, ll]
df = pd.DataFrame()
df['lon'] = [100,105,110,115,120,125,130]
df['lat'] = [38,40,42,44,46,48,50]
df['value'] = [1,2,3,4,5,6,7] # not suffice for meshgrid plot
# The colormap to use.
cm = plt.cm.get_cmap('jet')
fig = plt.figure(figsize=[8,6])
ax = plt.axes(projection=ccrs.PlateCarree(), extent=[95, 134, 35, 52])
# plot the red dots using the available data
# comment out if not needed
ax.plot(df['lon'], df['lat'], 'ro')
# plot rectangular patches at the data points
dlon, dlat = 5, 2 #spacings between data points
for lon1, lat1, val1 in zip(df['lon'], df['lat'], df['value']):
pcorners = make_rect(lon1, lat1, dlon, dlat)
poly = mpatches.Polygon(pcorners, ec='gray', fill=True, lw=0.25, \
fc=cm(val1 / max(df['value'])), transform=ccrs.PlateCarree())
ax.add_patch(poly)
ax.gridlines(draw_labels=True)
plt.show()
The output plot:
I am trying to create a slider for time in Jupyter Notebook using ipywidgets. I would like to take the tabulated experimental data (see figure below) and control the value bounds with the help of a slider. The graph should be a force-displacement graph, evolving in time:
This is my python code:
from ipywidgets import IntSlider, interact, FloatSlider
u = fdat1['C_1_Weg_R4[mm]'].values
f = fdat1['C_1_Kraft_R4[kN]'].values
t = fdat1['S/No'].values
#interact(t = IntSlider(min = 0, max = max(fdat0['S/No'].values)))
def aa_(t):
plt.plot(f[t],u[t])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
fdat1 is the name of the tabulated data. I have also considered using "C_1_Zeit[s]" column as my slider values, but these are not integer values.
The problem is that nothing gets plotted, but the slider works and the graph changes scale.
I have been searching online for some time now and would really appreciate some help.
Thank you in advance!
Edit:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
df = pd.DataFrame.from_records(
[np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
df.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
plt.scatter(df['A'], df['B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(0, 30)
plt.ylim(0, 30)
Inside your plotting function, create a slice of your results dataframe that slices based on the slider value.
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
results = pd.DataFrame.from_records(
[np.linspace(0,30, num=30), np.linspace(0,20, num=30), ]).T
results.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = 21))
def aa_(t):
df = results.iloc[:t] # make the slice here
plt.scatter(df['A'], df['B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(0, 30)
plt.ylim(0, 30)
So, basically, this should have been the correct code:
from ipywidgets import IntSlider, interact, FloatSlider
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
u = fdat1['C_1_Weg_R4[mm]'].values #loads displacement values from fdat1
f = fdat1['C_1_Kraft_R4[kN]'].values #loads force values from fdat1
df = pd.DataFrame.from_dict([u,f]).T #creates a dataframe
df.columns=['A', 'B']
#interact(t = IntSlider(min = 0, max = df.shape[0])) #interactive scatterplot with a slider for time
def scatterplot_(t):
plt.scatter(df.loc[0:t,'A'], df.loc[0:t,'B'])
plt.grid()
plt.xlabel("force [kN]")
plt.ylabel("displacement [mm]")
plt.title("Load-displacement curve for \nexperiment")
plt.xlim(-5, 5)
plt.ylim(-25, 25)