Simple problem I'm sure, but after doing tons of hunting around I cant find any reason this shouldn't be working. While trying to plot a grouped bar chart of 2010 and 2014 population data side by side, only 2014 is visible and the side for 2010 is blank. What is my mistake here?
import matplotlib.pyplot as plt
import pandas as pd
import datetime
import numpy as np
myFrame = pd.read_csv('top12cities.csv', sep = ',', engine = 'python')
fig = plt.figure(figsize=(10,6))
ind = np.arange(len(myFrame['City']))
width = .35
p1 = plt.bar(ind, myFrame['2014 estimate'], width)
p2 = plt.bar(ind + .35, myFrame['2010 Census'], width, color='y')
plt.legend([p1[0],p2[0]], ['2014', '2010'], loc = 'best')
plt.xticks(ind + width /2,myFrame['City'],rotation = 90)
plt.xlabel("Name")
plt.ylabel('population')
plt.savefig('popbar.png')
plt.show()
Your code ran fine on my machine with a test csv with the schema
City,2010 Census,2014 estimate
New York, 34, 123
L.A., 14, 89
San Fran, 14, 30
It may be worth trying to check if your csv file has issues of some kind, like non-numeric entries in the problematic column. I got this:
Related
I really appreciate your help in developing my code since I am not an expert in python. I attempt to write a code to be able to:
Read all the points (longitude, latitude, cumulative forecasted rainfall for 24, 48, and 72 hours) from a csv file (Mean_PCP_REPS_12_20220809_Gridded.csv).
Read the polygon representing the watershed boundary (NelsonRiverBasin.shp).
Mask/remove the points outside of the watershed polygon.
Create a rainfall colormap image or raster for the points inside the watershed polygon.
Color boundaries should be based on rainfall value. I defined the rainfall range for each color in my code.
I tried many ways but I was not successful in creating an image or raster with desired color map (please click here as an example of the intended image). My python code is as follows. It creates and saves "New_ras.tiff" but my code cannot remap the colors of this image based on the range of rainfall after its creation.
from __future__ import division
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import Point, Polygon, MultiPolygon
import operator
#extending the code
import os
from matplotlib.patches import Patch
from matplotlib.colors import ListedColormap
import matplotlib.colors as colors
import seaborn as sns
import numpy as np
import rioxarray as rxr
import earthpy as et
import earthpy.plot as ep
from scipy.interpolate import griddata #added code up to here
import rasterio
# load the data that should be cropped by the polygon
# this assumes that the csv file already includes
# a geometry column with point data as performed below
dat_gpd = pd.read_csv(r'Mean_PCP_REPS_12_20220809_Gridded.csv')
# make shapely points out of the X and Y coordinates
point_data = [Point(xy) for xy in zip(dat_gpd.iloc[:,0], dat_gpd.iloc[:,1])]
all_pts = list(zip(dat_gpd.iloc[:,0], dat_gpd.iloc[:,1]))
# assign shapely points as geometry to a geodataframe
# Like this you can also inspect the individual points if needed
arr_gpd = gpd.GeoDataFrame(dat_gpd, crs=4269, geometry=point_data)
# assign defined polygon to a new dataframe
nlpoly = gpd.read_file('NelsonRiverBasin.shp')
nlpoly = nlpoly.to_crs('epsg:4269')
mask = [nlpoly.contains(Point(p)).any() for p in all_pts]
# define a new dataframe from the spatial join of the dataframe with the data to be cropped
# and the dataframe with the polygon data, using the within function.
#dat_fin = gpd.sjoin(arr_gpd, nlpoly[['OCEAN_EN', 'COUNT', 'geometry']], predicate = 'within')
#dat_fin = dat_fin.to_crs('epsg:4326')
#dat_fin.plot(column= 'Hr72')
#plt.savefig('Raster2.tiff')
data = dat_gpd[['Long', 'Lat', 'Hr72']]
pts = list(zip(data.Long, data.Lat))
print (pts)
print(type(pts))
pts2 = [pts[i] for i in range(len(pts)) if mask[i]]
print(pts2)
print(type(pts2))
pts_val = data.Hr72.values
pts_val2 = [pts_val[i] for i in range(len(pts_val)) if mask[i]]
new_pts = [Point(xy) for xy in pts2]
print(type(pts_val2[1]))
pts3=[]
for tup, j in zip(pts2,range(len(pts_val2))):
pts3.append(list(tup)+[pts_val2[j]])
print(type(pts3))
masked_pts = pd.DataFrame(pts3)
print(masked_pts)
masked_pts.columns = pd.Series(['Long', 'Lat', 'Hr72'])
new_arr_gpd = gpd.GeoDataFrame(masked_pts, crs = 4269, geometry = new_pts)
new_arr_gpd.plot(column = 'Hr72')
plt.savefig('new_ras.tiff')
rRes = 0.01
#xRange = np.arange(data.Long.min(), data.Long.max(), rRes)
#yRange = np.arange(data.Lat.min(), data.Lat.max(), rRes)
#print(xRange[:5],yRange[:5])
#gridX, gridY = np.meshgrid(xRange, yRange)
#grid_pcp = griddata(pts2, pts_val2, (gridX, gridY), method = 'linear')
#Extending the code
sns.set(font_scale = 1, style = "white")
lidar_chm = rxr.open_rasterio(r'new_ras.tiff', masked=True).squeeze()
# Define the colors you want
cmap = ListedColormap(["white", "lightskyblue","dodgerblue","mediumblue","lawngreen","limegreen", "forestgreen","darkgreen", "yellow", "orange","darkorange", "chocolate", "red", "maroon", "indianred","lightpink", "pink", "lightgray", "whitesmoke" ])
# Define a normalization from values -> colors
norm = colors.BoundaryNorm([0, 1, 5, 10, 15, 20, 25, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 150, 200, 250], 19)
fig, ax = plt.subplots(figsize=(9, 5))
chm_plot = ax.imshow(np.squeeze(r'new_ras.tiff'),cmap=cmap,norm=norm)
#print(chm_plot)
map_title = input ("Enter a title for this map (for ex. 72-hr accumulated forecast map):")
ax.set_title("Hydrologic Forecast Centre (MTI)\n" + map_title)
# Add a legend for labels
legend_labels = {"white": "0-1", "lightskyblue": "1-5","dodgerblue": "5-10","mediumblue": "10-15","lawngreen": "15-20","limegreen": "20-25", "forestgreen": "25-30","darkgreen": "30-40", "yellow": "40-50", "orange": "50-60","darkorange": "60-70", "chocolate": "70-80", "red": "80-90", "maroon": "90-100","indianred": "100-110", "lightpink": "110-120", "pink": "120-150", "lightgray": "150-200", "whitesmoke": "200-250"}
patches = [Patch(color=color, label=label) for color, label in legend_labels.items()]
ax.legend(handles=patches,bbox_to_anchor=(1.2, 1),facecolor="white")
ax.set_axis_off()
plt.show()
New to Matplotlib, trying to format dates on x axis. If I just use plt.xticks, the date is correct. But if I try to format the values using ax.xaxis.set_major_formatter, it changes my axis values to Jan-1-1970 based. I'm sure this is newbie stuff, thx for the bootstrap. (BTW, running in JupyterLabs notebook).
import pandas as pd
from datetime import date
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
# data to plot
#df_plot = df_dts[df_dts.dt>"8/17/2020"]
df_plot = pd.DataFrame({
'dt': [date(2020,8,19), date(2020,8,20), date(2020,8,21), date(2020,8,22)],
'open_cnt': [2,15,2,7],
'close_cnt': [0,2,11,0]
})
# create plot
fig, ax = plt.subplots()
fig.set_size_inches(10, 5, forward=True)
index = np.arange(len(df_plot))
bar_width = 0.35
opacity = 0.8
rects1 = plt.bar(index, df_plot.open_cnt, bar_width, alpha=opacity, color='orange', label='Open')
rects2 = plt.bar(index + bar_width, df_plot.close_cnt, bar_width, alpha=opacity, color='g', label='Close')
plt.xlabel('Date')
plt.ylabel('Workitems')
plt.title('Open & Close Rates')
plt.xticks(index + bar_width/2, df_plot.dt)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b-%d-%y'))
plt.show()
Instead of messing up with formatter, set the index in your DataFrame
to proper text representation of your dates and call plot.bar on this
object:
fig, ax = plt.subplots(figsize=(10,5))
ax = df_plot.set_index(df_plot.dt.map(lambda s: s.strftime('%b-%d-%y')))\
.plot.bar(ax=ax, legend=False, title='Open & Close Rates', rot=0,
color=['orange', 'green'])
ax.set_xlabel('Date')
ax.set_ylabel('Workitems');
For your data I got the following picture:
As you can see, my code is more concise than yours.
If you don't want to change your original code much, you can simply do the transformation when you set the xticks.
df_plot['dt'] = pd.to_datetime(df_plot['dt'], format='%Y-%m-%d')
plt.xticks(index + bar_width/2, df_plot['dt'].dt.strftime('%b-%d-%y'))
I have added a table to the bottom of my plot, but there are a number of issues with it:
The right has too much padding.
The left has too little padding.
The bottom has no padding.
The cells are too small for the text within them.
The table is too close to the bottom of the plot.
The cells belonging to the row names are not colored to match those of the bars.
I'm going out of my mind fiddling with this. Can someone help me fix these issues?
Here is the code (Python 3):
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
# Set styles
plt.style.use(['seaborn-paper', 'seaborn-whitegrid'])
plt.style.use(['seaborn'])
sns.set(palette='colorblind')
matplotlib.rc("font", family="Times New Roman", size=12)
labels = ['n=1','n=2','n=3','n=4','n=5']
a = [98.8,98.8,98.8,98.8,98.8]
b = [98.6,97.8,97.0,96.2,95.4]
bar_width = 0.20
data = [a,b]
print(data)
colors = plt.cm.BuPu(np.linspace(0, 0.5, len(labels)))
columns = ('n=1', 'n=2', 'n=3', 'n=4', 'n=5')
index = np.arange(len(labels))
plt.bar(index, a, bar_width)
plt.bar(index+bar_width+.02, b, bar_width)
plt.table(cellText=data,
rowLabels=['a', 'b'],
rowColours=colors,
colLabels=columns,
loc='bottom')
plt.subplots_adjust(bottom=0.7)
plt.ylabel('Some y label which effect the bottom padding!')
plt.xticks([])
plt.title('Some title')
plt.show()
This is the output:
Update
This is working now, but in case someone else is having issues: Make sure you are not viewing your plots and the changes you make to them with IntelliJ SciView as it does not represent changes accurately and introduces some formatting issues!
I think you can fix the first problem by setting the bounding box when you make the table using bbox like this:
bbox=[0, 0.225, 1, 0.2]
where the parameters are [left, bottom, width, height].
For the second issue (the coloring), that is because the color array is not corresponding to the seaborn coloring. You can query the seaborn color palette with
sns.color_palette(palette='colorblind')
this will give you a list of the colors seaborn is using.
Check the modifications below:
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
# Set styles
plt.style.use(['seaborn-paper', 'seaborn-whitegrid'])
plt.style.use(['seaborn'])
sns.set(palette='colorblind')
matplotlib.rc("font", family="Times New Roman", size=12)
labels = ['n=1','n=2','n=3','n=4','n=5']
a = [98.8,98.8,98.8,98.8,98.8]
b = [98.6,97.8,97.0,96.2,95.4]
bar_width = 0.20
data = [a,b]
colors = sns.color_palette(palette='colorblind')
columns = ('n=1', 'n=2', 'n=3', 'n=4', 'n=5')
index = np.arange(len(labels))
fig = plt.figure(figsize=(12,9))
plt.bar(index, a, bar_width)
plt.bar(index+bar_width+.02, b, bar_width)
plt.table(cellText=data,
rowLabels=[' a ', ' b '],
rowColours=colors,
colLabels=columns,
loc='bottom',
bbox=[0, 0.225, 1, 0.2])
fig.subplots_adjust(bottom=0.1)
plt.ylabel('Some y label which effect the bottom padding!')
plt.xticks([])
plt.title('Some title')
plt.show()
I also changed the subplot adjustment to subplot_adjust(bottom=0.1) because it wasn't coming out right otherwise. Here is the output:
considering the following pandas DataFrame:
labels values_a values_b values_x values_y
0 date1 1 3 150 170
1 date2 2 6 200 180
It is easy to plot this with Seaborn (see example code below). However, due to the big difference between values_a/values_b and values_x/values_y, the bars for values_a and values_b are not easily visible (actually, the dataset given above is just a sample and in my real dataset the difference is even bigger). Therefore, I would like to use two y-axis, i.e., one y-axis for values_a/values_b and one for values_x/values_y. I tried to use plt.twinx() to get a second axis but unfortunately, the plot shows only two bars for values_x and values_y, even though there are at least two y-axis with the right scaling. :) Do you have an idea how to fix that and get four bars for each label whereas the values_a/values_b bars relate to the left y-axis and the values_x/values_y bars relate to the right y-axis?
Thanks in advance!
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
columns = ["labels", "values_a", "values_b", "values_x", "values_y"]
test_data = pd.DataFrame.from_records([("date1", 1, 3, 150, 170),\
("date2", 2, 6, 200, 180)],\
columns=columns)
# working example but with unreadable values_a and values_b
test_data_melted = pd.melt(test_data, id_vars=columns[0],\
var_name="source", value_name="value_numbers")
g = sns.barplot(x=columns[0], y="value_numbers", hue="source",\
data=test_data_melted)
plt.show()
# values_a and values_b are not displayed
values1_melted = pd.melt(test_data, id_vars=columns[0],\
value_vars=["values_a", "values_b"],\
var_name="source1", value_name="value_numbers1")
values2_melted = pd.melt(test_data, id_vars=columns[0],\
value_vars=["values_x", "values_y"],\
var_name="source2", value_name="value_numbers2")
g1 = sns.barplot(x=columns[0], y="value_numbers1", hue="source1",\
data=values1_melted)
ax2 = plt.twinx()
g2 = sns.barplot(x=columns[0], y="value_numbers2", hue="source2",\
data=values2_melted, ax=ax2)
plt.show()
This is probably best suited for multiple sub-plots, but if you are truly set on a single plot, you can scale the data before plotting, create another axis and then modify the tick values.
Sample Data
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
columns = ["labels", "values_a", "values_b", "values_x", "values_y"]
test_data = pd.DataFrame.from_records([("date1", 1, 3, 150, 170),\
("date2", 2, 6, 200, 180)],\
columns=columns)
test_data_melted = pd.melt(test_data, id_vars=columns[0],\
var_name="source", value_name="value_numbers")
Code:
# Scale the data, just a simple example of how you might determine the scaling
mask = test_data_melted.source.isin(['values_a', 'values_b'])
scale = int(test_data_melted[~mask].value_numbers.mean()
/test_data_melted[mask].value_numbers.mean())
test_data_melted.loc[mask, 'value_numbers'] = test_data_melted.loc[mask, 'value_numbers']*scale
# Plot
fig, ax1 = plt.subplots()
g = sns.barplot(x=columns[0], y="value_numbers", hue="source",\
data=test_data_melted, ax=ax1)
# Create a second y-axis with the scaled ticks
ax1.set_ylabel('X and Y')
ax2 = ax1.twinx()
# Ensure ticks occur at the same positions, then modify labels
ax2.set_ylim(ax1.get_ylim())
ax2.set_yticklabels(np.round(ax1.get_yticks()/scale,1))
ax2.set_ylabel('A and B')
plt.show()
I have the following data in a csv file
SourceID BSs hour Type
7208 87 11 MAIN
11060 67 11 MAIN
3737 88 11 MAIN
9683 69 11 MAIN
I have the following python code.I want to plot a graph with the following specifications.
For each SourceID and Type I want to plot a graph of BSs over time. I would prefer if each SourceID and Type is a subplot on single plot.I have tried a lot of options using groupby, but can't seem to get it work.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
COLLECTION = 'NEW'
DATA = r'C:\Analysis\Test\{}'.format(COLLECTION)
INPUT_FILE = DATA + r'\in.csv'
OUTPUT_FILE = DATA + r'\out.csv'
with open(INPUT_FILE) as fin:
df = pd.read_csv(INPUT_FILE,
usecols=["SourceID", 'hour','BSs','Type'],
header=0)
df.drop_duplicates(inplace=True)
df.reset_index(inplace=True)
It's still not 100% clear to me what sort of plot you actually want, but my guess is that you're looking for something like this:
from matplotlib import pyplot as plt
# group by SourceID and Type, find out how many unique combinations there are
grps = df.groupby(['SourceID', 'Type'])
ngrps = len(grps)
# make a grid of axes
ncols = int(np.sqrt(ngrps))
nrows = -(-ngrps // ncols)
fig, ax = plt.subplots(nrows, ncols, sharex=True, sharey=True)
# iterate over the groups, plot into each axis
for ii, (idx, rows) in enumerate(grps):
rows.plot(x='hour', y='BSs', style='-s', ax=ax.flat[ii], legend=False,
scalex=False, scaley=False)
# hide any unused axes
for aa in ax.flat[ngrps:]:
aa.set_axis_off()
# set the axis limits
ax.flat[0].set_xlim(df['hour'].min() - 1, df['hour'].max() + 1)
ax.flat[0].set_ylim(df['BSs'].min() - 5, df['BSs'].max() + 5)
fig.tight_layout()