I am trying to have my x tick evenly spaced using matplotlib.
here is the problem plot; the code is below
Issue: The x axis bars and values are not evenly spaced, despite my best efforts
I would appreciate any and all help, thank you!
here is the code im trying to plot
# define and store student IDs
student_IDs = np.array([1453,1454,1456,1457,1459,1460,1462,1463,1464,1465,1466, 1467, 1468, 1469,1470])
before_IP_abs_scores = np.array([51,56,73,94,81,83,71,36,43,83,66,62,70,50,83])
after_IP_abs_scores = np.array([65,60,82,71,65,85,78,51,34,80,63,63,62,55,77])
change_IP_abs_scores = after_IP_abs_scores - before_IP_abs_scores
Here is how i store the valuables from this array
ip_sc = collections.OrderedDict()
for ii in student_IDs:
ip_sc[ii] = []
for count, key in enumerate(student_IDs):
sci_id[key] = [before_science_ID_abs_scores[count],after_science_ID_abs_scores[count],change_science_ID_abs_scores[count]]
ip_sc[key] = [before_IP_abs_scores[count],after_IP_abs_scores[count],change_IP_abs_scores[count]]
Here is my plotting code:
fig = plt.figure(4)
fig.set_figheight(18)
fig.set_figwidth(18)
ax = plt.subplot(111)
plt.grid(True)
# ax = fig.add_axes([0,0,1,1])
for ii in student_IDs:
# plt.plot([1,2], ip_sc[ii][:-1],label=r'${}$'.format(ii))
ax.bar(ii, ip_sc[ii][0], width=.5, color='#30524F',edgecolor="white",hatch="//",align='center')
ax.bar(ii, ip_sc[ii][1], width=.5, color='#95BC89',align='center')
ax.bar(ii, ip_sc[ii][2], width=.5, color='#4D8178',align='center')
plt.ylabel('Absolute Score',size=30)
plt.xlabel('Student ID',size=30)
plt.title('IP Scale Scores',size=30)
plt.axhspan(0, 40, facecolor='navy', alpha=0.2,)
plt.axhspan(40, 60, facecolor='#95BC89', alpha=0.2)
plt.axhspan(60, 100, facecolor='seagreen', alpha=0.3)
ax.tick_params(axis='x', which='major', labelsize=16)
ax.tick_params(axis='y', which='major', labelsize=16)
plt.xticks(student_IDs, ['1453','1454','1456','1457','1459', '1460', '1462', '1463', '1464','1465', '1466', '1467', '1468', '1469', '1470'])
# ax.set_yticks(np.arange(0, 81, 10))
plt.ylim(-25,100)
ax.legend(labels=["Intense and Frequent IP ","Moderate IP ","few IP ",'Pre', 'Post','Change'],fontsize=15)
plt.show()
The student_ids aren't numbered consecutive.
Instead of using the student_ids as x-values, you could just use their index as x. With set_xticklabels you can thereafter set the student_ids as label corresponding to these positions.
So, you could make following modifications to the code (leaving out the call to plt.xticks):
for ind, stud_id in enumerate(student_IDs):
ax.bar(ind, ip_sc[stud_id][0], width=.5, color='#30524F', edgecolor="white", hatch="//", align='center')
ax.bar(ind, ip_sc[stud_id][1], width=.5, color='#95BC89', align='center')
ax.bar(ind, ip_sc[stud_id][2], width=.5, color='#4D8178', align='center')
ax.set_xticks(range(len(student_IDs)))
ax.set_xticklabels(student_IDs)
Related
I'm plotting a map panel with cartopy in eps format. The resulting plot looks fine but has very broad margins when I insert it into my latex document. When checking the plot with adobe illustrator, it seems like the cartopy plots all the gridlines/contourlines/coastlines, even those outside of the canvas, which are hidden but do take up some spaces in the plot.
I tried to use constrained_layout and tight_layout, but they are incompatible with subplots_adjust which I use for adding the shared colorbar.
The code I use to plot is as follows:
proj2 = ccrs.LambertConformal(central_longitude=0, central_latitude=50)
proj_lonlat = ccrs.PlateCarree()
fig = plt.figure(figsize=(12, 9), constrained_layout=True)
# define a function to plot
def plot_era5_500z_MSLp(f500, fsurf, time, label, ax):
# read data
for i in np.arange(len(f500.time.values)):
if pd.to_datetime(f500.time.values[i]) == pd.to_datetime(time):
print('processing time: ' + time)
lons = f500.longitude.values # 1-d array
lats = f500.latitude.values # 1-d array
gph500 = f500.z.values[i,:,:]/98 # geopotential (m2 s-2) -> geopotential height (dagpm) [time = 72, lat = 241, lon = 561]
pmsl = fsurf.msl.values[i,:,:]/100 # mean sea level pressure Pa -> hPa
# create base map
ax.set_extent([-35, 30, 25, 70]) # x0, x1, y0, y1
gl = ax.gridlines(crs=proj_lonlat, draw_labels=True, xlocs=[-60,-40,-20,0,20,40,60], ylocs=[20,30,40,50,60],
x_inline=False, y_inline=False, color='k', alpha=0.5, linestyle='dotted')
gl.top_labels=False
gl.right_labels=False
gl.xlabel_style = {'size': 14, 'color': 'k'}
gl.ylabel_style = {'size': 14, 'color': 'k'}
gl.rotate_labels = False
ax.add_feature(cfeature.COASTLINE.with_scale('50m'), lw=0.4, alpha=0.9) # add coastline feature
# plot 500hPa geopotential height (zc: z contour)
z_levels = np.arange(500, 580+10, 8)
zc = ax.contour(lons, lats, gph500, transform=proj_lonlat,
levels=z_levels, extent='both', colors='mediumblue', linewidths=0.5)
ax.clabel(zc, inline=True, fontsize=10, fmt='%.0f')
# plot MSL pressure (mslps: MSL p shading; mslpc: MSL p contour)
levels = np.arange(960, 1057, 4)
mslps = ax.contourf(lons, lats, pmsl, levels=levels, cmap='Spectral_r', transform=proj_lonlat)
mslpc = ax.contour(lons, lats, pmsl, levels=levels, colors='k', linewidths=0.5, alpha=0.6, transform=proj_lonlat)
ax.set_title(label + ' ' + time, loc= 'left', pad=0.5, fontsize=14)
return mslps
# fig (a)
ax1 = plt.subplot(2, 2, 1, projection=proj2)
plot_era5_500z_MSLp(f500_2016nov, fsurf_2016nov, '2016-11-20 12:00', '(a)', ax1)
# fig (b)
ax2 = plt.subplot(2, 2, 2, projection=proj2)
plot_era5_500z_MSLp(f500_2016nov, fsurf_2016nov, '2016-11-24 00:00', '(b)', ax2)
# fig (c)
ax3 = plt.subplot(2, 2, 3, projection=proj2)
plot_era5_500z_MSLp(f500_2017feb, fsurf_2017feb, '2017-02-27 18:00', '(c)', ax3)
# fig (4)
ax4 = plt.subplot(2, 2, 4, projection=proj2)
mslps = plot_era5_500z_MSLp(f500_2017mar, fsurf_2017mar, '2017-03-04 06:00', '(d)', ax4) # only return mslps here for plotting the sharred colorbar
fig.subplots_adjust(right=0.8, wspace=0.2, hspace=0.000001)
cbar_ax = fig.add_axes([0.82, 0.2, 0.02, 0.55]) # left border, bottom border, width, height
cbar = fig.colorbar(mslps, cax=cbar_ax)
cbar.set_label(label='Mean sea level pressure (hPa)', size=16)
cbar.ax.tick_params(labelsize=14)
The resulting eps plot looks good, but in adobe illustrator, one can see the excess lines outside of the canvas:
Is there any way I can limit the plotting range of the data, or disable the lines outside of the canvas?
I'm trying to draw with matplotlib two average vertical line for every overlapping histograms using a loop. I have managed to draw the first one, but I don't know how to draw the second one. I'm using two variables from a dataset to draw the histograms. One variable (feat) is categorical (0 - 1), and the other one (objective) is numerical. The code is the following:
for chas in df[feat].unique():
plt.hist(df.loc[df[feat] == chas, objective], bins = 15, alpha = 0.5, density = True, label = chas)
plt.axvline(df[objective].mean(), linestyle = 'dashed', linewidth = 2)
plt.title(objective)
plt.legend(loc = 'upper right')
I also have to add to the legend the mean and standard deviation values for each histogram.
How can I do it? Thank you in advance.
I recommend you using axes to plot your figure. Pls see code below and the artist tutorial here.
import numpy as np
import matplotlib.pyplot as plt
# Fixing random state for reproducibility
np.random.seed(19680801)
mu1, sigma1 = 100, 8
mu2, sigma2 = 150, 15
x1 = mu1 + sigma1 * np.random.randn(10000)
x2 = mu2 + sigma2 * np.random.randn(10000)
fig, ax = plt.subplots(1, 1, figsize=(7.2, 7.2))
# the histogram of the data
lbs = ['a', 'b']
colors = ['r', 'g']
for i, x in enumerate([x1, x2]):
n, bins, patches = ax.hist(x, 50, density=True, facecolor=colors[i], alpha=0.75, label=lbs[i])
ax.axvline(bins.mean())
ax.legend()
Background:
I have a list_of_x_and_y_list that contains x and y values which looks like:
[[(44800, 14888), (132000, 12500), (40554, 12900)], [(None, 193788), (101653, 78880), (3866, 160000)]]
I have another data_name_list ["data_a","data_b"] so that
"data_a" = [(44800, 14888), (132000, 12500), (40554, 12900)]
"data_b" = [(None, 193788), (101653, 78880), (3866, 160000)]
The len of list_of_x_and_y_list / or len of data_name_list is > 20.
Question:
How can I create a scatter plot for each item (being the same colour) in the data_name_list?
What I have tried:
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax = plt.axes(facecolor='#FFFFFF')
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color']
print(list_of_x_and_y_list)
for x_and_y_list, data_name, color in zip(list_of_x_and_y_list, data_name_list, colors):
for x_and_y in x_and_y_list,:
print(x_and_y)
x, y = x_and_y
ax.scatter(x, y, label=data_name, color=color) # "label=data_name" creates
# a huge list as a legend!
# :(
plt.title('Matplot scatter plot')
plt.legend(loc=2)
file_name = "3kstc.png"
fig.savefig(file_name, dpi=fig.dpi)
print("Generated: {}".format(file_name))
The Problem:
The legend appears to be a very long list, which I don't know how to rectify:
Relevant Research:
Matplotlib scatterplot
Scatter Plot
Scatter plot in Python using matplotlib
The reason you get a long repeated list as a legend is because you are providing each point as a separate series, as matplotlib does not automatically group your data based on the labels.
A quick fix is to iterate over the list and zip together the x-values and the y-values of each series as two tuples, so that the x tuple contains all the x-values and the y tuple the y-values.
Then you can feed these tuples to the plt.plot method together with the labels.
I felt that the names list_of_x_and_y_list were uneccessary long and complicated, so in my code I've used shorter names.
import matplotlib.pyplot as plt
data_series = [[(44800, 14888), (132000, 12500), (40554, 12900)],
[(None, 193788), (101653, 78880), (3866, 160000)]]
data_names = ["data_a","data_b"]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax = plt.axes(facecolor='#FFFFFF')
prop_cycle = plt.rcParams['axes.prop_cycle']
colors = prop_cycle.by_key()['color']
for data, data_name, color in zip(data_series, data_names, colors):
x,y = zip(*data)
ax.scatter(x, y, label=data_name, color=color)
plt.title('Matplot scatter plot')
plt.legend(loc=1)
To only get one entry per data_name, you should add data_name only once as a label. The rest of the calls should go with label=None.
The simplest you can achieve this using the current code, is to set data_name to None at the end of the loop:
from matplotlib import pyplot as plt
from random import randint
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_facecolor('#FFFFFF')
# create some random data, suppose the sublists have different lengths
list_of_x_and_y_list = [[(randint(1000, 4000), randint(2000, 5000)) for col in range(randint(2, 10))]
for row in range(10)]
data_name_list = list('abcdefghij')
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
for x_and_y_list, data_name, color in zip(list_of_x_and_y_list, data_name_list, colors):
for x_and_y in x_and_y_list :
x, y = x_and_y
ax.scatter(x, y, label=data_name, color=color)
data_name = None
plt.legend(loc=2)
plt.show()
Some things can be simplified, making the code 'more pythonic', for example:
for x_and_y in x_and_y_list :
x, y = x_and_y
can be written as:
for x, y in x_and_y_list:
Another issue, is that with a lot of data calling scatter for every point could be rather slow. All the x and y belonging to the same list can be plotted together. For example using list comprehension:
for x_and_y_list, data_name, color in zip(list_of_x_and_y_list, data_name_list, colors):
xs = [x for x, y in x_and_y_list]
ys = [y for x, y in x_and_y_list]
ax.scatter(xs, ys, label=data_name, color=color)
scatter could even get a list of colors per point, but plotting all the points in one go, wouldn't allow for labels per data_name.
Very often, numpy is used to store numerical data. This has some advantages, such as vectorization for quick calculations. With numpy the code would look like:
import numpy as np
for x_and_y_list, data_name, color in zip(list_of_x_and_y_list, data_name_list, colors):
xys = np.array(x_and_y_list)
ax.scatter(xys[:,0], xys[:,1], label=data_name, color=color)
I want to plot some data from a csv file using dataframe.
My code currently displays 266=14*19 separate subplots. Currently, it is coded to display all 266 subplots on one screen and each is small and difficult to read.
I tried to use a 1x1 plot
#fig, cx=plt.subplots(1,1, sharex=False, sharey=False, figsize=(18,12))
Main code:
fig, cx=plt.subplots(14,19, sharex=False, sharey=False, figsize=(18,12))
#fig, cx=plt.subplots(1,1, sharex=False, sharey=False, figsize=(18,12))
plt.subplots_adjust(hspace=0.5)
cx = cx.ravel()
for i in range(0,len(Bond)):
cx[i].plot(VelLog[Bond[i]], color='b')
cx[i].set_xlabel('Time (ms)')
cx[i].set_ylabel('Velocity (m/s)')
cx[i].set_ylim(-250,150)
cx[i].set_title(Bond[i])
plt.savefig('Velocity.png', dpi=120)
plt.show()
##################################
Error message when I un-comment Line 1
cx[i].plot(VelLog[Bond[i]], color='b')
IndexError: index 209 is out of bounds for axis 0 with size 209
How can I only display a few subplots on a screen at a time to increase readability?
Like 5x5 + 5x5 + 5x5 + 5x5 + 5x5,+ 5x5 + 5x5 + 5x5 + 5x5 + 5x5 + 4x4 = 266
11 different screens.
Is there a way to add a chart filter as an alternative?
Here is my updated code with your suggestions. It now creates 11 figures. I was able to plot all 266 graphs but each graph looks the same
Nrows, Ncols = 14, 19
Nplots = Nrows*Ncols
nrows, ncols = 5, 5
naxes = nrows*ncols
nfigures = Nplots//naxes
count = 0
figures = []
for fignum in range(nfigures+1):
fig, axes = plt.subplots(nrows, ncols, sharex=False, sharey=False, figsize=(18,12))
plt.subplots_adjust(hspace=0.5)
#axes = axes.ravel()
figures.append(fig)
axes = axes.flat
for ax in axes:
#print(count)
if count<Nplots:
for i in range(0,len(Bond)):
cvs_row, cvs_col = divmod(count, Ncols)
cvs_row, cvs_col = cvs_row+1, cvs_col+1
ax.plot(VelLog[Bond[i]], color='b', label='%d,%d'%(cvs_row, cvs_col))
ax.set_xlabel('Time (ms)')
ax.set_ylabel('Velocity (m/s)')
ax.set_ylim(-250,150)
ax.set_title(Bond[i])
count = count+1
plt.savefig('Velocity.png', dpi=120)
plt.show()
Here is the result for one figure
enter image description here
Here I plot the same function 14*19 times, but you can get an idea
We have two problems, keeping track of what we are plotting and
delaying the actual plotting until we are finished with all of them.
The first issue is solved here using a global counter and the divmod
builtin, the second storing all the figures in a list and calling
plt.show() only at the very end of the plotting phase.
To show what I mean with "keeping track" I have added a label and a
legend to each individual subplot.
To keep things reasonable I don't check if the last figure is empty
and also I don't check is some subplots of the last figures are empty,
but remember that you can remove some subplots from a figure if you
want a clean last figure.
import matplotlib.pyplot as plt
import numpy as np
x = np.linspace(0, np.pi, 31)
y = np.sin(x)
Nrows, Ncols = 14, 19
Nplots = Nrows*Ncols
nrows, ncols = 5, 5
naxes = nrows*ncols
nfigures = Nplots//naxes
count = 0
figures = []
for fignum in range(nfigures+1):
fig, axes = plt.subplots(nrows, ncols)
figures.append(fig)
axes = axes.flat
for ax in axes:
print(count)
if count<Nplots:
cvs_row, cvs_col = divmod(count, Ncols)
cvs_row, cvs_col = cvs_row+1, cvs_col+1
ax.plot(x, y, label='%d,%d'%(cvs_row, cvs_col))
ax.legend()
count = count+1
plt.show()
Here I show just the last figure, to show what I mean with "empty
subplots"...
I am trying to make a 2x2 subplot, with each of the inner subplots consisting of two x axes and two y axes; the first xy correspond to a linear scale and the second xy correspond to a logarithmic scale. Before assuming this question has been asked before, the matplotlib docs and examples show how to do multiple scales for either x or y but not both. This post on stackoverflow is the closest thing to my question, and I have attempted to use this idea to implement what I want. My attempt is below.
Firstly, we initialize data, ticks, and ticklabels. The idea is that the alternate scaling will have the same tick positions with altered ticklabels to reflect the alternate scaling.
import numpy as np
import matplotlib.pyplot as plt
# xy data (global)
X = np.linspace(5, 13, 9, dtype=int)
Y = np.linspace(7, 12, 9)
# xy ticks for linear scale (global)
dtick = dict(X=X, Y=np.linspace(7, 12, 6, dtype=int))
# xy ticklabels for linear and logarithmic scales (global)
init_xt = 2**dtick['X']
dticklabel = dict(X1=dtick['X'], Y1=dtick['Y']) # linear scale
dticklabel['X2'] = ['{}'.format(init_xt[idx]) if idx % 2 == 0 else '' for idx in range(len(init_xt))] # log_2 scale
dticklabel['Y2'] = 2**dticklabel['Y1'] # log_2 scale
Borrowing from the linked SO post, I will plot the same thing in each of the 4 subplots. Since similar methods are used for both scalings in each subplot, the method is thrown into a for-loop. But we need the row number, column number, and plot number for each.
# 2x2 subplot
# fig.add_subplot(row, col, pnum); corresponding iterables = (irows, icols, iplts)
irows = (1, 1, 2, 2)
icols = (1, 2, 1, 2)
iplts = (1, 2, 1, 2)
ncolors = ('red', 'blue', 'green', 'black')
Putting all of this together, the function to output the plot is below:
def initialize_figure(irows, icols, iplts, ncolors, figsize=None):
""" """
fig = plt.figure(figsize=figsize)
for row, col, pnum, color in zip(irows, icols, iplts, ncolors):
ax1 = fig.add_subplot(row, col, pnum) # linear scale
ax2 = fig.add_subplot(row, col, pnum, frame_on=False) # logarithmic scale ticklabels
ax1.plot(X, Y, '-', color=color)
# ticks in same positions
for ax in (ax1, ax2):
ax.set_xticks(dtick['X'])
ax.set_yticks(dtick['Y'])
# remove xaxis xtick_labels and labels from top row
if row == 1:
ax1.set_xticklabels([])
ax2.set_xticklabels(dticklabel['X2'])
ax1.set_xlabel('')
ax2.set_xlabel('X2', color='gray')
# initialize xaxis xtick_labels and labels for bottom row
else:
ax1.set_xticklabels(dticklabel['X1'])
ax2.set_xticklabels([])
ax1.set_xlabel('X1', color='black')
ax2.set_xlabel('')
# linear scale on left
if col == 1:
ax1.set_yticklabels(dticklabel['Y1'])
ax1.set_ylabel('Y1', color='black')
ax2.set_yticklabels([])
ax2.set_ylabel('')
# logarithmic scale on right
else:
ax1.set_yticklabels([])
ax1.set_ylabel('')
ax2.set_yticklabels(dticklabel['Y2'])
ax2.set_ylabel('Y2', color='black')
ax1.tick_params(axis='x', colors='black')
ax1.tick_params(axis='y', colors='black')
ax2.tick_params(axis='x', colors='gray')
ax2.tick_params(axis='y', colors='gray')
ax1.xaxis.tick_bottom()
ax1.yaxis.tick_left()
ax1.xaxis.set_label_position('top')
ax1.yaxis.set_label_position('right')
ax2.xaxis.tick_top()
ax2.yaxis.tick_right()
ax2.xaxis.set_label_position('top')
ax2.yaxis.set_label_position('right')
for ax in (ax1, ax2):
ax.set_xlim([4, 14])
ax.set_ylim([6, 13])
fig.tight_layout()
plt.show()
plt.close(fig)
Calling initialize_figure(irows, icols, iplts, ncolors) produces the figure below.
I am applying the same xlim and ylim so I do not understand why the subplots are all different sizes. Also, the axis labels and axis ticklabels are not in the specified positions (since fig.add_subplot(...) indexing starts from 1 instead of 0.
What is my mistake and how can I achieve the desired result?
(In case it isn't clear, I am trying to put the xticklabels and xlabels for the linear scale on the bottom row, the xticklabels and xlabels for the logarithmic scale on the top row, the 'yticklabelsandylabelsfor the linear scale on the left side of the left column, and the 'yticklabels and ylabels for the logarithmic scale on the right side of the right column. The color='black' kwarg corresponds to the linear scale and the color='gray' kwarg corresponds to the logarithmic scale.)
The irows and icols lists inn the code do not serve any purpose. To create 4 subplots in a 2x2 grid you would loop over the range(1,5),
for pnum in range(1,5):
ax1 = fig.add_subplot(2, 2, pnum)
This might not be the only problem in the code, but as long as the subplots aren't created correctly it's not worth looking further down.