Matplotlib plot is not displaying all xticks and yticks - python-3.x

I am creating subplots in matplotlib, but not all xticks and yticks are being displayed. I have tried everything from setting xlim and ylim to changing the figure size, etc. The thing is, this is a hands-on exercise on HackerRank, and they are evaluating my output against their expected output. The 0.0 on the x-axis and the 1.0 on the y-axis are simply not matching up. What am I doing wrong here?
Here is the code,
import matplotlib.pyplot as plt
import numpy as np
def test_generate_figure2():
np.random.seed(1000)
x = np.random.rand(10)
y = np.random.rand(10)
z = np.sqrt(x**2 + y**2)
fig = plt.figure(figsize=(8,6))
axes1 = plt.subplot(2, 2, 1, title="Scatter plot with Upper Triangle Markers")
axes1.set_xticks([0.0, 0.4, 0.8, 1.2])
axes1.set_yticks([-0.2, 0.2, 0.6, 1.0])
axes1.set_ylim(-0.2,1.0) #Doing this still doesnot get the expected output
axes1.set_xlim(0.0,1.2)
print(axes1.get_yticks())
axes1.scatter(x, y, marker="^", s=80, c=z)
axes2 = plt.subplot(2, 2, 2, title="Scatter plot with Plus Markers")
axes2.set_xticks([0.0, 0.4, 0.8, 1.2])
axes2.set_yticks([-0.2, 0.2, 0.6, 1.0])
axes2.scatter(x, y, marker="+", s=80, c=z)
axes3 = plt.subplot(2, 2, 3, title="Scatter plot with Circle Markers")
axes3.set_xticks([0.0, 0.4, 0.8, 1.2])
axes3.set_yticks([-0.2, 0.2, 0.6, 1.0])
axes3.scatter(x, y, marker="o", s=80, c=z)
axes4 = plt.subplot(2, 2, 4, title="Scatter plot with Diamond Markers")
axes4.set_xticks([0.0, 0.4, 0.8, 1.2])
axes4.set_yticks([-0.2, 0.2, 0.6, 1.0])
axes4.scatter(x, y, marker="d", s=80,c=z)
plt.tight_layout()
plt.show()
test_generate_figure2()
My Output,
Expected Output,

Your set_xlim & set_ylim approach works. You just need to set it for every subplot:
https://akuiper.com/console/5vaLIq0ZC_KO
import matplotlib.pyplot as plt
import numpy as np
def test_generate_figure2():
    """Draw a 2x2 grid of scatter plots that differ only in marker style.

    Every subplot shows the same ten seeded random points, coloured by their
    distance from the origin, with identical ticks and identical axis limits.
    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    np.random.seed(1000)
    x = np.random.rand(10)
    y = np.random.rand(10)
    z = np.sqrt(x**2 + y**2)

    fig = plt.figure(figsize=(8, 6))

    # (marker, title) for subplot positions 1..4, in row-major order.
    panels = [
        ("^", "Scatter plot with Upper Triangle Markers"),
        ("+", "Scatter plot with Plus Markers"),
        ("o", "Scatter plot with Circle Markers"),
        ("d", "Scatter plot with Diamond Markers"),
    ]

    for position, (marker, title) in enumerate(panels, start=1):
        axes = plt.subplot(2, 2, position, title=title)
        axes.set_xticks([0.0, 0.4, 0.8, 1.2])
        axes.set_yticks([-0.2, 0.2, 0.6, 1.0])
        # Pin the limits on *every* subplot: otherwise autoscaling to the
        # scattered data can leave the outermost ticks (0.0 on x, 1.0 on y)
        # outside the visible range, and they are then not drawn.
        axes.set_ylim(-0.2, 1.0)
        axes.set_xlim(0.0, 1.2)
        if position == 1:
            # Kept from the original snippet: echo the first subplot's y ticks.
            print(axes.get_yticks())
        axes.scatter(x, y, marker=marker, s=80, c=z)

    plt.tight_layout()
    plt.show()


test_generate_figure2()

Related

How to group already grouped bar?

Let's assume I have pandas dataframe looking like this:
import pandas as pd
df=pd.DataFrame({'run-1a':[0.3, 0.3, 0.4, 0.4], 'run-1b':[0.3, 0.3, 0.4, 0.5],"run-2a":[0.7, 0.9, 0.8, 0.9],"run-2b":[0.2, 0.3, 0.5, 0.5], "Person":["person1","person2","person3","person4"]})
df
and now I make a bar out of it:
import matplotlib.pyplot as plt
color_list = ['b','lightskyblue', 'g','lightgreen']
ax = df.plot(x='Person',y=['run-1a','run-1b','run-2a','run-2b'], kind='bar', color=color_list)
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xticks(rotation="horizontal")
plt.show()
and the result looks like this:
However, I'd like to group run-1a with run-1b and run-2a with run-2b for each of the persons. When I googled for solutions, I only found how to make grouped bars that look like my plot above, which is not what I need.
This is a sketch of how my plot should look:
Is there a way to group the bars like that for each person, please?
Thank you very much.
If I understood correctly, you can just add an empty bar between the other bars, so that you still get 4 columns, with each pair of bars separated from the other.
import matplotlib.pyplot as plt
import pandas as pd

# The column named '' is an all-zero spacer: it is plotted as a zero-height
# bar between the run-1 pair and the run-2 pair, which visually splits the
# four bars of each person into two groups.
df=pd.DataFrame({'':[0,0,0,0],'run-1a':[0.3, 0.3, 0.4, 0.4], 'run-1b':[0.3, 0.3, 0.4, 0.5],"run-2a":[0.7, 0.9, 0.8, 0.9],"run-2b":[0.2, 0.3, 0.5, 0.5], "Person":["person1","person2","person3","person4"]})
# One colour per plotted column, in the same order as `y` below; 'w' (white)
# belongs to the spacer column.
color_list = ['b','lightskyblue','w', 'g','lightgreen']
ax = df.plot(x='Person',y=['run-1a','run-1b','','run-2a','run-2b'], kind='bar', color=color_list)
# Place the legend outside the axes, to the right of the plot.
plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))
plt.xticks(rotation="horizontal")
plt.show()

gridsearch.predict_proba results in list rather than array

I followed an example and tried to use grid search with a random forest classifier to compute roc_auc_score. However, the y_prob=model.predict_proba(X_test)
I generated was a list (of two arrays) rather than a single array, so I was wondering what went wrong here.
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import label_binarize
from sklearn.model_selection import GridSearchCV, StratifiedShuffleSplit
from sklearn.metrics import roc_auc_score
X = np.random.rand(50,10)
y = np.random.permutation([1] * 25 + [0] * 25)
y= label_binarize(y, classes=[0, 1])
y= np.hstack((1-y, y))
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=7)
index_split = sss.split(X, y)
train_index = []
test_index = []
for train_ind, test_ind in index_split:
train_index.extend(train_ind)
test_index.extend(test_ind)
data_train = X[train_index]
out_train = y[train_index]
data_test = X[test_index]
out_test = y[test_index]
rf = RandomForestClassifier()
grids = {
'n_estimators': [10, 50, 100, 200],
'max_features': ['auto', 'sqrt', 'log2'],
'criterion': ['gini', 'entropy']
}
rf_grids_searched = GridSearchCV(rf,
grids,
scoring = "roc_auc",
n_jobs = -1,
refit=True,
cv = 5,
verbose=10)
rf_grids_searched.fit(data_train, out_train)
rf_best = rf_grids_searched.best_estimator_
y_prob=rf_best.predict_proba(data_test)
print(roc_auc_score(out_test, y_prob))
my result:
array([[0.5, 0.5],
[0.5, 0.5],
[0.7, 0.3],
[0.3, 0.7],
[0.7, 0.3],
[0.5, 0.5],
[0.1, 0.9],
[0.6, 0.4],
[0.6, 0.4],
[0.4, 0.6]]), array([[0.5, 0.5],
[0.5, 0.5],
[0.3, 0.7],
[0.7, 0.3],
[0.3, 0.7],
[0.5, 0.5],
[0.9, 0.1],
[0.4, 0.6],
[0.4, 0.6],
[0.6, 0.4]])]
expected results with probability of [0,1]:
array([[0.5, 0.5],
[0.5, 0.5],
[0.7, 0.3],
[0.3, 0.7],
[0.7, 0.3],
[0.5, 0.5],
[0.1, 0.9],
[0.6, 0.4],
[0.6, 0.4],
I also tried not binarizing y in the first place and then training the grid search, which gives the array y_prob shown below. Afterwards, I binarize y_test to match the dimensions of y_prob and compute the score. Is this sequence of steps correct?
code:
out_test1= label_binarize(out_test, classes=[0, 1])
out_test1= np.hstack((1-out_test1, out_test1))
print(roc_auc_score(out_test1, y_prob))
array([[0.6, 0.4],
[0.5, 0.5],
[0.6, 0.4],
[0.5, 0.5],
[0.7, 0.3],
[0.3, 0.7],
[0.8, 0.2],
[0.4, 0.6],
[0.8, 0.2],
[0.4, 0.6]])
The grid search's predict_proba method is just a dispatch to the best estimator's predict_proba. And from the docstring for RandomForestClassifier.predict_proba (emphasis added):
Returns
p : ndarray of shape (n_samples, n_classes), or a list of n_outputs
such arrays if n_outputs > 1. ...
Since you've specified two outputs (two columns in y), you get predicted probabilities for each of the two classes for each of the two targets.

matplotlib shift pcolormesh plot to symmetrized coordinates

I have some 2D data with x and y coordinates both within [0,1], plotted using pcolormesh.
Now I want to symmetrize the plot to [-0.5, 0.5] for both x and y coordinates. In Matlab I was able to achieve this by changing x and y from e.g. [0, 0.2, 0.4, 0.6, 0.8] to [0, 0.2, 0.4, -0.4, -0.2], without rearranging the data. However, with pcolormesh I cannot get the desired result.
A minimum example is shown below, with data represented simply by x+y:
import matplotlib.pyplot as plt
import numpy as np
x,y = np.mgrid[0:1:5j,0:1:5j]
fig,(ax1,ax2,ax3) = plt.subplots(1,3,figsize=(9,3.3),constrained_layout=1)
# original plot spanning [0,1]
img1 = ax1.pcolormesh(x,y,x+y,shading='auto')
# shift x and y from [0,1] to [-0.5,0.5]
x = x*(x<0.5)+(x-1)*(x>0.5)
y = y*(y<0.5)+(y-1)*(y>0.5)
img2 = ax2.pcolormesh(x,y,x+y,shading='auto') # similar code works in Matlab
# for this specific case, the following is close to the desired result, I can just rename x and y tick labels
# to [-0.5,0.5], but in general data is not simply x+y
img3 = ax3.pcolormesh(x+y,shading='auto')
fig.colorbar(img1,ax=[ax1,ax2,ax3],orientation='horizontal')
The corresponding figure is below, any suggestion on what is missed would be appreciated!
Let's look at what you want to achieve in a 1D example.
You have x values between 0 and 1 and a dummy function f(x) = 20*x to produce some values.
# x = [0, .2, .4, .6, .8] -> [0, .2, .4, -.4, -.2] -> [-.4, -.2, .0, .2, .4]
# fx = [0, 4, 8, 12, 16] -> [0, 4, 8, 12, 16] -> [ 12, 16, 0, 4, 8]
# ^ only flip and shift x not fx ^
You could use np.roll() to achieve the last operation.
I used n=14 to make the result better visible and show that this approach works for arbitrary n.
import numpy as np
import matplotlib.pyplot as plt
n = 14
# Regular grid on [0, 1) in both directions (end point excluded).
x, y = np.meshgrid(np.linspace(0, 1, n, endpoint=False),
                   np.linspace(0, 1, n, endpoint=False))
z = x + y

# The last n//2 grid positions are the ones that wrap to negative coordinates.
# They are selected by index rather than by comparing floats with 0.5, so the
# wrapped coordinates stay consistent with the np.roll shift below for even n
# too, where the sample at exactly 0.5 has to become -0.5.
shift = n // 2
wraps = np.arange(n) >= n - shift

# With np.meshgrid's default indexing, x varies along axis 1 and y along axis 0.
x_sym = np.where(wraps[np.newaxis, :], x - 1, x)
# for n=5: array([[ 0. , 0.2, 0.4, -0.4, -0.2], ...
x_sym = np.roll(x_sym, shift, axis=(0, 1))
# for n=5: array([[-0.4, -0.2, 0. , 0.2, 0.4], ...
y_sym = np.where(wraps[:, np.newaxis], y - 1, y)
y_sym = np.roll(y_sym, shift, axis=(0, 1))
# The data values themselves are only rolled, never modified.
z_sym = np.roll(z, shift, axis=(0, 1))
# for n=5:
# array([[1.2, 1.4, 0.6, 0.8, 1. ],
# [1.4, 1.6, 0.8, 1. , 1.2],
# [0.6, 0.8, 0. , 0.2, 0.4],
# [0.8, 1. , 0.2, 0.4, 0.6],
# [1. , 1.2, 0.4, 0.6, 0.8]])
fig, (ax1, ax2) = plt.subplots(1, 2)
img1 = ax1.imshow(z, origin='lower', extent=(.0, 1., .0, 1.))
img2 = ax2.imshow(z_sym, origin='lower', extent=(-.5, .5, -.5, .5))

Matplotlib cannot produce a plot with errorbars in a timely manner despite the fact that MWE can do it

I have written code that takes forever to draw an errorbar plot using the matplotlib package. However, when I turn it into a MWE in which I have evaluated x, y, xerr and yerr and manually inserted them into the errorbar line, the code runs smoothly. These values are exactly the values taken from the original code. I don't know why the original code doesn't produce plots while the following MWE does. Here is the working example:
import numpy as np
import matplotlib.pyplot as plt
from numpy import *
from matplotlib.ticker import MaxNLocator
my_x=np.array([20.05, 20.15, 20.25, 20.35, 20.45, 20.55, 20.65, 20.75, 20.85, 20.95, 21.05, 21.15, 21.25, 21.35, 21.45, 21.55, 21.65, 21.75, 21.85, 21.95, 22.1, 22.3])
my_x_err_lolim, my_x_err_uplim=np.array([0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0.1]), np.array([0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.05, 0.1, 0.1])
my_y=np.array([0.6372361, 0.74868714, 0.71499067, 0.73164494, 0.66726811, 0.58116623, 0.50617463, 0.42101772, 0.34221605, 0.29805772, 0.21592373, 0.17151431, 0.14598236, 0.09210864, 0.06828108, 0.05423759, 0.04408597, 0.02110087, 0.01564229, 0.00350187, 0.00175509, 0.00175509])
my_y_err_lolim, my_y_err_uplim=np.array([0.23176874, 0.27230452, 0.2600488, 0.26610612, 0.24269166, 0.2113756, 0.18593597, 0.1546548, 0.12570814, 0.10948721, 0.08039986, 0.06386388, 0.05528444, 0.0355843, 0.02698123, 0.02197097, 0.01785867, 0.00958897, 0.00859537, 0.00445642, 0.0022335, 0.0022335]), np.array([0.23176874, 0.27230452, 0.2600488, 0.26610612, 0.24269166, 0.2113756, 0.18593597, 0.1546548, 0.12570814, 0.10948721, 0.08039986, 0.06386388, 0.05528444, 0.0355843, 0.02698123, 0.02197097, 0.01785867, 0.00958897, 0.00859537, 0.00445642, 0.0022335, 0.0022335])
pdf_HI, (ax1) = plt.subplots(1, 1, sharex=True, sharey=False, figsize=(14,20))
ax1.errorbar(my_x, my_y, xerr = array([my_x_err_lolim, my_x_err_uplim]), yerr = array([my_y_err_lolim, my_y_err_uplim]), ecolor = 'blue', fmt= 'b.', elinewidth = 1, capsize = 2, linestyle = '', label='')
ax1.vlines(20.3, ymin=-0.1, ymax=3, colors='blue', linestyles='dotted', linewidths=2, label='DLA Column Density')
ax1.hlines(1, xmin=0, xmax=22.3, colors='black', linestyles='dotted', linewidths=2, label='')
ax1.hlines(0, xmin=0, xmax=22.3, colors='black', linestyles='dotted', linewidths=2, label='')
ax1.legend(loc='upper right', ncol=6, fontsize=15)
ax1.set_xlim([19, 23])
ax1.set_ylim([-0.1, 1.1])
ax1.yaxis.set_major_locator(MaxNLocator(prune='upper'))
ax1.set_ylabel('$Y$', fontsize=22)
ax1.set_xlabel('X$', fontsize=22)
ax1.set_xscale("linear", nonposx='clip')
ax1.set_yscale("linear", nonposy='clip')
ax1.xaxis.set_tick_params(width=2)
ax1.yaxis.set_tick_params(width=2)
ax1.get_xaxis().tick_bottom()
ax1.get_yaxis().tick_left()
pdf_HI.subplots_adjust(top=0.996,bottom=0.101,left=0.076,right=0.997)
plt.setp([a.get_xticklabels() for a in pdf_HI.axes[:-1]], visible=True)
pdf_HI.tight_layout()
plt.show()
I have printed out all of x, y, xerr and yerr to make sure they all follow the same consistent pattern. So, what is so special about the MWE that makes it work?

Python interpolation and extracting value of z for x and y?

Would you please help?
I have this data, where z is a function of specific x and y values:
xs = [0.15, 0.35, 0.5, 0.67, 0.8]
ys = [0.01,0.01, 0.01, 0.01, 0.01]
z = [0.75, 0.83, 1.00, 0.92, 0.91]
I arranged the values in this shape.
How can I interpolate between the points, so that I can later look up z for (x, y) values other than the ones I have?
A simple search would have helped already.
Your problem is basically the example of the
scipy.interpolate.interp2d documentation.
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy import interpolate
import numpy as np
xs = [0.15, 0.35, 0.5, 0.67, 0.8]
ys = [0.01, 0.05, 0.1, 0.2, 0.3]
# One row per y value and one column per x value: zz[j, i] belongs to
# (xs[i], ys[j]), matching the meshgrid arrays created below.
zz = np.array([
    0.75, 0.83, 1.00, 0.92, 0.91,
    0.75, 0.82, 0.87, 0.88, 0.88,
    0.74, 0.81, 0.84, 0.83, 0.83,
    0.72, 0.76, 0.77, 0.76, 0.76,
    0.72, 0.72, 0.72, 0.72, 0.72,
]).reshape((5, 5))
xx, yy = np.meshgrid(xs, ys)

# scipy.interpolate.interp2d was deprecated in SciPy 1.10 and removed in 1.14.
# For data on a regular grid, RectBivariateSpline is the replacement. The axes
# are passed in (row, column) = (y, x) order so that the evaluated grid keeps
# the (len(y), len(x)) shape that plot_surface expects alongside meshgrid.
f = interpolate.RectBivariateSpline(ys, xs, zz, kx=3, ky=3)

# Original data.
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.plot_surface(xx, yy, zz)

# Cubic interpolation on a 50 by 50 grid spanning the same x and y ranges.
x2 = np.linspace(.15, .8, 50)
y2 = np.linspace(.01, .3, 50)
xx2, yy2 = np.meshgrid(x2, y2)
zz2 = f(y2, x2)
fig2 = plt.figure()
bx = fig2.add_subplot(1, 1, 1, projection='3d')
bx.plot_surface(xx2, yy2, zz2)
plt.show()
providing the original data
and the cubic interpolation on a 50 by 50 grid
tck = interpolate.bisplrep(x, y, z, s=0)
def givemeZ(x, y, spline=None):
    """Evaluate a fitted bivariate spline at ``(x, y)``.

    Args:
        x: x coordinate(s), passed straight to ``interpolate.bisplev``.
        y: y coordinate(s), passed straight to ``interpolate.bisplev``.
        spline: spline representation as returned by
            ``interpolate.bisplrep``. When omitted, the module-level ``tck``
            is used, which keeps the original two-argument call working.

    Returns:
        Whatever ``interpolate.bisplev`` returns for these inputs.
    """
    if spline is None:
        # Fall back to the spline fitted at module level.
        spline = tck
    return interpolate.bisplev(x, y, spline)
Now, when you run the code, givemeZ will return z for a specific x and y.
This can be used without plotting: just put it below the data values and make sure that the values are arranged in the same way.

Resources