Given the following data frame:
import pandas as pd
import numpy as np

# Sample data: 14 rows, a one-letter category in column A and its value in column B.
df = pd.DataFrame(
    {
        'A': list('ABCDEFGHIJKLMN'),
        'B': [20, 25, 39, 43, 32, 17, 40, 40, 34, 56, 76, 23, 54, 34],
    }
)
I'd like to create a bubble chart where each y-tick label is the same color as its respective dot. The code below works great if I only had say 4 rows of data and 4 colors in my color list. However, for some reason, when I have more than 9 or so rows of data (and colors in my color list), it only takes the first 9 elements of colors in the l.set_color(i) line. Any thoughts as to why this occurs? Is it a limitation of zip when iterating? Related to the data frame?
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

# Reversed copies of the label and value columns of df.
labels=df.A[::-1]
vals=df.B[::-1]
ind=np.arange(len(labels))
colors1=['r','g','b','c','y','y','y','g','b','c','y','y','y','g']
fig, ax = plt.subplots(1, 1, figsize = (6,4))
# One translucent dot per row, drawn at height i with the i-th colour.
for i in ind:
    plt.plot(vals[i],i,marker='o',markeredgecolor='none', markersize=17, alpha=.5, linestyle='none', color=colors1[i])
ax.tick_params(axis='x',which='both',bottom='on',top='off',color='grey',labelcolor='grey')
ax.tick_params(axis='y',which='both',left='off',right='off',color='grey',labelcolor='grey')
# Hide all four spines.
ax.spines['top'].set_visible(False);ax.spines['right'].set_visible(False);
ax.spines['bottom'].set_visible(False);ax.spines['left'].set_visible(False)
ax.set_xlim([0,50])
ax.set_ylim([min(ind)-1,max(ind)+1])
fontcols=colors1[::-1]
# Colour the y tick labels one by one; note that this loop runs before the
# plt.yticks() call further down assigns the 14 tick positions and labels.
for l,i in zip(ax.yaxis.get_ticklabels(),fontcols):
    l.set_color(i)
    l.set_fontsize(11)
    print(l,i) #This shows that only 9 members are being colored for some reason
plt.yticks(ind,labels,fontsize=14)
plt.show()
Thanks in advance!
You just need to set the yticks before you try and set the colours. As it is, matplotlib creates 9 ticks by default, you set their colours, then you tell it you want 14 ticks after. With just a little reordering, it all works:
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import pandas as pd
import numpy as np

df = pd.DataFrame({'A': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N'],
                   'B': [20, 25, 39, 43, 32, 17, 40, 40, 34, 56, 76, 23, 54, 34]})

# Reverse the rows and convert to plain arrays.  A reversed Series keeps its
# original integer index, so `vals[i]` on the Series would be a *label* lookup
# and return the un-reversed value, pairing each dot with the wrong tick label.
labels = df.A[::-1].to_numpy()
vals = df.B[::-1].to_numpy()
ind = np.arange(len(labels))
colors1 = ['r', 'g', 'b', 'c', 'y', 'y', 'y', 'g', 'b', 'c', 'y', 'y', 'y', 'g']

fig, ax = plt.subplots(1, 1, figsize=(6, 4))

# One translucent dot per row, drawn at height i with the i-th colour.
for i in ind:
    ax.plot(vals[i], i, marker='o', markeredgecolor='none', markersize=17,
            alpha=.5, linestyle='none', color=colors1[i])

# Use real booleans: the 'on'/'off' strings were deprecated in Matplotlib 2.2,
# and on current releases any non-empty string (including 'off') is truthy.
ax.tick_params(axis='x', which='both', bottom=True, top=False, color='grey', labelcolor='grey')
ax.tick_params(axis='y', which='both', left=False, right=False, color='grey', labelcolor='grey')

# Hide all four spines.
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

ax.set_xlim([0, 80])  # I increased this to fit all your data in
ax.set_ylim([min(ind) - 1, max(ind) + 1])

fontcols = colors1  # NOTE: you don't need to reverse this

# Define the 14 ticks *first*; only then do the tick-label objects that are
# coloured below exist for every row.
plt.yticks(ind, labels, fontsize=14)
for tick_label, tick_color in zip(ax.yaxis.get_ticklabels(), fontcols):
    tick_label.set_color(tick_color)
    tick_label.set_fontsize(11)
    print(tick_label, tick_color)
plt.show()
Also note, you don't need to reverse the colour list before setting the tick colours
Related
I am attempting to run a for loop in order to plot multiple scatter plots. For the code that I have, I only get one plot at the end. How to go about generating the correct row x column plots to save?
I have checked out some of the answers given here and here, but it does not work for me. Is there a more optimum way to generate these plots?
Here is my code:
from sklearn.datasets import make_classification
import seaborn as sns
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
# Generate noisy Data
num_trainsamples = 500
num_testsamples = 50
X_train,y_train = make_classification(n_samples=num_trainsamples,
                                      n_features=240,
                                      n_informative=9,
                                      n_redundant=0,
                                      n_repeated=0,
                                      n_classes=10,
                                      n_clusters_per_class=1,
                                      class_sep=9,
                                      flip_y=0.2,
                                      #weights=[0.5,0.5],
                                      random_state=17)
n_components=2
n_neighbours=[1, 5, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 25, 30]
local_connectivity=2
min_dist=0.15
target_names = ['t1', 't2', 't3', 't4', 't5', 't6', 't7', 't8', 't9', 't10']
plt.figure(figsize=(15,15))
# One subplot per n_neighbours entry on a 3x5 grid.
# NOTE(review): indentation was lost in the source; the title and colorbar
# calls are assumed to belong to the loop body - confirm against the original.
for i in range(0, len(n_neighbours)):
    plt.subplot(3,5,i+1)
    plt.clf()
    plt.scatter(
        X_train[:, 0],
        X_train[:, 1],
        s = 20,
        c=y_train,
        cmap=plt.cm.nipy_spectral,
        edgecolor="k",
        linewidths=0.75,
        label=y_train,
        alpha=0.45,
    )
    plt.title(f'n_components = {n_components}, n_neighbors = {n_neighbours[i]}, local_conn = {local_connectivity}, min_dist = {min_dist}')
    # Discrete colorbar: 10 bins centred on the integer class ids 0..9.
    cbar = plt.colorbar(boundaries=np.arange(11)-0.5)
    cbar.set_ticks(np.arange(10))
    cbar.set_ticklabels(target_names)
The reason you see just one plot is the line plt.clf(). This command tells matplotlib to clear the current figure, so on every pass through the loop it clears everything drawn so far and you only see the last plot. Commenting out that line will give you the figure below, which is what I think you are looking for...
PLOT
I have the 5 plots generated by the code below. What I need is a quick way to rename only the title of the 3rd plot, from dataset(100,33) to dataset-trial. What's the fastest way to do that?
import numpy as np
import matplotlib.pyplot as plt
# Each pair is (number of points in the first cluster, number in the second).
ratios = [(100, 2), (100, 20),(100,33),(100, 40), (100, 80)]
plt.figure(figsize = (20,6))
# One subplot per ratio, laid out in a single row of five.
for j,i in enumerate(ratios):
    plt.subplot(1, 5, j+1)
    X_p=np.random.normal(0,0.05,size=(i[0],2))
    X_n=np.random.normal(0.13,0.02,size=(i[1],2))
    y_p=np.array([1]*i[0]).reshape(-1,1)
    y_n=np.array([0]*i[1]).reshape(-1,1)
    X=np.vstack((X_p,X_n))
    y=np.vstack((y_p,y_n))
    # Title is "dataset", the 1-based subplot number, then the ratio tuple.
    plt.title("dataset" + str(j+1) +str(i))
    plt.scatter(X_p[:,0],X_p[:,1])
    plt.scatter(X_n[:,0],X_n[:,1],color='red')
plt.show()
Thanks
In my code, when I inspect the pred and test objects I get these results:
Pred = array([16.88414476, 33.73226078, 75.357018 , 26.79480124, 60.49103328])
test = array([20, 27, 69, 30, 62], dtype=int64)
I apply:
plt.scatter(pred,test)
How can I plot both the pred and test results on the same graph?
Please help: how do I get the desired output?
Scatter plot would take the given two values as x and y values of the plot.
If you want to plot both of them as separate data, use plt.plot
import matplotlib.pyplot as plt
import numpy as np
# Predicted values and the observed test values they are compared against.
Pred = np.array([16.88414476, 33.73226078, 75.357018 , 26.79480124, 60.49103328])
test = np.array([20, 27, 69, 30, 62])
# Draw each array as its own line; only y-values are passed to plot().
plt.plot(Pred)
# Dashed line style so the test series can be told apart from the predictions.
plt.plot(test, linestyle='--')
You could also use the pandas plot functionality
import pandas as pd  # the snippet above only imports plt and np

# Put both series in one frame (columns 'pred' and 'test') and let
# DataFrame.plot() draw them together.
pd.DataFrame({'pred': Pred, 'test': test}).plot()
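A single plt.scatter call treats its two arrays as the x and y coordinates of one set of points rather than as two separate series, but you can draw each array as its own line with plt.plot: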
from matplotlib import pyplot as plt
import numpy as np
# Predicted values and the observed test values they are compared against.
Pred = np.array([16.88414476, 33.73226078, 75.357018 , 26.79480124, 60.49103328])
test = np.array([20, 27, 69, 30, 62])
# Give each line a label so that the legend below can name it.
plt.plot(Pred, label='Pred Label')
plt.plot(test, label='Test Label')
plt.legend() # Show the legend with each line's label
I am trying to create a stereographic plot using Basemap offset from the north pole, but the west-east directions are apparently reversed. Is this an error in my implementation, or a bug?
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np
# Stereographic map with its projection centre at lat 90 / lon 270; the visible
# region is specified through lower-left and upper-right corner coordinates.
m = Basemap(projection='stere',
lat_0=90, lon_0=270, lat_ts=(90.+35.)/2.,
llcrnrlon=150,urcrnrlon=-60,llcrnrlat=50,urcrnrlat=50)
# Meridians every 30 degrees, with label flags set for three of the four sides.
m.drawmeridians(np.arange(0,360,30),labels=[1,1,1,0])
# Parallels every 5 degrees from -90 up to (not including) 90; no labels argument given.
m.drawparallels(np.arange(-90,90,5))
m.drawcoastlines()
m.shadedrelief()
plt.show()
Here is the result:
result from script
How might I reproduce the following map (which is offset-centred, and rotated?)
Restricted map
An azimuthal map projection always needs a proper set of parameters to give a good result. For a stereographic projection centred on the north pole, the appropriate parameters are not the ones you would normally use with the more common Plate Carrée projection. Here is working code that you can try.
# Stereographic projection coverage
# should be specified less than half of a hemisphere
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np

# Latitude of true scale: midway between 35 and 90 degrees.
true_scale_lat = (90. + 35.) / 2.

# North-pole-centred map; (width, height) is the plot extent in meters.
polar_map = Basemap(
    projection='stere',
    resolution='c',
    lat_0=90,
    lon_0=270,
    lat_ts=true_scale_lat,
    width=15000000,
    height=10000000,
)

# Graticule: meridians every 30 degrees, parallels every 10 degrees.
meridians = np.arange(0, 360, 30)
parallels = np.arange(0, 90, 10)
polar_map.drawmeridians(meridians, labels=[1, 1, 1, 0])
polar_map.drawparallels(parallels, labels=[0, 0, 0, 1])

polar_map.drawcoastlines()
polar_map.shadedrelief()
plt.show()
The resulting plot (map 1):
Other parts of the world can be brought into the plotting area by re-centering the map.
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np

# projection center point (also used as the latitude of true scale)
lon0 = 180
lat0 = 60

recentred_map = Basemap(
    projection='stere',
    resolution='c',
    lat_0=lat0,
    lon_0=lon0,
    lat_ts=lat0,
    width=15000000,
    height=10000000,
)

# Graticule: meridians every 30 degrees, parallels every 10 degrees.
# Label flags are ordered left, right, top, bottom.
meridians = np.arange(0, 360, 30)
parallels = np.arange(0, 90, 10)
recentred_map.drawmeridians(meridians, labels=[1, 0, 0, 1])
recentred_map.drawparallels(parallels, labels=[0, 1, 1, 0])

recentred_map.drawcoastlines()
recentred_map.shadedrelief()
plt.show()
The output plot (map 2):
By specifying proper values of llcrnrlon, urcrnrlon, llcrnrlat, urcrnrlat, in Basemap() one can get the map extents as required. Here is another example of plot as requested by the OP.
from mpl_toolkits.basemap import Basemap
import matplotlib.pyplot as plt
import numpy as np

fig = plt.figure(figsize=(8, 8))

# Latitude of true scale: midway between 35 and 90 degrees.
true_scale_lat = (90. + 35.) / 2.

# Map extent given by its lower-left and upper-right corners instead of
# a width/height pair.
corner_map = Basemap(
    projection='stere',
    resolution='c',
    lat_0=90,
    lon_0=-90,
    lat_ts=true_scale_lat,
    llcrnrlon=-142,
    urcrnrlon=78,
    llcrnrlat=19,
    urcrnrlat=45,
)

# Graticule: meridians every 30 degrees, parallels every 10 degrees.
# Label flags are ordered left, right, top, bottom.
meridians = np.arange(0, 360, 30)
parallels = np.arange(0, 90, 10)
corner_map.drawmeridians(meridians, labels=[1, 0, 1, 0])
corner_map.drawparallels(parallels, labels=[0, 1, 0, 1])

corner_map.drawcoastlines()
corner_map.shadedrelief()
plt.show()
The resulting plot (map 3):
I have a pandas DataFrame containing 6000 values ranging between 1 and 2500. I would like to create a chart with a predetermined x-axis, i.e. [1,2,4,8,16,32,64,128,256,512,more], and a bar showing the count for each of these bins. I've been looking into numpy.histogram, but that does not seem to let me choose the bin range (it estimates one); the same goes for matplotlib.
The codes i've tried so far is,
# Attempt 1: matplotlib histogram with hand-picked bin edges.
plt.hist(df['cnt'],bins=[0,1,2,4,8,16,32,64,128,256,512])
plt.show()
# Attempt 2: NumPy histogram called without a bins argument.
np.histogram(df['cnt'])
I then plotted the NumPy output, but it does not look the way I want.
I hope my question makes sense, else i will try to expand.
EDIT
when i run the
# Histogram of the 'cnt' column using the hand-picked bin edges.
plt.hist(df['cnt'],bins=[0,1,2,4,8,16,32,64,128,256,512])
plt.show()
i get:
What i want:
Where the second one have been made in Excel using the data analysis histogram function. I hope this gives a better picture of what i would like to do.
I think you want a base-2 logarithmic scale on the xaxis.
You can do that with ax.set_xscale('log', base=2) (the keyword was spelled basex before Matplotlib 3.3).
You then also need to adjust the tick locations and formatting, which you can do with ax.xaxis.set_major_locator(ticker.FixedLocator(bins)) and ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np

fig, ax = plt.subplots(1)

# Some fake data
cnt = np.random.lognormal(0.5, 2.0, 6000)

# Define your bins
bins = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

# Plot the histogram
ax.hist(cnt, bins=bins)

# Set scale to base2 log.  The keyword is `base`; the older per-axis spelling
# `basex` was deprecated in Matplotlib 3.3 and has since been removed.
ax.set_xscale('log', base=2)

# Set ticks and ticklabels using ticker: one tick per bin edge, printed as a
# plain number instead of a power of two.
ax.xaxis.set_major_locator(ticker.FixedLocator(bins))
ax.xaxis.set_major_formatter(ticker.ScalarFormatter())

plt.show()