Why is Python matplot not starting from the point where my Data starts [duplicate] - python-3.x

So currently learning how to import data and work with it in matplotlib and I am having trouble even tho I have the exact code from the book.
This is what the plot looks like, but my question is how can I get it where there is no white space between the start and the end of the x-axis.
Here is the code:
import csv
from matplotlib import pyplot as plt
from datetime import datetime
# Get dates and high temperatures from file.
filename = 'sitka_weather_07-2014.csv'
with open(filename) as f:
reader = csv.reader(f)
header_row = next(reader)
#for index, column_header in enumerate(header_row):
#print(index, column_header)
dates, highs = [], []
for row in reader:
current_date = datetime.strptime(row[0], "%Y-%m-%d")
dates.append(current_date)
high = int(row[1])
highs.append(high)
# Plot data.
fig = plt.figure(dpi=128, figsize=(10,6))
plt.plot(dates, highs, c='red')
# Format plot.
plt.title("Daily high temperatures, July 2014", fontsize=24)
plt.xlabel('', fontsize=16)
fig.autofmt_xdate()
plt.ylabel("Temperature (F)", fontsize=16)
plt.tick_params(axis='both', which='major', labelsize=16)
plt.show()

There is an automatic margin set at the edges, which ensures the data to be nicely fitting within the axis spines. In this case such a margin is probably desired on the y axis. By default it is set to 0.05 in units of axis span.
To set the margin to 0 on the x axis, use
plt.margins(x=0)
or
ax.margins(x=0)
depending on the context. Also see the documentation.
In case you want to get rid of the margin in the whole script, you can use
plt.rcParams['axes.xmargin'] = 0
at the beginning of your script (same for y of course). If you want to get rid of the margin entirely and forever, you might want to change the according line in the matplotlib rc file:
axes.xmargin : 0
axes.ymargin : 0
Example
import seaborn as sns
import matplotlib.pyplot as plt
tips = sns.load_dataset('tips')
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
tips.plot(ax=ax1, title='Default Margin')
tips.plot(ax=ax2, title='Margins: x=0')
ax2.margins(x=0)
Alternatively, use plt.xlim(..) or ax.set_xlim(..) to manually set the limits of the axes such that there is no white space left.

If you only want to remove the margin on one side but not the other, e.g. remove the margin from the right but not from the left, you can use set_xlim() on a matplotlib axes object.
import seaborn as sns
import matplotlib.pyplot as plt
import math
max_x_value = 100
x_values = [i for i in range (1, max_x_value + 1)]
y_values = [math.log(i) for i in x_values]
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
sn.lineplot(ax=ax1, x=x_values, y=y_values)
sn.lineplot(ax=ax2, x=x_values, y=y_values)
ax2.set_xlim(-5, max_x_value) # tune the -5 to your needs

Related

Matplotlib Formatting X-Axis Shows Jan-1-1970

New to Matplotlib, trying to format dates on x axis. If I just use plt.xticks, the date is correct. But if I try to format the values using ax.xaxis.set_major_formatter, it changes my axis values to Jan-1-1970 based. I'm sure this is newbie stuff, thx for the bootstrap. (BTW, running in JupyterLabs notebook).
import pandas as pd
from datetime import date
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
%matplotlib inline
# data to plot
#df_plot = df_dts[df_dts.dt>"8/17/2020"]
df_plot = pd.DataFrame({
'dt': [date(2020,8,19), date(2020,8,20), date(2020,8,21), date(2020,8,22)],
'open_cnt': [2,15,2,7],
'close_cnt': [0,2,11,0]
})
# create plot
fig, ax = plt.subplots()
fig.set_size_inches(10, 5, forward=True)
index = np.arange(len(df_plot))
bar_width = 0.35
opacity = 0.8
rects1 = plt.bar(index, df_plot.open_cnt, bar_width, alpha=opacity, color='orange', label='Open')
rects2 = plt.bar(index + bar_width, df_plot.close_cnt, bar_width, alpha=opacity, color='g', label='Close')
plt.xlabel('Date')
plt.ylabel('Workitems')
plt.title('Open & Close Rates')
plt.xticks(index + bar_width/2, df_plot.dt)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b-%d-%y'))
plt.show()
Instead of messing up with formatter, set the index in your DataFrame
to proper text representation of your dates and call plot.bar on this
object:
fig, ax = plt.subplots(figsize=(10,5))
ax = df_plot.set_index(df_plot.dt.map(lambda s: s.strftime('%b-%d-%y')))\
.plot.bar(ax=ax, legend=False, title='Open & Close Rates', rot=0,
color=['orange', 'green'])
ax.set_xlabel('Date')
ax.set_ylabel('Workitems');
For your data I got the following picture:
As you can see, my code is more concise than yours.
If you don't want to change your original code much, you can simply do the transformation when you set the xticks.
df_plot['dt'] = pd.to_datetime(df_plot['dt'], format='%Y-%m-%d')
plt.xticks(index + bar_width/2, df_plot['dt'].dt.strftime('%b-%d-%y'))

Matplotlib get all axes artist objects for ArtistAnimation?

I am trying to make an animation using ArtistAnimation like this:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
fig, ax = plt.subplots()
ims = []
for i in range(60):
x = np.linspace(0,i,1000)
y = np.sin(x)
im = ax.plot(x,y, color='black')
ims.append(im)
ani = animation.ArtistAnimation(fig, ims, interval=50, blit=True,
repeat_delay=1000)
plt.show()
This animates a sine wave growing across the figure. Currently I'm just adding the Lines2D object returned by ax.plot() to ims. However, I would like to potentially draw multiple overlapping plots on the Axes and adjust the title, legend and x-axis range for each frame. How do I get an object that I can add to ims after plotting and making all the changes I want for each frame?
The list you supply to ArtistAnimation should be a list of lists of artists, one list per frame.
artist_list = [[line1a, line1b, title1], [line2a, line2b, title2], ...]
where the first list is shown in the first frame, the second list in the second frame etc.
The reason your code works is that ax.plot returns a list of lines (in your case only a list of a single line).
In any case, the following might be a more understandable version of your code where an additional text is animated.
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
fig, ax = plt.subplots()
artist_list = []
for i in range(60):
x = np.linspace(0,i,1000)
y = np.sin(x)
line, = ax.plot(x,y, color='black')
text = ax.text(i,0,i)
artist_list.append([line, text])
ani = animation.ArtistAnimation(fig, artist_list, interval=50, blit=True,
repeat_delay=1000)
plt.show()
In general, it will be hard to animate changing axes limits with ArtistAnimation, so if that is an ultimate goal consider using a FuncAnimation instead.

Seaborn barplot with two y-axis

considering the following pandas DataFrame:
labels values_a values_b values_x values_y
0 date1 1 3 150 170
1 date2 2 6 200 180
It is easy to plot this with Seaborn (see example code below). However, due to the big difference between values_a/values_b and values_x/values_y, the bars for values_a and values_b are not easily visible (actually, the dataset given above is just a sample and in my real dataset the difference is even bigger). Therefore, I would like to use two y-axis, i.e., one y-axis for values_a/values_b and one for values_x/values_y. I tried to use plt.twinx() to get a second axis but unfortunately, the plot shows only two bars for values_x and values_y, even though there are at least two y-axis with the right scaling. :) Do you have an idea how to fix that and get four bars for each label whereas the values_a/values_b bars relate to the left y-axis and the values_x/values_y bars relate to the right y-axis?
Thanks in advance!
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
columns = ["labels", "values_a", "values_b", "values_x", "values_y"]
test_data = pd.DataFrame.from_records([("date1", 1, 3, 150, 170),\
("date2", 2, 6, 200, 180)],\
columns=columns)
# working example but with unreadable values_a and values_b
test_data_melted = pd.melt(test_data, id_vars=columns[0],\
var_name="source", value_name="value_numbers")
g = sns.barplot(x=columns[0], y="value_numbers", hue="source",\
data=test_data_melted)
plt.show()
# values_a and values_b are not displayed
values1_melted = pd.melt(test_data, id_vars=columns[0],\
value_vars=["values_a", "values_b"],\
var_name="source1", value_name="value_numbers1")
values2_melted = pd.melt(test_data, id_vars=columns[0],\
value_vars=["values_x", "values_y"],\
var_name="source2", value_name="value_numbers2")
g1 = sns.barplot(x=columns[0], y="value_numbers1", hue="source1",\
data=values1_melted)
ax2 = plt.twinx()
g2 = sns.barplot(x=columns[0], y="value_numbers2", hue="source2",\
data=values2_melted, ax=ax2)
plt.show()
This is probably best suited for multiple sub-plots, but if you are truly set on a single plot, you can scale the data before plotting, create another axis and then modify the tick values.
Sample Data
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import numpy as np
columns = ["labels", "values_a", "values_b", "values_x", "values_y"]
test_data = pd.DataFrame.from_records([("date1", 1, 3, 150, 170),\
("date2", 2, 6, 200, 180)],\
columns=columns)
test_data_melted = pd.melt(test_data, id_vars=columns[0],\
var_name="source", value_name="value_numbers")
Code:
# Scale the data, just a simple example of how you might determine the scaling
mask = test_data_melted.source.isin(['values_a', 'values_b'])
scale = int(test_data_melted[~mask].value_numbers.mean()
/test_data_melted[mask].value_numbers.mean())
test_data_melted.loc[mask, 'value_numbers'] = test_data_melted.loc[mask, 'value_numbers']*scale
# Plot
fig, ax1 = plt.subplots()
g = sns.barplot(x=columns[0], y="value_numbers", hue="source",\
data=test_data_melted, ax=ax1)
# Create a second y-axis with the scaled ticks
ax1.set_ylabel('X and Y')
ax2 = ax1.twinx()
# Ensure ticks occur at the same positions, then modify labels
ax2.set_ylim(ax1.get_ylim())
ax2.set_yticklabels(np.round(ax1.get_yticks()/scale,1))
ax2.set_ylabel('A and B')
plt.show()

Seaborn right ytick [duplicate]

This question already has answers here:
multiple axis in matplotlib with different scales [duplicate]
(3 answers)
Closed 5 years ago.
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
d = ['d1','d2','d3','d4','d5','d6']
value = [111111, 222222, 333333, 444444, 555555, 666666]
y_cumsum = np.cumsum(value)
sns.barplot(d, value)
sns.pointplot(d, y_cumsum)
plt.show()
I'm trying to make pareto diagram with barplot and pointplot. But I can't print percentages to the right side ytick. By the way, if I manuplate yticks it overlaps itself.
plt.yticks([1,2,3,4,5])
overlaps like in the image.
Edit: I mean that I want to quarter percentages (0, 25%, 50%, 75%, 100%) on the right hand side of the graphic, as well.
From what I understood, you want to show the percentages on the right hand side of your figure. To do that, we can create a second y axis using twinx(). All we need to do then is to set the limits of this second axis appropriately, and set some custom labels:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
d = ['d1','d2','d3','d4','d5','d6']
value = [111111, 222222, 333333, 444444, 555555, 666666]
fig, ax = plt.subplots()
ax2 = ax.twinx() # create a second y axis
y_cumsum = np.cumsum(value)
sns.barplot(d, value, ax=ax)
sns.pointplot(d, y_cumsum, ax=ax)
y_max = y_cumsum.max() # maximum of the array
# find the percentages of the max y values.
# This will be where the "0%, 25%" labels will be placed
ticks = [0, 0.25*y_max, 0.5*y_max, 0.75*y_max, y_max]
ax2.set_ylim(ax.get_ylim()) # set second y axis to have the same limits as the first y axis
ax2.set_yticks(ticks)
ax2.set_yticklabels(["0%", "25%","50%","75%","100%"]) # set the labels
ax2.grid("off")
plt.show()
This produces the following figure:

Can't add matplotlib colorbar ticks

I am trying to add ticks and labels to a color bar, but it just doesn't seem to show up in the output. I have tried two approaches(as shown in the code below). Second appraoch was to do as shown in another question on Stack Overflow here: How to add Matplotlib Colorbar Ticks.
I must be overlooking something very simple here as I am a beginner in Matplotlib and Python.
I have managed to obtain the color bar, but the ticks I want just don't show up. Any help here will be greatly appreciated as I have been stuck at it for hours after trying and searching.
Here is the code I used to generate a heatmap using hexbin over a basemap.
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
from matplotlib.colors import LinearSegmentedColormap
from matplotlib import cm
#Loading data from CSV file
DATA_FILE = '....../Population_data.csv'
roc_data = pd.read_csv(DATA_FILE)
roc_data.head()
#Creating figure window
fig = plt.figure(figsize=(14,10))
ax = fig.add_subplot(111)
#Drawing the basemap
m = Basemap(projection='merc', lat_0=43.12, lon_0=-77.626,
resolution = 'i',llcrnrlon=-78.236,
llcrnrlat=42.935,
urcrnrlon=-77.072,
urcrnrlat=43.349)
m.drawcoastlines()
m.drawcounties(zorder=20, color='red')
m.drawcountries()
m.drawmapboundary()
#plotting the heatmap using hexbin
x, y = m(roc_data['Longitude'].values, roc_data['Latitude'].values)
values = roc_data['Total(20-64)']
m.hexbin(x, y, gridsize = 125, bins = 'log', C = values, cmap = cm.Reds)
#Defining minimum, mean and maximum population values
max_p = roc_data['Total(20-64)'].max()
min_p = roc_data['Total(20-64)'].min()
mean_p = roc_data['Total(20-64)'].mean()
#Adding Colorbar
cb = m.colorbar(location = 'bottom', format = '%d', label = 'Population by Census Blocks')
#setting ticks
#cb.set_ticks([48, 107, 1302]) #First approach, didn't work
#cb.set_ticklabels(['Min', 'Mean', 'Max'])
cb.set_ticks([min_p, mean_p, max_p]) #Second appraoch, assumed ticks and tick labels should be same
cb.set_ticklabels([min_p, mean_p, max_p]) #from the above mentioned stackoverflow question, but did't work
plt.show()
The output I get by using the first or second approach for colorbar ticks is the same. It is as here:
Heatmap and colorbar with no ticks and labels
I want the minimum, median and maximum population values (48, 107 and 1302) to be shown on the colorbar with the labels Min, Mean and Max. Thank you for your time
When plotting the hexbin plot with mode bins = 'log', the colors will be plotted with a logarithmic scaling. This means that if the data minimum, mean and maximum are min, mean and max, their values on the logarithmically scaled colorbar are log10(min), log10(mean), log10(max).
The ticks on the colorbar therefore needs to be set with the log values. The ticklabels can be set to any value. However I would think that simply putting something like "mean" on a logarithmic scale may not be too informative.
A particularity is that the minimum of the colorbar is actually log10(min+1). The +1 is due to the log which is negative below 1.
Here is a complete example.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
np.random.seed(42)
from mpl_toolkits.basemap import Basemap
from matplotlib import cm
lon = -78.236+np.random.rand(1000)*(-77.072+78.236)
lat = 42.935 + np.random.rand(1000)*(43.349-42.935)
t = 99+np.random.normal(10,20,1000)
t[:50] = np.linspace(48,1302)
roc_data = pd.DataFrame({'Longitude':lon, 'Latitude':lat, "T":t })
#Creating figure window
fig = plt.figure(figsize=(8,6))
ax = fig.add_subplot(111)
#Drawing the basemap
m = Basemap(projection='merc', lat_0=43.12, lon_0=-77.626,
resolution = 'i',llcrnrlon=-78.236,
llcrnrlat=42.935,
urcrnrlon=-77.072,
urcrnrlat=43.349)
m.drawcoastlines()
m.drawcounties(zorder=20, color='red')
m.drawcountries()
m.drawmapboundary()
#plotting the heatmap using hexbin
x, y = m(roc_data['Longitude'].values, roc_data['Latitude'].values)
values = roc_data['T']
m.hexbin(x, y, gridsize = 125, bins = 'log', C = values, cmap = cm.Reds) #bins = 'log',
#Defining minimum, mean and maximum population values
max_p = roc_data['T'].max()
min_p = roc_data['T'].min()
mean_p = roc_data['T'].mean()
print [min_p, mean_p, max_p]
print [np.log10(min_p), np.log10(mean_p), np.log10(max_p)]
#Adding Colorbar
cb = m.colorbar(location = 'bottom', format = '%d', label = 'Population by Census Blocks') #format = '%d',
#setting ticks
cb.set_ticks([np.log10(min_p+1), np.log10(mean_p), np.log10(max_p)])
cb.set_ticklabels(['Min\n({:.1f})'.format(min_p), 'Mean\n({:.1f})'.format(mean_p), 'Max\n({:.1f})'.format(max_p)])
plt.tight_layout()
plt.show()

Resources