Error when making a decision surface graph on a decision tree - python-3.x

My version of python is 3.
I have adapted this code for my data.
And when trying to make the graph, on the line
X = l_atributos[:, pair]
I have the error:
list indices must be integers or slices, not tuple
But I'm not seeing where the problem is. Could you help me?
# Draw one decision-surface subplot per pair of feature columns, then plot a
# tree fitted on all features.
# NOTE(review): indentation was lost in this paste; the statements down to the
# scatter call presumably belong inside the loops — confirm before running.
for pairidx, pair in enumerate([[0, 1],[0, 2],[0, 3],[1, 2],[1, 3],[2, 3]]):
# We only take the two corresponding features
# NOTE: indexing with [:, pair] passes a tuple as the index; a plain Python
# list rejects it with "list indices must be integers or slices, not tuple".
X = l_atributos[:, pair]
y = etiquetas
# Train
clf = DecisionTreeClassifier().fit(X, y)
# Plot the decision boundary
plt.subplot(2, 3, pairidx + 1)
# Pad the value range of both selected features by 1 on each side.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
np.arange(y_min, y_max, plot_step))
plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)
# Predict a class for every grid point, then restore the grid shape for contourf.
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
cs = plt.contourf(xx, yy, Z, cmap=plt.cm.RdYlBu)
# Axis labels are looked up by the column indices of the current pair.
plt.xlabel(['so2', 'no2', 'temp', 'viento', 'precipitacion'][pair[0]])
plt.ylabel(['so2', 'no2', 'temp', 'viento', 'precipitacion'][pair[1]])
# Plot the training points
for i, color in zip(range(n_classes), plot_colors):
# Select the samples whose label equals class i.
idx = np.where(y == i)
plt.scatter(X[idx, 0], X[idx, 1], c=color, label=['nivel 0', 'nivel 1', 'nivel 2', 'nivel 3'][i], cmap=plt.cm.RdYlBu, edgecolor='black', s=15)
plt.suptitle("Decision surface of a decision tree using paired features")
plt.legend(loc='lower right', borderpad=0, handletextpad=0)
plt.axis("tight")
# Second figure: a tree trained on every feature, drawn with plot_tree.
plt.figure()
clf = DecisionTreeClassifier().fit(l_atributos, etiquetas)
plot_tree(clf, filled=True)
plt.show()

The problem is the difference between the data structure used in the scikit-learn example and the one used in your code.
If you print the content of the iris example, you will see the following data:
from sklearn.datasets import load_iris
iris = load_iris()
print(iris.data)
output
array([[5.1, 3.5, 1.4, 0.2],
[4.9, 3. , 1.4, 0.2],
[4.7, 3.2, 1.3, 0.2],
...
As you can see, this 2D data is wrapped in numpy.array(...), i.e. it is a NumPy array.
But in your example you have just a plain nested Python list:
print(l_atributos[:3])
result
[['66', '26.0', '12.1', '16.0', '0.0'], ['75', '16.0', '10.0', '26.0', '5.9'], ['61', '25.0', '8.0', '23.0', '29.4']]
If you want to use scikit's example with minimum changes just wrap your data with numpy.array:
import numpy as np

# The values are strings, so convert them to floats while wrapping the list:
# this makes both the 2-D indexing (l_atributos[:, pair]) and the arithmetic
# used by the example (X[:, 0].min() - 1) work.
l_atributos = np.array(
    [['66', '26.0', '12.1', '16.0', '0.0'],
     ['75', '16.0', '10.0', '26.0', '5.9'],
     ['61', '25.0', '8.0', '23.0', '29.4']],
    dtype=float,
)

Related

plotting of 3-d softmax function using matplotlib

I would like to illustrate a detailed function similar to the softmax function.
The formula is as shown in the image.
I wrote it in python based on the following blog. Sorry, written in Japanese.
https://www.anarchive-beta.com/entry/2020/06/07/180000
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib
from mpl_toolkits.mplot3d.axes3d import Axes3D
%matplotlib notebook
def softmax(x, a, b, d):
    """Return a scaled, shifted softmax of ``x`` along its last axis.

    Evaluates ``b * exp(a * (x - d)) / sum(exp(a * (x - d)))`` where the sum
    runs over the last axis.

    Args:
        x: Input values; the last axis indexes the components.
        a: Slope. A scalar, or a value broadcastable against ``x`` (for
            example one slope per component).
        b: Factor multiplied into the normalised output.
        d: Offset subtracted from ``x``. A scalar, or a value broadcastable
            against ``x`` (for example one offset per component).

    Returns:
        Array of the broadcast shape of the inputs.

    Note:
        An offset that is the same for every component cancels between the
        numerator and the denominator, so it does not change the result;
        only per-component differences in ``d`` are visible in the output.
    """
    exponent = np.asarray(a, dtype=float) * (np.asarray(x, dtype=float) - d)
    # Shift the exponent itself (not the raw input) by its row-wise maximum:
    # this leaves the ratio unchanged for any ``a``/``d`` and keeps exp()
    # from overflowing.
    exponent = exponent - np.max(exponent, axis=-1, keepdims=True)
    weights = np.exp(exponent)
    return weights * b / np.sum(weights, axis=-1, keepdims=True)
# input
x_vals = np.arange(0, 10, 0.01)
x2_vals = np.arange(0, 10, 0.01)
X0_vals, X1_vals = np.meshgrid(x_vals, x2_vals)
# One (x0, x1) pair per row, so softmax() normalises over the last axis.
X_vals = np.array([
    X0_vals.flatten(),
    X1_vals.flatten()
]).T
print(X_vals)
print(X_vals[:5])
print(X_vals.shape)
input_shape = X0_vals.shape
print(input_shape)


def _plot_y0_surface(y0_grid):
    """Draw the wireframe of the first softmax output over the input grid.

    Args:
        y0_grid: Values of y_0 with the same shape as ``X0_vals``.

    Returns:
        The ``(fig, ax)`` pair that was created.
    """
    fig = plt.figure(figsize=(5, 5))
    # Axes3D(fig) is no longer attached to the figure automatically on
    # current Matplotlib (the plot stays blank); request the 3D axes from
    # the figure instead.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_wireframe(X0_vals, X1_vals, y0_grid, label='$y_0$')
    ax.set_xlabel('$x_0$')
    ax.set_ylabel('$x_1$')
    ax.set_zlabel('$y_0$')
    ax.set_title('Softmax Function', fontsize=20)
    ax.legend()
    ax.set_zlim(0, 1)
    ax.view_init(elev=20, azim=240)
    plt.show()
    return fig, ax


# Same plot for the two offsets being compared.
for d in ([10, 10], 0):
    Y_vals = softmax(X_vals, 12, 0.8, d)
    print(np.round(Y_vals[:5], 3))
    print(np.sum(Y_vals[:5], axis=1))
    Y0_vals = np.array(Y_vals[:, 0]).reshape(input_shape)
    fig, ax = _plot_y0_surface(Y0_vals)
(1)
I tried to plot the function with a_1 = a_2 = 12, b = 0.8, c_1 = c_2 = 12. However, I could not see any difference from the plot with a_1 = a_2 = 12, b = 0.8, c_1 = c_2 = 0.
How should I write the code?
(2)
I have no idea how to plot the function when a_1 is not equal to a_2, or c_1 is not equal to c_2.
(3)
I would like to overlap a scatterplot of any point on a function, but it does not overlap properly.
There might be similar questions, but I'm not very familiar with 3-D plots, so I would be glad if you could show me the details.

How to compute the distance of data points to decision boundary when using the EllipticEnvelope of sklearn?

How can I compute the euclidean distance to the boundary decision of the EllipticEnvelope? Here is my code :
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.covariance import EllipticEnvelope
from sklearn.model_selection import train_test_split
feature, output = "temperature", "consumption"
data = pd.DataFrame(np.random.normal(0,15, size=(2355,2)), columns=[feature, output])
X = data[[feature, output]]
X_train, X_test = train_test_split(X, shuffle=True, train_size=0.8)
model = EllipticEnvelope(contamination=0.18)
model.fit(X_train)
# extract the model predictions
y_pred = pd.Series(model.predict(X), index=X.index, name="anomaly")
# define the meshgrid : X = (u,v).T
u_min, u_max = X_train.iloc[:, 0].min() - 1.5, X_train.iloc[:, 0].max() + 1.5
v_min, v_max = X_train.iloc[:, 1].min() - 1.5, X_train.iloc[:, 1].max() + 1.5
n_points = 500
u = np.linspace(u_min, u_max, n_points)
v = np.linspace(v_min, v_max, n_points)
U, V = np.meshgrid(u, v)
# evaluate the decision function on the meshgrid
W = model.decision_function(np.c_[U.ravel(), V.ravel()])
W = W.reshape(U.shape)
plt.figure(figsize=(20, 6))
# The zero level set of the decision function is the learned frontier.
a = plt.contour(U, V, W, levels=[0], linewidths=2, colors="black")
# Split the observations once by predicted label (1 = regular, -1 = abnormal).
regular = X.loc[y_pred == 1]
abnormal = X.loc[y_pred == -1]
b = plt.scatter(regular.iloc[:, 0], regular.iloc[:, 1], c="yellowgreen", edgecolors='k')
c = plt.scatter(abnormal.iloc[:, 0], abnormal.iloc[:, 1], c="tomato", edgecolors='k')
# legend_elements() returns proxy artists for the contour lines; it replaces
# a.collections[0], which is deprecated since Matplotlib 3.8.
frontier_handle = a.legend_elements()[0][0]
plt.legend([frontier_handle, b, c], ['learned frontier', 'regular observations', 'abnormal observations'], bbox_to_anchor=(1.05, 1))
plt.axis('tight')
plt.show()
Edits
I am able to get the decision boundary points using the following code. Now, the problem can be solved by computing numerically the distance.
# Collect the vertices of the learned frontier from the contour set `a`.
# `allsegs` holds, for each contour level, a list of (N, 2) vertex arrays; it
# is used here instead of `a.collections`, which is deprecated since
# Matplotlib 3.8. All segments are concatenated so that no path is dropped,
# and the grid vector `v` defined earlier is left untouched.
segments = [seg for level_segs in a.allsegs for seg in level_segs if len(seg)]
boundary = np.vstack(segments) if segments else np.empty((0, 2))
x = boundary[:, 0]
y = boundary[:, 1]
I have an obvious solution: take every data point d and compute the Euclidean distance between d and each boundary point e=(x, y), keeping the smallest one. But it is a brute-force technique... :D I will continue my research!
Another solution would be to fit an ellipse and compute the distance using the formula described by @epiliam here: https://math.stackexchange.com/questions/3670465/calculate-distance-from-point-to-ellipse-edge
I will provide one solution tomorrow based on the brute-force. It seems to work well for small dataset (n_rows < 10000). I did not test for larger ones.

How to reduce the scale of a scatter plot of row coordinates to merge it with a circle of correlations and make a biplot?

I have a dataset composed of data with the same unit of measurement. Before making my pca, I centered my data using sklearn.preprocessing.StandardScaler(with_std=False).
I don't understand why but using the sklearn.decomposition.PCA.fit_transform(<my_dataframe>) method when I want to display a correlation circle I get two perfectly represented orthogonal variables, thus indicating that they are independent, but they are not. With a correlation matrix I observe perfectly that they are anti-correlated.
By dint of research I came across the "prince" package, which manages to get the correct coordinates of my centered but unscaled variables.
When I do my PCA with it, I can perfectly display the projection of my rows. It also has the advantage of being able to display ellipses. The only problem is that there is no function for a biplot.
I managed to display a circle of correlations using the column_correlations() method to get the coordinates of the variables. By tinkering here is what I managed to get:
When I try to put my two graphs together to form a biplot, my scatter plot is displayed in a scale that is way too large compared to the correlation circle.
I would just like to merge the two charts together using this package.
Here is the code that allowed me to get the graph showing row principal coordinates:
Note: In order to propose a model to reproduce I use the iris dataset, resembling in form to my dataset.
import pandas as pd
import prince
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
dataset = pd.read_csv(url, names=names)
dataset = dataset.set_index('Class')
sc = StandardScaler(with_std=False)
dataset = pd.DataFrame(sc.fit_transform(dataset),
index=dataset.index,
columns=dataset.columns)
prince_pca = prince.PCA(n_components=2,
n_iter=3,
rescale_with_mean=True,
rescale_with_std=False,
copy=True,
check_input=True,
engine='auto',
random_state=42)
prince_pca = prince_pca.fit(dataset)
ax = prince_pca.plot_row_coordinates(dataset,
ax=None,
figsize=(10, 10),
x_component=0,
y_component=1,
labels=None,
color_labels=dataset.index,
ellipse_outline=True,
ellipse_fill=True,
show_points=True)
plt.show()
Here's the one I tinkered with to get my circle of correlations:
pcs = prince_pca.column_correlations(dataset)
pcs_0=pcs[0].to_numpy()
pcs_1=pcs[1].to_numpy()
pcs_coord = np.concatenate((pcs_0, pcs_1))
fig = plt.subplots(figsize=(10,10))
plt.xlim(-1,1)
plt.ylim(-1,1)
plt.quiver(np.zeros(pcs_0.shape[0]), np.zeros(pcs_1.shape[0]),
pcs_coord[:4], pcs_coord[4:], angles='xy', scale_units='xy', scale=1, color='r', width= 0.003)
for i, (x, y) in enumerate(zip(pcs_coord[:4], pcs_coord[4:])):
plt.text(x, y, pcs.index[i], fontsize=12)
circle = plt.Circle((0,0), 1, facecolor='none', edgecolor='b')
plt.gca().add_artist(circle)
plt.plot([-1,1],[0,0],color='silver',linestyle='--',linewidth=1)
plt.plot([0,0],[-1,1],color='silver',linestyle='--',linewidth=1)
plt.title("Correlation circle of variable", fontsize=22)
plt.xlabel('F{} ({}%)'.format(1, round(100*prince_pca.explained_inertia_[0],1)),
fontsize=14)
plt.ylabel('F{} ({}%)'.format(2, round(100*prince_pca.explained_inertia_[1],1)),
fontsize=14)
plt.show()
And finally here is the one that tries to bring together the circle of correlations as well as the main row coordinates graph from the "prince" package:
pcs = prince_pca.column_correlations(dataset)
pcs_0 = pcs[0].to_numpy()
pcs_1 = pcs[1].to_numpy()
pcs_coord = np.concatenate((pcs_0, pcs_1))
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, aspect="equal")
plt.xlim(-1, 1)
plt.ylim(-1, 1)
plt.quiver(np.zeros(pcs_0.shape[0]),
np.zeros(pcs_1.shape[0]),
pcs_coord[:4],
pcs_coord[4:],
angles='xy',
scale_units='xy',
scale=1,
color='r',
width=0.003)
for i, (x, y) in enumerate(zip(pcs_coord[:4], pcs_coord[4:])):
plt.text(x, y, pcs.index[i], fontsize=12)
plt.scatter(
x=prince_pca.row_coordinates(dataset)[0],
y=prince_pca.row_coordinates(dataset)[1])
circle = plt.Circle((0, 0), 1, facecolor='none', edgecolor='b')
plt.gca().add_artist(circle)
plt.plot([-1, 1], [0, 0], color='silver', linestyle='--', linewidth=1)
plt.plot([0, 0], [-1, 1], color='silver', linestyle='--', linewidth=1)
plt.title("Correlation circle of variable", fontsize=22)
plt.xlabel('F{} ({}%)'.format(1,
round(100 * prince_pca.explained_inertia_[0],
1)),
fontsize=14)
plt.ylabel('F{} ({}%)'.format(2,
round(100 * prince_pca.explained_inertia_[1],
1)),
fontsize=14)
plt.show()
Bonus question: how to explain that the PCA class of sklearn does not calculate the correct coordinates for my variables when they are centered but not scaled? Any method to overcome this?
Here is the circle of correlations obtained by creating the pca object with sklearn where the "length" and "margin_low" variables appear as orthogonal:
Here is the correlation matrix demonstrating the negative correlation between the "length" and "margin_low" variables:
I managed to mix the two graphs.
Here is the code to display the graph combining the circle of correlations and the scatter with the rows:
import pandas as pd
import prince
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import numpy as np
# Import dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
# Preparing the dataset
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']
dataset = pd.read_csv(url, names=names)
dataset = dataset.set_index('Class')
# Preprocessing: centered but not scaled
sc = StandardScaler(with_std=False)
dataset = pd.DataFrame(sc.fit_transform(dataset),
index=dataset.index,
columns=dataset.columns)
# PCA setting
prince_pca = prince.PCA(n_components=2,
n_iter=3,
rescale_with_mean=True,
rescale_with_std=False,
copy=True,
check_input=True,
engine='auto',
random_state=42)
# PCA fitting
prince_pca = prince_pca.fit(dataset)
# Component coordinates
pcs = prince_pca.column_correlations(dataset)
# Row coordinates
pca_row_coord = prince_pca.row_coordinates(dataset).to_numpy()
# Preparing the colors for parameter 'c'
colors = dataset.T
# Display row coordinates
ax = prince_pca.plot_row_coordinates(dataset,
figsize=(12, 12),
x_component=0,
y_component=1,
labels=None,
color_labels=dataset.index,
ellipse_outline=True,
ellipse_fill=True,
show_points=True)
# We plot the vectors
plt.quiver(np.zeros(pcs.to_numpy().shape[0]),
np.zeros(pcs.to_numpy().shape[0]),
pcs[0],
pcs[1],
angles='xy',
scale_units='xy',
scale=1,
color='r',
width=0.003)
# Display the names of the variables
# Read the current view limits so that only arrows whose tip is visible get a
# label (these bounds were previously used without ever being defined).
xmin, xmax = plt.xlim()
ymin, ymax = plt.ylim()
# pcs is indexed by variable name; reuse it instead of recomputing the
# column correlations on every iteration.
for name, x, y in zip(pcs.index, pcs[0], pcs[1]):
    if xmin <= x <= xmax and ymin <= y <= ymax:
        plt.text(x,
                 y,
                 name,
                 fontsize=16,
                 ha="center",
                 va="bottom",
                 color="red")
# Display a circle
circle = plt.Circle((0, 0),
1,
facecolor='none',
edgecolor='orange',
linewidth=1)
plt.gca().add_artist(circle)
# Title
plt.title("Row principal coordinates and circle of correlations", fontsize=22)
# Display the percentage of inertia on each axis
plt.xlabel('F{} ({}%)'.format(1,
round(100 * prince_pca.explained_inertia_[0],
1)),
fontsize=14)
plt.ylabel('F{} ({}%)'.format(2,
round(100 * prince_pca.explained_inertia_[1],
1)),
fontsize=14)
# Display the grid to better read the values ​​of the circle of correlations
plt.grid(visible=True)
plt.show()

Wrong spatial information after resampling 2D spatial data (different origin and resolution) with pyresample in Python

I need to resample regularly gridded (in lon-lat) data to a new grid with lower resolution and a different origin. I thought I'd use pyresample.
Problem: I get an obviously wrong spatial location of my results after resampling.
In the following example, I construct a simple 2D array with some spatial grid (defined in sourcegrid which is a pyresample AreaDefinition object) and some mask, to resample it to another targetgrid. The spatial information is lost somewhere in the process, I can't figure out where... any idea?
import numpy as np
from pyresample.geometry import AreaDefinition
from pyresample.kd_tree import resample_nearest
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
# Source data
lonmin = -10; lonmax = 10.; latmin=40.; latmax=60.; nlon = 300; nlat = 250
lon = np.linspace(lonmin, lonmax, nlon); lat = np.linspace(latmin, latmax, nlat)
dlon = lon[1] - lon[0]; dlat = lat[1] - lat[0]
lon2d, lat2d = np.meshgrid(lon, lat)
sourcedata = np.cos(np.deg2rad(lat2d)*100) + np.sin(np.deg2rad(lon2d)*100)
# Introduce a polygon as mask
xpol = [frac*(nlon-1) for frac in (0, 0.5, 0.4, 0.6, 0.9, 0., 0)]
ypol = [frac*(nlat-1) for frac in (0, 0.4, 0.6, 0.5, 1., 1., 0)]
polygon = [xy for xy in zip(xpol, ypol)]
img = Image.new('L', (nlon, nlat), 0)
ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)
mask = np.array(img)
xpol = [lon[int(x)] for x in xpol]; ypol = [lat[int(y)] for y in ypol] # translate in lon-lat for plot
sourcedata = np.ma.masked_where(mask, sourcedata)
# Define source and target areas
sourceextent = [lonmin-dlon/2, latmin-dlat/2, lonmax+dlon/2, latmax+dlat/2] # [xmin, ymin, xmax, ymax]
sourceextentforplot = [sourceextent[i] for i in (0,2,1,3)] # [xmin, xmax, ymin, ymax]
targetextent = [lonmin-dlon/2 + 0.12*(lonmax-lonmin), latmin-dlat/2 + 0.24*(latmax-latmin),
lonmin-dlon/2 + 0.78*(lonmax-lonmin), latmin-dlat/2 + 0.91*(latmax-latmin)]
targetextentforplot = [targetextent[i] for i in (0,2,1,3)]
sourcegrid = AreaDefinition(area_id='Grd1', description='Source Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=nlon, height=nlat, area_extent=sourceextent)
# Lower resolution, different origin
targetgrid = AreaDefinition(area_id='Grd2', description='Target Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=123, height=97, area_extent=targetextent)
# Resample sourcedata to newdata
newdata = resample_nearest(sourcegrid, sourcedata, targetgrid, fill_value=None, radius_of_influence=50000)
# Plot
def doplt(ax, data, extent):
    """Draw ``data`` on the map axes ``ax`` together with the reference overlays.

    Adds coastlines and labelled gridlines, shows ``data`` as an image placed
    at ``extent`` with its first row at the bottom, outlines the mask polygon
    (``xpol``/``ypol``) with a dashed black line and the target area
    (``targetextentforplot``) with a dashed red rectangle, then fixes the
    visible map extent.
    """
    plate_carree = ccrs.PlateCarree()
    ax.coastlines(resolution='50m', color='gray', alpha=1., linewidth=2.)
    ax.gridlines(draw_labels=True)
    ax.imshow(data, origin='lower', transform=plate_carree, extent=extent)
    ax.plot(xpol, ypol, 'k--', transform=plate_carree)
    # targetextentforplot is ordered [xmin, xmax, ymin, ymax]; walk the four
    # corners and close the rectangle.
    left, right, bottom, top = targetextentforplot
    ax.plot([left, right, right, left, left],
            [bottom, bottom, top, top, bottom],
            'r--', lw=3, transform=plate_carree)
    ax.set_extent([-12, 12, 38, 62])
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5,10), subplot_kw={'projection': ccrs.PlateCarree()})
doplt(ax1, sourcedata, extent=sourceextentforplot)
ax1.set_title('Source data, target area in red')
doplt(ax2, newdata, extent=targetextentforplot)
ax2.set_title('New data, with wrong spatial ref (or plotting?)')
plt.show()
Note: other suggestions to do the resampling operation than pyresample, ideally with example, are welcome.
So the problem is that you're assuming row 0 is the bottom of the image, but as shown in this example, pyresample uses row 0 as the top. I modified your example to tweak the polygon latitudes as well as using origin='upper' to plot with imshow:
import numpy as np
from pyresample.geometry import AreaDefinition
from pyresample.kd_tree import resample_nearest
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
# Source data
lonmin = -10; lonmax = 10.; latmin=40.; latmax=60.; nlon = 300; nlat = 250
lon = np.linspace(lonmin, lonmax, nlon); lat = np.linspace(latmin, latmax, nlat)
dlon = lon[1] - lon[0]; dlat = lat[1] - lat[0]
lon2d, lat2d = np.meshgrid(lon, lat)
sourcedata = np.hypot(lon2d, lat2d - 50) * (np.cos(np.deg2rad(lat2d)*100) + np.sin(np.deg2rad(lon2d)*100))
# Introduce a polygon as mask
xpol = [frac*(nlon-1) for frac in (0, 0.5, 0.4, 0.6, 0.9, 0., 0)]
ypol = [frac*(nlat-1) for frac in (0, 0.4, 0.6, 0.5, 1., 1., 0)]
polygon = [xy for xy in zip(xpol, ypol)]
img = Image.new('L', (nlon, nlat), 0)
ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)
mask = np.array(img)
xpol = [lon[int(x)] for x in xpol]; ypol = [lat[nlat - 1 - int(y)] for y in ypol] # translate in lon-lat for plot
sourcedata = np.ma.masked_where(mask, sourcedata)
# Define source and target areas
sourceextent = [lonmin-dlon/2, latmin-dlat/2, lonmax+dlon/2, latmax+dlat/2] # [xmin, ymin, xmax, ymax]
sourceextentforplot = [sourceextent[i] for i in (0,2,1,3)] # [xmin, xmax, ymin, ymax]
targetextent = [lonmin-dlon/2 + 0.12*(lonmax-lonmin), latmin-dlat/2 + 0.24*(latmax-latmin),
lonmin-dlon/2 + 0.78*(lonmax-lonmin), latmin-dlat/2 + 0.91*(latmax-latmin)]
targetextentforplot = [targetextent[i] for i in (0,2,1,3)]
sourcegrid = AreaDefinition(area_id='Grd1', description='Source Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=nlon, height=nlat, area_extent=sourceextent)
# Lower resolution, different origin
targetgrid = AreaDefinition(area_id='Grd2', description='Target Grid', proj_id='proj_id_blabla',
projection='EPSG:4326', width=123, height=97, area_extent=targetextent)
# Resample sourcedata to newdata
newdata = resample_nearest(sourcegrid, sourcedata, targetgrid, fill_value=None, radius_of_influence=50000)
# Plot
def doplt(ax, data, extent):
    """Draw ``data`` on the map axes ``ax`` together with the reference overlays.

    Adds coastlines and labelled gridlines, shows ``data`` as an image placed
    at ``extent`` with its first row at the top and colours normalised to the
    range 0-20, outlines the mask polygon (``xpol``/``ypol``) with a dashed
    black line and the target area (``targetextentforplot``) with a dashed
    red rectangle, then fixes the visible map extent.
    """
    plate_carree = ccrs.PlateCarree()
    ax.coastlines(resolution='50m', color='gray', alpha=1., linewidth=2.)
    ax.gridlines(draw_labels=True)
    ax.imshow(data, transform=plate_carree, extent=extent,
              norm=plt.Normalize(0, 20), origin='upper')
    ax.plot(xpol, ypol, 'k--', transform=plate_carree)
    # targetextentforplot is ordered [xmin, xmax, ymin, ymax]; walk the four
    # corners and close the rectangle.
    left, right, bottom, top = targetextentforplot
    ax.plot([left, right, right, left, left],
            [bottom, bottom, top, top, bottom],
            'r--', lw=3, transform=plate_carree)
    ax.set_extent([-12, 12, 38, 62])
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(5,10), subplot_kw={'projection': ccrs.PlateCarree()})
doplt(ax1, sourcedata, extent=sourceextentforplot)
ax1.set_title('Source data, target area in red')
doplt(ax2, newdata, extent=targetextentforplot)
ax2.set_title('New data, with wrong spatial ref (or plotting?)');
That gives:
I found it helpful to use an image with more variation to be able to line it up better with the source data.
I found out that using a SwathDefinition instead of an AreaDefinition (see doc) solved the problem.
Defining sourcegrid and targetgrid as follows in the original code gives good results:
sourcegrid = SwathDefinition(lons=lon2d, lats=lat2d)
lon2dtarget, lat2dtarget = np.meshgrid(np.linspace(targetextent[0], targetextent[2], 123),
np.linspace(targetextent[1], targetextent[3], 97))
targetgrid = SwathDefinition(lons=lon2dtarget, lats=lat2dtarget)

How to plot values on a Python Basemap?

Is it possible to plot values on a basemap?
Let's say I have 3 lists of data.
lat = [50.3, 62.1, 41.4, ...]
lon = [12.4, 14.3, 3.5, ...]
val = [3, 5.4, 7.4, ...]
I've created a simple basemap:
def create_map(ax=None, lllon=6.00, lllat=47.0, urlon=16.00, urlat=55.10):
    """Create a transverse-Mercator Basemap and draw its base layers.

    Args:
        ax: Matplotlib axes to draw on; ``None`` lets Basemap pick the
            current axes.
        lllon: Longitude of the lower-left corner.
        lllat: Latitude of the lower-left corner.
        urlon: Longitude of the upper-right corner.
        urlat: Latitude of the upper-right corner.

    Returns:
        The Basemap instance, with coastlines, countries and rivers drawn.
    """
    m = Basemap(llcrnrlon=lllon, llcrnrlat=lllat,
                urcrnrlon=urlon, urcrnrlat=urlat,
                resolution='h',
                projection='tmerc',
                # Centre the projection on the middle of the bounding box.
                lon_0=(lllon + urlon) / 2, lat_0=(lllat + urlat) / 2,
                # Forward the caller's axes; it was previously ignored.
                ax=ax)
    m.drawcoastlines()
    m.drawcountries()
    m.drawrivers()
    return m
Now I want to plot the values of the "val" list on this map depending of their coordinates:
m = create_map()
x, y = m(lon,lat)
m.scatter(x, y, val) # somthing like that
plt.show()
Well, I already figured out that Basemap is unable to plot 3D values, but is there a way to achieve it?
The short, sweet, and simple answer to your first question is yes, you can plot using basemap (here's the documentation for it).
If you're looking to plot in 3d, there is documentation that explains how to plot using Basemap. Here's a simple script to get you started:
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
# Importing Axes3D registers the '3d' projection on Matplotlib versions that
# do not register it automatically.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

plt.close('all')
fig = plt.figure()
# Figure.gca(projection=...) has been removed from Matplotlib; create the 3D
# axes explicitly instead.
ax = fig.add_subplot(111, projection='3d')
extent = [-127, -65, 25, 51]  # [lon_min, lon_max, lat_min, lat_max]
# make the map and axis.
m = Basemap(llcrnrlon=extent[0], llcrnrlat=extent[2],
            urcrnrlon=extent[1], urcrnrlat=extent[3],
            projection='cyl', resolution='l', fix_aspect=False, ax=ax)
# Each draw* call returns a 2D line collection, which is added to the 3D axes.
ax.add_collection3d(m.drawcoastlines(linewidth=0.25))
ax.add_collection3d(m.drawcountries(linewidth=0.25))
ax.add_collection3d(m.drawstates(linewidth=0.25))
ax.view_init(azim=230, elev=15)
ax.set_xlabel(u'Longitude (°E)', labelpad=10)
ax.set_ylabel(u'Latitude (°N)', labelpad=10)
ax.set_zlabel(u'Altitude (ft)', labelpad=20)
# values to plot - change as needed. Plots 2 dots, one at elevation 0 and another 100.
# also draws a line between the two.
x, y = m(-85.4808, 32.6099)
ax.plot3D([x, x], [y, y], [0, 100], color='green', lw=0.5)
ax.scatter3D(x, y, 100, s=5, c='k', zorder=4)
ax.scatter3D(x, y, 0, s=2, c='k', zorder=4)
ax.set_zlim(0., 400.)
plt.show()

Resources