I have two geometries with the same CRS (EPSG:4326), but their X/Y coordinates are in completely different formats. They should be overlapping. I am having issues with the geometry below: it says it is in EPSG:4326, but its X/Y values aren't in that projection.
import osmnx as ox
import networkx as nx
from shapely.geometry import Point, LineString, Polygon
import geopandas as gpd
from descartes import PolygonPatch
# configure the place, network type, trip times, and travel speed
place = 'Stockholm, Sweden'
network_type = 'drive'
trip_times = [15] #in minutes
travel_speed = 4.5 #walking speed in km/hour
G_4326 = ox.graph_from_place(place, network_type=network_type)
# get nearest node
urban_intervention = ox.distance.get_nearest_node(G_4326, (59.33039855957031, 18.022981643676758))
G_4326 = ox.project_graph(G_4326)
# add an edge attribute for time in minutes required to traverse each edge
meters_per_minute = travel_speed * 1000 / 60 #km per hour to m per minute
for u, v, k, data in G_4326.edges(data=True, keys=True):
data['time'] = data['length'] / meters_per_minute
isochrone_polys = []
for trip_time in sorted(trip_times, reverse=True):
subgraph = nx.ego_graph(G_4326, urban_intervention, radius=trip_time, distance='time')
node_points = [Point((data['x'], data['y'])) for node, data in subgraph.nodes(data=True)]
bounding_poly = gpd.GeoSeries(node_points).unary_union.convex_hull
isochrone_polys.append(bounding_poly)
# convert to geopandas
treatment_radius = isochrone_polys[0]
treatment_radius_gdf = gpd.GeoDataFrame(index=[0], crs='epsg:4326', geometry=[treatment_radius])
print(treatment_radius_gdf.crs)
print(treatment_radius_gdf)
result:
Any help would be very welcome!
You have a line of code that is projecting from EPSG:4326 to a UTM CRS. Additionally, the variable naming is misleading: the variable is still called G_4326 after projection, when the graph is no longer in EPSG:4326!
This is clearly documented: https://osmnx.readthedocs.io/en/stable/osmnx.html#osmnx.projection.project_graph
# this line changes geometry from epsg:4326 to UTM CRS
G_4326 = ox.project_graph(G_4326)
This makes sense given that you are calculating in meters. However, it means that the CRS of the generated geometry is not EPSG:4326. Set the CRS correctly; then you can project back to EPSG:4326. Clearly this means G_4326 is badly named; I have not addressed that here.
output
+proj=utm +zone=33 +ellps=WGS84 +datum=WGS84 +units=m +no_defs +type=crs
geometry
0 POLYGON ((676968.606 6569863.360, 676868.881 6...
epsg:4326
geometry
0 POLYGON ((18.10176 59.23074, 18.10003 59.23086...
full code
import osmnx as ox
import networkx as nx
import geopandas as gpd
from shapely.geometry import Point
# Build a 15-minute isochrone polygon around a point in Stockholm and show it
# on top of the street network.
place = "Stockholm, Sweden"
network_type = "drive"
trip_times = [15]  # in minutes
travel_speed = 4.5  # walking speed in km/hour

G_4326 = ox.graph_from_place(place, network_type=network_type)
gdf_nodes1, gdf_edges1 = ox.graph_to_gdfs(G_4326)

# Find the origin node BEFORE projecting. The point is given as (lat, lon) in
# degrees, so it has to be matched against the unprojected graph; matching it
# against the UTM graph compares degrees with metres and picks an unrelated
# node. The node id is unchanged by the projection below.
# NOTE(review): newer OSMnx releases replace get_nearest_node with
# nearest_nodes(G, X=lon, Y=lat) - confirm against the installed version.
urban_intervention = ox.distance.get_nearest_node(
    G_4326, (59.33039855957031, 18.022981643676758)
)

# this line changes geometry from epsg:4326 to UTM CRS
G_4326 = ox.project_graph(G_4326)
gdf_nodes, gdf_edges = ox.graph_to_gdfs(G_4326)
m = gdf_edges.explore()

# add an edge attribute for the time in minutes required to traverse each edge
meters_per_minute = travel_speed * 1000 / 60  # km per hour to m per minute
for u, v, k, data in G_4326.edges(data=True, keys=True):
    data["time"] = data["length"] / meters_per_minute

# one convex-hull polygon per trip time, largest trip time first
isochrone_polys = []
for trip_time in sorted(trip_times, reverse=True):
    subgraph = nx.ego_graph(
        G_4326, urban_intervention, radius=trip_time, distance="time"
    )
    node_points = [
        Point((data["x"], data["y"])) for node, data in subgraph.nodes(data=True)
    ]
    bounding_poly = gpd.GeoSeries(node_points).unary_union.convex_hull
    isochrone_polys.append(bounding_poly)

treatment_radius = isochrone_polys[0]
# The node coordinates come from the projected graph, so label the polygon
# with the projected CRS and convert to EPSG:4326 explicitly afterwards.
treatment_radius_gdf = gpd.GeoDataFrame(
    index=[0], crs=gdf_edges.crs, geometry=[treatment_radius]
)
print(treatment_radius_gdf.crs)
print(treatment_radius_gdf)
print(treatment_radius_gdf.to_crs("epsg:4326").crs)
print(treatment_radius_gdf.to_crs("epsg:4326"))
treatment_radius_gdf.explore(m=m, color="red")
Related
I'm using plotly's Scattermapbox to overlay a map with a shaded image of polygons created by datashader's shade function (based on https://plotly.com/python/datashader/), but the projections do not seem to align, see picture below. Any suggestions how I can overcome this problem using plotly's Scattermapbox and datashader?
Reproducible example:
import geopandas as gpd
import plotly.graph_objects as go
import spatialpandas as spd
import datashader as ds
from colorcet import fire
import datashader.transfer_functions as tf
# load data
world = gpd.read_file(
gpd.datasets.get_path('naturalearth_lowres')
)
# world = world.to_crs(epsg=3857)
# create spatialpandas DataFrame
df_world = spd.GeoDataFrame(world)
# create datashader canvas and aggregate
cvs = ds.Canvas(plot_width=1000, plot_height=1000)
agg = cvs.polygons(df_world, geometry='geometry', agg=ds.mean('pop_est'))
# create shaded image
tf.shade(agg, cmap=fire)
shaded image
# create shaded image and convert to Python image
img = tf.shade(agg, cmap=fire)[::-1].to_pil()
coords_lat, coords_lon = agg.coords["y"].values, agg.coords["x"].values
# Corners of the image, which need to be passed to mapbox
coordinates = [
[coords_lon[0], coords_lat[0]],
[coords_lon[-1], coords_lat[0]],
[coords_lon[-1], coords_lat[-1]],
[coords_lon[0], coords_lat[-1]],
]
fig = go.Figure(go.Scattermapbox())
fig.update_layout(
mapbox_style="open-street-map",
mapbox_layers=[
{
"sourcetype": "image",
"source": img,
"coordinates": coordinates,
}
]
)
fig.show()
overlayed map
I read that Scattermapbox only supports Mercator projection which I found confusing as the examples in plotly's documentation seem to be in long/lat format, but I tried converting the coordinates of the GeoDataFrame to epsg 3857, see
# world = world.to_crs(epsg=3857)
The result is that the shaded image becomes invisible. Any help would be highly appreciated.
Have you tried with epsg:4326? In my case, I use this one and the geometries are placed correctly.
On the other hand, when converting the geometry column of the dataframe with geopandas, note that `to_crs` returns a new GeoDataFrame: either assign the result back or pass the parameter "inplace=True".
We have discovered the solution to this issue: Below is each step / function code and description:
Imports for reference :
import datashader as ds
import datashader.transfer_functions as tf
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import rasterio
import shapely.geometry
import xarray as xr
_helper_add_pseudomercator_optimized : Creates array from the meshgrid with the proper mercator coordinates from the original raster with epsg:4326.
def _helper_add_pseudomercator_optimized(raster):
    """Adds mercator coordinates epsg:3857 to a raster with epsg:4326.

    Originally defined as `add_psuedomercator_adam_manuel_optimized`

    Args:
        raster: xr.DataArray: `xr.DataArray` to transform coordinates. Its
            `x` and `y` coordinates are read as epsg:4326 values.

    Returns:
        The same `raster` object, with `x_mercator` and `y_mercator` entries
        (dims `("y", "x")`) holding the epsg:3857 coordinates of every point.
    """
    # Transformer that converts coordinates from epsg 4326 to 3857
    gcs_to_3857 = Transformer.from_crs(4326, 3857, always_xy=True)

    # atleast_1d keeps a single-element axis usable; squeeze() alone would
    # collapse it to a 0-d array that cannot be iterated or indexed.
    x_vals = np.atleast_1d(raster.x.values.squeeze())  # values of dimension x
    y_vals = np.atleast_1d(raster.y.values.squeeze())  # values of dimension y

    # Allows transformation of non-square coordinates: each axis is
    # transformed on its own, paired with a constant filler of equal length.
    y_dummy_vals = np.full(x_vals.shape, y_vals[0])  # dummy values
    x_dummy_vals = np.full(y_vals.shape, x_vals[0])  # dummy values
    x, _ = gcs_to_3857.transform(x_vals, y_dummy_vals)  # Obtain x output here only
    _, y = gcs_to_3857.transform(x_dummy_vals, y_vals)  # Obtain y output here only

    # Create meshgrid with the x and y mercator converted coordinates
    lon, lat = np.meshgrid(x, y)

    # Add meshgrid to raster -> raster now has mercator coordinates for every point
    raster["x_mercator"] = xr.DataArray(lon, dims=("y", "x"))
    raster["y_mercator"] = xr.DataArray(lat, dims=("y", "x"))
    return raster
def _helper_affine_transform(raster):
    """Derive an affine transform from the `x` / `y` coordinates of a raster.

    Args:
        raster: xr.DataArray: `xr.DataArray` whose `x` and `y` coordinates
            define the transform

    Returns:
        Affine transform: a translation to the first x / y coordinate, each
        shifted down by half a cell, composed with a `(res, -res)` scale.
    """
    first_x = raster.x[0].values
    first_y = raster.y[0].values

    # Cell size taken from the x axis only and reused for y.
    # NOTE(review): the x extent is divided by the sample count rather than by
    # the number of intervals (count - 1) - confirm this is intended.
    cell = (raster.x[-1].values - first_x) / raster.x.shape[0]
    half_cell = cell / 2

    # NOTE(review): half a cell is subtracted on y as well as on x although
    # the y scale is negative - confirm the sign against the raster's y order.
    origin_shift = Affine.translation(first_x - half_cell, first_y - half_cell)
    return origin_shift * Affine.scale(cell, -cell)
def _helper_to_datashader_quadmesh(raster, y="lat", x="lon"):
    """Rasterize the raster's values as a datashader quadmesh.

    Args:
        raster: xr.DataArray: `xr.DataArray` raster of flooded regions
        y: name of the entry on `raster` holding the 2-D y coordinate of
            every cell
        x: name of the entry on `raster` holding the 2-D x coordinate of
            every cell

    Returns:
        The result of `datashader.Canvas.quadmesh` on a 5000 x 5000 canvas
    """
    # Per-cell coordinates, attached under fixed names for the quadmesh call.
    mesh_coords = {
        "Qy": (["y", "x"], raster[y].values),
        "Qx": (["y", "x"], raster[x].values),
    }
    values_2d = raster.values.squeeze()
    z = xr.DataArray(values_2d, dims=["y", "x"], coords=mesh_coords, name="z")

    canvas = ds.Canvas(plot_height=5000, plot_width=5000)
    return canvas.quadmesh(z, x="Qx", y="Qy")
def _helper_img_coordinates(raster):
"""Get coordinates of the corners of the baseline raster.
Args:
raster: xr.DataArray: `xr.DataArray` to get corner coordinates from
Returns:
coordinates of where to plot the flooded raster on the map
"""
coords_lat, coords_lon = (raster.y.values, raster.x.values)
if len(coords_lat.shape) > 1:
coords_lat = coords_lat[:, 0]
coords_lon = coords_lon[0, :]
coordinates = [
[coords_lon[0], coords_lat[0]],
[coords_lon[-1], coords_lat[0]],
[coords_lon[-1], coords_lat[-1]],
[coords_lon[0], coords_lat[-1]],
]
return coordinates
All operations together for the below sequence :
# Add mercator coordinates to the raster
raster = _helper_add_pseudomercator_optimized(raster)
# Create quadmesh from the burned raster
agg_mesh = _helper_to_datashader_quadmesh(raster, x="x_mercator", y="y_mercator")
# Don't plot values where the flooding is zero
agg_mesh = agg_mesh.where(agg_mesh < 0)
# Convert to datashader shade
im = tf.shade(agg_mesh, Theme.color_scale)
# Convert to image
img = im.to_pil()
# Get coordinates to plot raster on map
coordinates = _helper_img_coordinates(baseline_raster)
Then this image produced by datashader can be added to a plotly plot using the plotly objects layer, and providing this layer to the figure
layer = go.layout.mapbox.Layer(
below="water",
coordinates=coordinates,
sourcetype="image",
source=img,
)
I have a graph (A) built from unweighted edges, and I would like to compute the average shortest path length for the largest connected component (giantC) of my main graph (A). However, the script has been running for more than 3 hours so far (tried on Colab and locally), and no results have been output for either diameter or average_shortest_path_length.
I am using networkx==2.5, python==3.6.9
and here is my script
import logging
import networkx as nx
from networkx.algorithms.distance_measures import diameter
from networkx.algorithms.shortest_paths.generic import average_shortest_path_length
import json  # json.load is used below but json is absent from the imports above

# graph is built from a json file as follows
with open('graph.json') as f:
    graph_dict = json.load(f)
_indices = graph_dict['indices']
s_lst, rs_lst = _indices[0], _indices[1]
graph_ = nx.Graph()
# one edge per (s_lst[i], rs_lst[i]) pair
graph_.add_edges_from(zip(s_lst, rs_lst))

# fetch the hugest graph of all graphs
# the components are computed once and reused for both steps below
components = list(nx.connected_components(graph_))
connected_subgraphs = [graph_.subgraph(cc) for cc in components]
logging.info('connected subgraphs fetched.')
Gcc = max(components, key=len)
giantC = graph_.subgraph(Gcc)
logging.info('Fetched Giant Subgraph')

n_nodes = giantC.number_of_nodes()
print(f'Number of nodes: {n_nodes}')  # output is 106088
avg_shortest_path = average_shortest_path_length(giantC)
print(f'Avg Shortest path len: {avg_shortest_path}')
dia = diameter(giantC)
print(f'Diameter: {dia}')
Is there any way to make it faster? or an alternative to computing both the diameter and shortest path length for the giantC graph?
For future readers: NetworkX >= 2.6 provides an approximate diameter metric for both directed and undirected graphs.
Example:
>>> import timeit
>>> timeit.timeit("print(nx.diameter(g))",setup="import networkx as nx; g = nx.fast_gnp_random_graph(5000, 0.03, 100)", number=1)
3
224.9891120430002
>>> timeit.timeit("print(nx.approximation.diameter(g))",setup="import networkx as nx; g = nx.fast_gnp_random_graph(5000, 0.03, 100)", number=1)
3
0.09284040399961668
Note that the approximate metric computes a lower bound on the exact value.
For future readers,
if you want to fetch the largest connected subgraph from your NetworkX Graph
import networkx as nx
import logging
def fetch_hugest_subgraph(graph_):
    """Return the subgraph of `graph_` induced by its largest connected component.

    Size is measured by node count (`len` of each component).
    """
    biggest_component = max(nx.connected_components(graph_), key=len)
    giant_subgraph = graph_.subgraph(biggest_component)
    logging.info('Fetched Giant Subgraph')
    return giant_subgraph
If you want to compute the average shortest path length for your graph we can do that by sampling
from statistics import mean
import networkx as nx
import random
def write_nodes_number_and_shortest_paths(graph_, n_samples=10_000,
                                          output_path='graph_info_output.txt'):
    """Estimate each component's mean shortest-path length by sampling.

    For every connected component of `graph_`, draws `n_samples` random node
    pairs, measures the shortest-path length between them, and writes one
    line with the node count and the mean of the sampled lengths.

    Args:
        graph_: NetworkX graph to analyse.
        n_samples: number of node pairs sampled per component.
        output_path: text file to (over)write with one line per component.

    Raises:
        statistics.StatisticsError: if `n_samples` is 0 (mean of no samples).
    """
    with open(output_path, encoding='utf-8', mode='w+') as f:
        for component in nx.connected_components(graph_):
            component_ = graph_.subgraph(component)
            # Materialise the node list once per component, not once per sample.
            nodes = list(component_.nodes())
            if len(nodes) < 2:
                # A lone node has no distinct pair; every sampled length is 0.
                lengths = [0] * n_samples
            else:
                lengths = []
                for _ in range(n_samples):
                    # sample() draws two DISTINCT nodes. Drawing with
                    # replacement adds zero-length (n, n) pairs, which pulls
                    # the estimate below the average over distinct pairs.
                    n1, n2 = random.sample(nodes, 2)
                    lengths.append(
                        nx.shortest_path_length(component_, source=n1, target=n2)
                    )
            f.write(f'Nodes num: {len(nodes)}, shortest path mean: {mean(lengths)} \n')
Computing avg_shortest_path_length as I was informed by Joris Kinable (in the comments) has the complexity of O(V^3); V = number of nodes. The same applies for computing the diameter of your graph.
I am trying to plot some wind vectors on maps using cartopy. However, the winds look strange, so I ran a simple test using wind vectors at a ring of points at 75N. The wind vectors should point 45 degrees away from both the lat and lon grid lines, because the u-winds and v-winds are of the same magnitude and the stereographic projection should preserve angles.
from matplotlib import pyplot as plt
import numpy as np
import cartopy.crs as ccrs
pcproj = ccrs.PlateCarree()
lon0 = -150
mapproj = ccrs.Stereographic(
central_longitude=lon0,central_latitude=75,
true_scale_latitude=75,
)
XLIM = 300e3; YLIM=300e3
dm =5; dp=1
fig = plt.figure(0,(7,7))
ax = fig.add_axes([0.1,0.1,0.85,0.9],projection=mapproj)
ax.set_extent([-XLIM,XLIM,-YLIM,YLIM],crs=mapproj)
ax.coastlines(resolution='50m',color='.5',linewidth=1.5)
lon_grid = np.arange(-180,181,dm)
lat_grid = np.arange(-80,86,dp)
gl = ax.gridlines(draw_labels=True,
xlocs=lon_grid,ylocs=lat_grid,
x_inline=False,y_inline=False,
color='k',linestyle='dotted')
# --- draw 45 degree winds at 75N
lon = np.linspace(0,360,73)
lat = np.ones(len(lon))*75
uu = np.ones(len(lon))*10
vv = uu*1.
pts = mapproj.transform_points(pcproj,lon,lat)
xx = pts[...,0]; yy = pts[...,1]
ux,vx = mapproj.transform_vectors(pcproj,lon,lat,uu,vv)
ax.quiver(xx,yy,ux,vx,transform=mapproj)
It is clear that the wind vectors do not point 45 degrees from the lat/lon grid lines.
I check the ux, vx at 150W and 75N, the center of the projection. The wind here should be the same as in lat/lon grid but it is (3.54,13.7) rather than (10,10).
The wind vectors looks the same as using this line
ax.quiver(lon,lat,uu,vv,transform=pcproj)
This may not be a surprise because I think quiver uses transform_vector and transform_points under the hood.
I tried winds along lat/lon grid directions, they are transformed correctly.
Is this a bug in transform_vectors or am I using it incorrectly?
UPDATE:
As suggested by @swatchai, for now the u-wind should be divided by cos(lat) before being fed into transform_vectors. I doubt this is the intended behavior, but this normalization should be used until a future cartopy update changes it. The maintainers may choose just to update the docstring. Still, it is something to be cautious about in future cartopy updates.
When (lat, long) in degrees are used, a small angular distance ds on spherical surface has 2 components:
`R*dlat` in NS direction
`R*dlon*cos(lat)` in EW direction
where R is the earth radius,
the second component varies with latitude of the location considered,
with this in mind, the computation of ux, vx at latitude 75 degrees should be
rho = np.pi/180.  # degrees -> radians
# Divide the eastward component by cos(latitude) of each point. Using the
# `lat` array instead of a hard-coded 75 keeps this valid at other latitudes.
ux, vx = mapproj.transform_vectors(pcproj, lon, lat, uu/np.cos(lat*rho), vv)
I would like to create an accurate buffer of 5 miles around a coordinate, my current code is:
cpr_gdf['p_buffer']=cpr_gdf['coordinates'].buffer(5*(1/60))
The coordinates column was created with this code:
cpr_df['coordinates']=list(zip(cpr_df.sample_longitude_decimal,cpr_df.sample_latitude_decimal))
cpr_df['coordinates']=cpr_df['coordinates'].apply(Point)
cpr_gdf=gpd.GeoDataFrame(cpr_df,geometry='coordinates',crs={'init' :'epsg:4326'})
Thanks for any help!
You need to convert to an equal area projection that is most accurate to where your buffer will be (good resource at https://epsg.io/)
For example, I'm making maps in Michigan, so I'm using EPSG:3174 (which I believe is in meters, correct me if wrong). Given you've already converted your dataframe to a GeoPandas dataframe, you can convert your current projection to 3174 and then create your buffer (converting miles to meters)
# Reproject to EPSG:3174 before buffering so the distance is not in degrees.
# The `epsg=` keyword replaces the deprecated {'init': 'epsg:...'} CRS dict.
cpr_gdf = cpr_gdf.to_crs(epsg=3174)
buffer_length_in_meters = (5 * 1000) * 1.60934  # 5 miles -> km -> m
cpr_gdf['geometry'] = cpr_gdf.geometry.buffer(buffer_length_in_meters)
You can calculate a buffer around points without converting to any other CRS using the function below. It works in meters, so if you want to use miles, just multiply the distance in miles by 1609.34.
Here is an example
from geographiclib.geodesic import Geodesic
import numpy as np
from shapely.geometry import Polygon
import pandas as pd
import geopandas as gpd
def geod_buffer(gdf, distance, resolution=16, geod = Geodesic.WGS84):
    """
    Build a geodesic buffer polygon around every point of a GeoDataFrame.

    gdf - GeoDataFrame whose active geometry column holds points (x, y)
    distance - The radius of the buffer in meters
    resolution – The resolution of the buffer around each vertex
                 (the azimuth step is 90/resolution degrees)
    geod - Define an ellipsoid

    Returns a list with one shapely Polygon per row, in row order.
    """
    # The azimuths are the same for every point, so build them once.
    azimuths = np.arange(0, 360, 90/resolution)

    buffer = list()
    # Iterate the active geometry column instead of a column literally named
    # 'geometry', so frames with a differently named geometry column work.
    for point in gdf.geometry:
        lon1, lat1 = point.x, point.y
        buffer_ = list()
        for azi1 in azimuths:
            # Destination reached from the point along azimuth azi1.
            properties = geod.Direct(lat1, lon1, azi1, distance)
            buffer_.append([properties['lon2'], properties['lat2']])
        buffer.append(Polygon(buffer_))
    return buffer
locations = pd.DataFrame([
{
'longitude': 54.604972,
'latitude': 18.346815},
{
'longitude': 54.605917,
'latitude': 18.347249}
])
locations_gpd = gpd.GeoDataFrame(locations,
geometry=gpd.points_from_xy(locations.longitude, locations.latitude),
crs='epsg:4326').drop(columns=['longitude', 'latitude'])
locations_gpd['geometry'] = geod_buffer(locations_gpd, 1000)
At the equator, one minute of latitude or longitude is ~ 1.84 km or 1.15 mi (ref).
So if you define your point as P = [y, x], then you can create a buffer around it of, let's say, 4 minutes, which is approximately 5 miles. Expressed in decimal degrees, 4 minutes is 4/60, so buffer = 4 / 60 (about 0.067). The bounding box is then easily obtained with
import math

# `buffer` is a half-width in decimal degrees (4 arc-minutes = 4 / 60).
# It is an offset from the point. Multiplying the coordinate by it would make
# the box size depend on how far the point is from 0 degrees.
lat_buffer = buffer
# A degree of longitude covers less ground away from the equator, so widen
# the longitude half-width by 1 / cos(latitude) to keep the box even.
lon_buffer = buffer / math.cos(math.radians(P[0]))
minlat = P[0] - lat_buffer
maxlat = P[0] + lat_buffer
minlon = P[1] - lon_buffer
maxlon = P[1] + lon_buffer
I've just recently started using basemap, and I've encountered a problem.
When I run:
import numpy as np
import matplotlib.pyplot as plt
import netCDF4 as nc
import glob
from mpl_toolkits.basemap import Basemap
import warnings #note that I did not include all of my code, so some imports may be unnecessary.
%matplotlib inline
datapath = "/somedirectory/www.ncei.noaa.gov/data/avhrr-land-leaf-area-index-and-fapar/access/2001" #I've downloaded the data from here as well, I've averaged over every month.
datafiles = glob.glob(datapath+"/"+"avg_LAI_fAPAR*.nc")
data = [None]*len(datafiles)
month = [None]*len(datafiles)
lats = [None] * len(datafiles)
lons = [None] * len(datafiles)
fAPAR =[None] * len(datafiles)
LAI =[None] * len(datafiles)
for number, file in enumerate(datafiles):
month[number] = file[-4:-2]
data[number] = nc.Dataset(file,format = 'NETCDF4')
lats[number] = data[number]["latitude"]
lons[number] = data[number]["longitude"]
fAPAR[number] = data[number]["FAPAR"]
LAI[number] = data[number]["LAI"]
m = Basemap(width=5000000,height=3500000,
resolution='l',\
lat_ts=40)
for k in range(1): #only do one loop, it takes a long time to run on my machine. Idea is that is should be able to loop through all months in 2001
print(k)
plt.figure()#figsize=(20,10))
lon, lat = np.meshgrid(lons[k], lats[k])
xi, yi = m(lon,lat)
cs = m.pcolor(xi,yi,np.squeeze(LAI[k][0])) #note that the first dimension [k] comes from the for-loop, the second [0] is the temporal part of the LAI (avereged out in a bash script).
m.drawcoastlines()
m.drawcountries()
cbar = m.colorbar(cs, location='bottom', pad="10%")
plt.title('LAI on {}'.format(month[k]))`
The plot comes out empty, so nothing plotted (only white space). The data is masked, but also if I unmasked the data (i.e. replace the masked data by a nan), the plot did not show anything. The np.nanmean(LAI[0]) is about 1, but the regular mean is about -90, as the fill value is -100.
The data seems to work with NCview in linux.
I work on python 3.6, with the latest packages installed. Does anyone know where the problem might be?
Thanks in advance!