import datetime as dt
import logging
import os
import warnings
from typing import Callable, Literal, Optional, Union
import geopandas as gpd
import numpy as np
import xarray as xr
from rioxarray.merge import merge_arrays
from .. import NLMOD_DATADIR, cache, dims, util
from ..util import tqdm
from .webservices import arcrest
logger = logging.getLogger(__name__)
[docs]
@cache.cache_pickle
def get_gdf_surface_water(ds=None, extent=None):
"""Read a shapefile with surface water as a geodataframe, cut by the extent of the
model.
Parameters
----------
ds : xr.DataSet, None, optional
dataset containing relevant model information
extent : list, tuple or np.array, optional
desired model extent (xmin, xmax, ymin, ymax)
Returns
-------
gdf_opp_water : GeoDataframe
surface water geodataframe.
"""
if ds is None and extent is None:
raise ValueError("At least one of 'ds' or 'extent' must be provided.")
# laad bestanden in
fname = os.path.join(NLMOD_DATADIR, "shapes", "opp_water.shp")
gdf_swater = gpd.read_file(fname)
if ds is not None:
extent = dims.get_extent(ds)
if (extent[0] < 93000) & (extent[2] < 480000):
logger.warning(
"This function does not yield good results for the North Sea, see https://github.com/gwmod/nlmod/issues/225"
)
gdf_swater = util.gdf_within_extent(gdf_swater, extent)
return gdf_swater
[docs]
@cache.cache_netcdf(coords_3d=True)
def get_surface_water(ds, gdf=None, da_basename="rws_oppwater"):
"""Create 3 data-arrays from the shapefile with surface water:
.. deprecated:: 0.10.0
`get_surface_water` will be removed in nlmod 1.0.0, it is replaced by
`discretize_surface_water` because of new naming convention
https://github.com/gwmod/nlmod/issues/47
- area: area of the shape in the cell
- cond: conductance based on the area and "bweerstand" column in shapefile
- stage: surface water level based on the "peil" column in the shapefile
Parameters
----------
ds : xr.DataSet
xarray with model data
gdf : gpd.GeoDataFrame or None, optional
geometries of the surface water, if None the geometries are obtained using
get_gdf_surface_water. The default is None.
da_basename : str
name of the polygon shapes, name is used as a prefix
to store data arrays in ds
Returns
-------
ds : xarray.Dataset
dataset with modelgrid data.
"""
warnings.warn(
"'get_surface_water' is deprecated and will be removed in a future version. "
"Use 'nlmod.read.rws.discretize_surface_water' to project the surface water on the model grid",
DeprecationWarning,
)
if gdf is None:
gdf = get_gdf_surface_water(ds)
return discretize_surface_water(ds, gdf, da_basename)
[docs]
@cache.cache_netcdf(coords_3d=True)
def discretize_surface_water(ds, gdf, da_basename="rws_oppwater"):
"""Create 3 data-arrays from the shapefile with surface water.
These data arrays are:
- area: area of the shape in the cell
- cond: conductance based on the area and "bweerstand" column in shapefile
- stage: surface water level based on the "peil" column in the shapefile
Parameters
----------
ds : xr.DataSet
xarray with model data
gdf : gpd.GeoDataFrame or None, optional
geometries of the surface water and the columns 'bweerstand' and 'peil'.
da_basename : str
name of the polygon shapes, name is used as a prefix
to store data arrays in ds
Returns
-------
ds : xarray.Dataset
dataset with modelgrid data.
"""
modelgrid = dims.modelgrid_from_ds(ds)
area = xr.zeros_like(ds["top"])
cond = xr.zeros_like(ds["top"])
peil = xr.zeros_like(ds["top"])
for _, row in gdf.iterrows():
area_pol = dims.polygon_to_area(
modelgrid,
row["geometry"],
xr.ones_like(ds["top"]),
ds.gridtype,
)
cond = xr.where(area_pol > area, area_pol / row["bweerstand"], cond)
peil = xr.where(area_pol > area, row["peil"], peil)
area = xr.where(area_pol > area, area_pol, area)
ds_out = util.get_ds_empty(ds, keep_coords=("y", "x"))
ds_out[f"{da_basename}_area"] = area
ds_out[f"{da_basename}_area"].attrs["units"] = "m2"
ds_out[f"{da_basename}_cond"] = cond
ds_out[f"{da_basename}_cond"].attrs["units"] = "m2/day"
ds_out[f"{da_basename}_stage"] = peil
ds_out[f"{da_basename}_stage"].attrs["units"] = "mNAP"
for datavar in ds_out:
ds_out[datavar].attrs["source"] = "RWS"
ds_out[datavar].attrs["date"] = dt.datetime.now().strftime("%Y%m%d")
return ds_out
[docs]
@cache.cache_netcdf(coords_2d=True)
def get_northsea(ds, gdf=None, da_name="northsea"):
"""Get Dataset which is 1 at the northsea and 0 everywhere else.
Sea is defined by rws surface water shapefile.
.. deprecated:: 0.10.0
`get_northsea` will be removed in nlmod 1.0.0, it is replaced by
`discretize_northsea` because of new naming convention
https://github.com/gwmod/nlmod/issues/47
Parameters
----------
ds : xr.DataSet, None, optional
dataset containing relevant model information
gdf : gpd.GeoDataFrame or None, optional
geometries of the surface water, if None the geometries are obtained using
get_gdf_surface_water. The default is None.
da_name : str, optional
name of the datavar that identifies sea cells
Returns
-------
ds_out : xr.DataSet
Dataset with a single DataArray, this DataArray is 1 at sea and 0
everywhere else. Grid dimensions according to ds.
"""
warnings.warn(
"'get_northsea' is deprecated and will be removed in a future version. "
"Use 'nlmod.read.rws.discretize_northsea' to project the northsea on the model grid",
DeprecationWarning,
)
return discretize_northsea(ds, gdf, da_name)
[docs]
@cache.cache_netcdf(coords_2d=True)
def discretize_northsea(ds, gdf=None, da_name="northsea"):
"""Get Dataset which is 1 at the northsea and 0 everywhere else.
Sea is defined by rws surface water shapefile.
Parameters
----------
ds : xr.DataSet, None, optional
dataset containing relevant model information
gdf : gpd.GeoDataFrame or None, optional
geometries of the surface water, if None the geometries are obtained using
get_gdf_surface_water. The default is None.
da_name : str, optional
name of the datavar that identifies sea cells
Returns
-------
ds_out : xr.DataSet
Dataset with a single DataArray, this DataArray is 1 at sea and 0
everywhere else. Grid dimensions according to ds.
"""
if gdf is None:
gdf = get_gdf_surface_water(ds=ds)
# find grid cells with sea
swater_zee = gdf[
gdf["OWMNAAM"].isin(
[
"Rijn territoriaal water",
"Waddenzee",
"Waddenzee vastelandskust",
"Hollandse kust (kustwater)",
"Waddenkust (kustwater)",
]
)
]
ds_out = dims.gdf_to_bool_ds(swater_zee, ds, da_name, keep_coords=("y", "x"))
return ds_out
[docs]
def calculate_sea_coverage(
dtm,
ds=None,
zmax=0.0,
xy_sea=None,
diagonal=False,
method="mode",
nodata=-1,
return_filled_dtm=False,
):
"""Determine where the sea is by interpreting the digital terrain model.
This method assumes the pixel defined in xy_sea (by default top-left) of the
DTM-DataArray is sea. It then determines the height of the sea that is required for
other pixels to become sea as well, taking into account the pixels in between.
Parameters
----------
dtm : xr.DataArray
The digital terrain data, which can be of higher resolution than ds, Nans are
filled by the minial value of dtm.
ds : xr.Dataset, optional
Dataset with model information. When ds is not None, the sea DataArray is
transformed to the model grid. The default is None.
zmax : float, optional
Locations thet become sea when the sea level reaches a level of zmax will get a
value of 1 in the resulting DataArray. The default is 0.0.
xy_sea : tuble of 2 floats
The x- and y-coordinate of a location within the dtm that is sea. From this
point, calculate_sea determines at what level each cell becomes wet. When
xy_cell is None, the most northwest grid cell is sea, which is appropriate for
the Netherlands. The default is None.
diagonal : bool, optional
When true, dtm-values are connected diagonally as well (to determine the level
the sea will reach). The default is False.
method : str, optional
The method used to scale the dtm to ds. The default is "mode" (mode means that
if more than half of the (not-nan) cells are wet, the cell is classified as
sea).
nodata : int or float, optional
The value for model cells outside the coverage of the dtm.
Only used internally. The default is -1.
return_filled_dtm : bool, optional
When True, return the filled dtm. The default is False.
Returns
-------
sea : xr.DataArray
A DataArray with value of 1 where the sea is and 0 where it is not.
"""
from skimage.morphology import reconstruction
if not (dtm < zmax).any():
logger.warning(
f"There are no values in dtm below {zmax}. The provided dtm "
"probably is not appropriate to calculate the sea boundary."
)
# fill nans by the minimum value of dtm
dtm = dtm.where(~np.isnan(dtm), dtm.min())
seed = xr.full_like(dtm, dtm.max())
if xy_sea is None:
xy_sea = (dtm.x.data.min(), dtm.y.data.max())
# determine the closest x and y in the dtm grid
x_sea = dtm.x.sel(x=xy_sea[0], method="nearest")
y_sea = dtm.y.sel(y=xy_sea[1], method="nearest")
dtm.loc[{"x": x_sea, "y": y_sea}] = dtm.min()
seed.loc[{"x": x_sea, "y": y_sea}] = dtm.min()
seed = seed.data
footprint = np.ones((3, 3), dtype="bool")
if not diagonal:
footprint[[0, 0, 2, 2], [0, 2, 2, 0]] = False # no diagonal connections
filled = reconstruction(seed, dtm.data, method="erosion", footprint=footprint)
dtm.data = filled
if return_filled_dtm:
return dtm
sea_dtm = dtm < zmax
if method == "mode":
sea_dtm = sea_dtm.astype(int)
else:
sea_dtm = sea_dtm.astype(float)
if ds is not None:
sea = dims.structured_da_to_ds(sea_dtm, ds, method=method, nodata=nodata)
if (sea == nodata).any():
logger.info(
"The dtm data does not cover the entire model domain."
" Assuming cells outside dtm-cover to be sea."
)
sea = sea.where(sea != nodata, 1)
return sea
return sea_dtm
[docs]
def get_gdr_configuration() -> dict:
"""Get configuration for GDR data.
Note: Currently only includes configuration for bathymetry data. Other datasets
can be added in the future. See
https://geo.rijkswaterstaat.nl/arcgis/rest/services/GDR/ for available data.
Returns
-------
config : dict
configuration dictionary containing urls and layer numbers for GDR data.
"""
config = {}
config["bodemhoogte_1m"] = {
"url": (
"https://geo.rijkswaterstaat.nl/arcgis/rest/services/GDR/"
"bodemhoogte_index/FeatureServer"
),
"layer": 1,
}
# NOTE: the 20m resolution is no longer available from the GDR service via a
# geodataframe containing the url.
config["bodemhoogte_20m"] = {
"url": (
"https://downloads.rijkswaterstaatdata.nl/bodemhoogte_20mtr/"
"bodemhoogte_20mtr.tif"
)
}
return config
[docs]
def get_bathymetry_gdf(
resolution: Literal["20m", "1m"] = "20m",
extent: Optional[list[float]] = None,
config: Optional[dict] = None,
) -> gpd.GeoDataFrame:
"""Get bathymetry dataframe from RWS.
.. deprecated:: 0.10.0
`get_bathymetry_gdf` will be removed in nlmod 1.0.0, it is replaced by
`download_bathymetry_gdf` because of new naming convention
https://github.com/gwmod/nlmod/issues/47
Note that the 20m resolution does not contain bathymetry data for the major rivers.
If you need the bathymetry of the major rivers, use the 1m resolution.
Parameters
----------
resolution : str, optional
resolution of the bathymetry data, "1m" or "20m". The default is "20m".
extent : tuple, optional
extent of the model domain. The default is None.
config : dict, optional
configuration dictionary containing urls and layer numbers for GDR data. The
default is None, which uses the default configuration provided by
the function `get_gdr_configuration()`.
"""
warnings.warn(
"'get_bathymetry_gdf' is deprecated and will be removed in a future version. "
"Use 'nlmod.read.rws.download_bathymetry_gdf' to project the buisdrainage on the model grid",
DeprecationWarning,
)
return download_bathymetry_gdf(resolution, extent, config)
[docs]
def download_bathymetry_gdf(
resolution: Literal["20m", "1m"] = "20m",
extent: Optional[list[float]] = None,
config: Optional[dict] = None,
) -> gpd.GeoDataFrame:
"""Get bathymetry dataframe from RWS.
Note that the 20m resolution does not contain bathymetry data for the major rivers.
If you need the bathymetry of the major rivers, use the 1m resolution.
Parameters
----------
resolution : str, optional
resolution of the bathymetry data, "1m" or "20m". The default is "20m".
extent : tuple, optional
extent of the model domain. The default is None.
config : dict, optional
configuration dictionary containing urls and layer numbers for GDR data. The
default is None, which uses the default configuration provided by
the function `get_gdr_configuration()`.
"""
if config is None:
config = get_gdr_configuration()
if resolution == "1m":
url = config[f"bodemhoogte_{resolution}"]["url"]
layer = config[f"bodemhoogte_{resolution}"]["layer"]
return arcrest(url, layer, extent=extent)
else:
gdf = gpd.GeoDataFrame(index=[0], columns=["geotiff"])
gdf.loc[0, "geotiff"] = config[f"bodemhoogte_{resolution}"]["url"]
return gdf
[docs]
@cache.cache_netcdf()
def get_bathymetry(
extent: list[float],
resolution: Literal["20m", "1m"] = "20m",
res: Optional[float] = None,
method: Union[str, Callable, None] = None,
chunks: Optional[Union[str, dict[str, int]]] = "auto",
config: Optional[dict] = None,
) -> xr.DataArray:
"""Get bathymetry data from RWS.
.. deprecated:: 0.10.0
`get_bathymetry` will be removed in nlmod 1.0.0, it is replaced by
`download_bathymetry` because of new naming convention
https://github.com/gwmod/nlmod/issues/47
Bathymetry is available at 20m resolution and at 1m resolution. The 20m
resolution is available for large water bodies, but not in the major rivers.
The 1m dataset covers the major waterbodies across all of the Netherlands.
Parameters
----------
extent : tuple
extent of the model domain
resolution : str, optional
resolution of the bathymetry data, "1m" or "20m". The default is "20m".
res : float, optional
resolution of the output data array. The default is None, which uses
resolution of the input datasets. Resampling method is provided by the method
kwarg.
method : str, optional
Rasterio resampling method. The default is None. Pre-defined method are
"first", "last", "min", "nearest", "sum" or "count". But custom callables
are also supported. See the rasterio documentation for more information.
chunks : dict, optional
chunks for the output data array. The default is "auto", which lets xarray/dask
pick the chunksize. Set to None to avoid chunking.
config : dict, optional
configuration dictionary containing urls and layer numbers for GDR data. The
default is None, which uses the default configuration provided by
the function `get_gdr_configuration()`.
Returns
-------
bathymetry : xr.DataArray
bathymetry data
"""
warnings.warn(
"'get_bathymetry' is deprecated and will be removed in a future version. "
"Use 'nlmod.read.rws.download_bathymetry' to download bathymetry data",
DeprecationWarning,
)
return download_bathymetry(extent, resolution, res, method, chunks, config)
[docs]
@cache.cache_netcdf()
def download_bathymetry(
extent: list[float],
resolution: Literal["20m", "1m"] = "20m",
res: Optional[float] = None,
method: Union[str, Callable, None] = None,
chunks: Optional[Union[str, dict[str, int]]] = "auto",
config: Optional[dict] = None,
) -> xr.DataArray:
"""Download bathymetry data from RWS.
Bathymetry is available at 20m resolution and at 1m resolution. The 20m
resolution is available for large water bodies, but not in the major rivers.
The 1m dataset covers the major waterbodies across all of the Netherlands.
Parameters
----------
extent : tuple
extent of the model domain
resolution : str, optional
resolution of the bathymetry data, "1m" or "20m". The default is "20m".
res : float, optional
resolution of the output data array. The default is None, which uses
resolution of the input datasets. Resampling method is provided by the method
kwarg.
method : str, optional
Rasterio resampling method. The default is None. Pre-defined method are
"first", "last", "min", "nearest", "sum" or "count". But custom callables
are also supported. See the rasterio documentation for more information.
chunks : dict, optional
chunks for the output data array. The default is "auto", which lets xarray/dask
pick the chunksize. Set to None to avoid chunking.
config : dict, optional
configuration dictionary containing urls and layer numbers for GDR data. The
default is None, which uses the default configuration provided by
the function `get_gdr_configuration()`.
Returns
-------
bathymetry : xr.DataArray
bathymetry data
"""
gdf = download_bathymetry_gdf(resolution=resolution, extent=extent, config=config)
xmin, xmax, ymin, ymax = extent
dataarrays = []
for _, row in tqdm(
gdf.iterrows(), desc="Downloading bathymetry", total=gdf.index.size
):
url = row["geotiff"]
ds = xr.open_dataset(url, engine="rasterio")
ds = ds.assign_coords({"y": ds["y"].round(0), "x": ds["x"].round(0)})
da = (
ds["band_data"]
.sel(band=1, x=slice(xmin, xmax), y=slice(ymax, ymin))
.drop_vars("band")
)
if chunks:
da = da.chunk(chunks)
dataarrays.append(da)
if len(dataarrays) > 1:
da = merge_arrays(
dataarrays,
bounds=[xmin, ymin, xmax, ymax],
res=res,
method=method,
)
else:
da = dataarrays[0]
if res is not None:
da = da.rio.reproject(
da.rio.crs,
res=res,
resampling=method,
)
return da