Source code for nlmod.read.rws

import datetime as dt
import logging
import os
import warnings
from typing import Callable, Literal, Optional, Union

import geopandas as gpd
import numpy as np
import xarray as xr
from rioxarray.merge import merge_arrays

from .. import NLMOD_DATADIR, cache, dims, util
from ..util import tqdm
from .webservices import arcrest

logger = logging.getLogger(__name__)


[docs] @cache.cache_pickle def get_gdf_surface_water(ds=None, extent=None): """Read a shapefile with surface water as a geodataframe, cut by the extent of the model. Parameters ---------- ds : xr.DataSet, None, optional dataset containing relevant model information extent : list, tuple or np.array, optional desired model extent (xmin, xmax, ymin, ymax) Returns ------- gdf_opp_water : GeoDataframe surface water geodataframe. """ if ds is None and extent is None: raise ValueError("At least one of 'ds' or 'extent' must be provided.") # laad bestanden in fname = os.path.join(NLMOD_DATADIR, "shapes", "opp_water.shp") gdf_swater = gpd.read_file(fname) if ds is not None: extent = dims.get_extent(ds) if (extent[0] < 93000) & (extent[2] < 480000): logger.warning( "This function does not yield good results for the North Sea, see https://github.com/gwmod/nlmod/issues/225" ) gdf_swater = util.gdf_within_extent(gdf_swater, extent) return gdf_swater
[docs] @cache.cache_netcdf(coords_3d=True) def get_surface_water(ds, gdf=None, da_basename="rws_oppwater"): """Create 3 data-arrays from the shapefile with surface water: .. deprecated:: 0.10.0 `get_surface_water` will be removed in nlmod 1.0.0, it is replaced by `discretize_surface_water` because of new naming convention https://github.com/gwmod/nlmod/issues/47 - area: area of the shape in the cell - cond: conductance based on the area and "bweerstand" column in shapefile - stage: surface water level based on the "peil" column in the shapefile Parameters ---------- ds : xr.DataSet xarray with model data gdf : gpd.GeoDataFrame or None, optional geometries of the surface water, if None the geometries are obtained using get_gdf_surface_water. The default is None. da_basename : str name of the polygon shapes, name is used as a prefix to store data arrays in ds Returns ------- ds : xarray.Dataset dataset with modelgrid data. """ warnings.warn( "'get_surface_water' is deprecated and will be removed in a future version. " "Use 'nlmod.read.rws.discretize_surface_water' to project the surface water on the model grid", DeprecationWarning, ) if gdf is None: gdf = get_gdf_surface_water(ds) return discretize_surface_water(ds, gdf, da_basename)
[docs] @cache.cache_netcdf(coords_3d=True) def discretize_surface_water(ds, gdf, da_basename="rws_oppwater"): """Create 3 data-arrays from the shapefile with surface water. These data arrays are: - area: area of the shape in the cell - cond: conductance based on the area and "bweerstand" column in shapefile - stage: surface water level based on the "peil" column in the shapefile Parameters ---------- ds : xr.DataSet xarray with model data gdf : gpd.GeoDataFrame or None, optional geometries of the surface water and the columns 'bweerstand' and 'peil'. da_basename : str name of the polygon shapes, name is used as a prefix to store data arrays in ds Returns ------- ds : xarray.Dataset dataset with modelgrid data. """ modelgrid = dims.modelgrid_from_ds(ds) area = xr.zeros_like(ds["top"]) cond = xr.zeros_like(ds["top"]) peil = xr.zeros_like(ds["top"]) for _, row in gdf.iterrows(): area_pol = dims.polygon_to_area( modelgrid, row["geometry"], xr.ones_like(ds["top"]), ds.gridtype, ) cond = xr.where(area_pol > area, area_pol / row["bweerstand"], cond) peil = xr.where(area_pol > area, row["peil"], peil) area = xr.where(area_pol > area, area_pol, area) ds_out = util.get_ds_empty(ds, keep_coords=("y", "x")) ds_out[f"{da_basename}_area"] = area ds_out[f"{da_basename}_area"].attrs["units"] = "m2" ds_out[f"{da_basename}_cond"] = cond ds_out[f"{da_basename}_cond"].attrs["units"] = "m2/day" ds_out[f"{da_basename}_stage"] = peil ds_out[f"{da_basename}_stage"].attrs["units"] = "mNAP" for datavar in ds_out: ds_out[datavar].attrs["source"] = "RWS" ds_out[datavar].attrs["date"] = dt.datetime.now().strftime("%Y%m%d") return ds_out
[docs] @cache.cache_netcdf(coords_2d=True) def get_northsea(ds, gdf=None, da_name="northsea"): """Get Dataset which is 1 at the northsea and 0 everywhere else. Sea is defined by rws surface water shapefile. .. deprecated:: 0.10.0 `get_northsea` will be removed in nlmod 1.0.0, it is replaced by `discretize_northsea` because of new naming convention https://github.com/gwmod/nlmod/issues/47 Parameters ---------- ds : xr.DataSet, None, optional dataset containing relevant model information gdf : gpd.GeoDataFrame or None, optional geometries of the surface water, if None the geometries are obtained using get_gdf_surface_water. The default is None. da_name : str, optional name of the datavar that identifies sea cells Returns ------- ds_out : xr.DataSet Dataset with a single DataArray, this DataArray is 1 at sea and 0 everywhere else. Grid dimensions according to ds. """ warnings.warn( "'get_northsea' is deprecated and will be removed in a future version. " "Use 'nlmod.read.rws.discretize_northsea' to project the northsea on the model grid", DeprecationWarning, ) return discretize_northsea(ds, gdf, da_name)
[docs] @cache.cache_netcdf(coords_2d=True) def discretize_northsea(ds, gdf=None, da_name="northsea"): """Get Dataset which is 1 at the northsea and 0 everywhere else. Sea is defined by rws surface water shapefile. Parameters ---------- ds : xr.DataSet, None, optional dataset containing relevant model information gdf : gpd.GeoDataFrame or None, optional geometries of the surface water, if None the geometries are obtained using get_gdf_surface_water. The default is None. da_name : str, optional name of the datavar that identifies sea cells Returns ------- ds_out : xr.DataSet Dataset with a single DataArray, this DataArray is 1 at sea and 0 everywhere else. Grid dimensions according to ds. """ if gdf is None: gdf = get_gdf_surface_water(ds=ds) # find grid cells with sea swater_zee = gdf[ gdf["OWMNAAM"].isin( [ "Rijn territoriaal water", "Waddenzee", "Waddenzee vastelandskust", "Hollandse kust (kustwater)", "Waddenkust (kustwater)", ] ) ] ds_out = dims.gdf_to_bool_ds(swater_zee, ds, da_name, keep_coords=("y", "x")) return ds_out
[docs] def calculate_sea_coverage( dtm, ds=None, zmax=0.0, xy_sea=None, diagonal=False, method="mode", nodata=-1, return_filled_dtm=False, ): """Determine where the sea is by interpreting the digital terrain model. This method assumes the pixel defined in xy_sea (by default top-left) of the DTM-DataArray is sea. It then determines the height of the sea that is required for other pixels to become sea as well, taking into account the pixels in between. Parameters ---------- dtm : xr.DataArray The digital terrain data, which can be of higher resolution than ds, Nans are filled by the minial value of dtm. ds : xr.Dataset, optional Dataset with model information. When ds is not None, the sea DataArray is transformed to the model grid. The default is None. zmax : float, optional Locations thet become sea when the sea level reaches a level of zmax will get a value of 1 in the resulting DataArray. The default is 0.0. xy_sea : tuble of 2 floats The x- and y-coordinate of a location within the dtm that is sea. From this point, calculate_sea determines at what level each cell becomes wet. When xy_cell is None, the most northwest grid cell is sea, which is appropriate for the Netherlands. The default is None. diagonal : bool, optional When true, dtm-values are connected diagonally as well (to determine the level the sea will reach). The default is False. method : str, optional The method used to scale the dtm to ds. The default is "mode" (mode means that if more than half of the (not-nan) cells are wet, the cell is classified as sea). nodata : int or float, optional The value for model cells outside the coverage of the dtm. Only used internally. The default is -1. return_filled_dtm : bool, optional When True, return the filled dtm. The default is False. Returns ------- sea : xr.DataArray A DataArray with value of 1 where the sea is and 0 where it is not. """ from skimage.morphology import reconstruction if not (dtm < zmax).any(): logger.warning( f"There are no values in dtm below {zmax}. The provided dtm " "probably is not appropriate to calculate the sea boundary." ) # fill nans by the minimum value of dtm dtm = dtm.where(~np.isnan(dtm), dtm.min()) seed = xr.full_like(dtm, dtm.max()) if xy_sea is None: xy_sea = (dtm.x.data.min(), dtm.y.data.max()) # determine the closest x and y in the dtm grid x_sea = dtm.x.sel(x=xy_sea[0], method="nearest") y_sea = dtm.y.sel(y=xy_sea[1], method="nearest") dtm.loc[{"x": x_sea, "y": y_sea}] = dtm.min() seed.loc[{"x": x_sea, "y": y_sea}] = dtm.min() seed = seed.data footprint = np.ones((3, 3), dtype="bool") if not diagonal: footprint[[0, 0, 2, 2], [0, 2, 2, 0]] = False # no diagonal connections filled = reconstruction(seed, dtm.data, method="erosion", footprint=footprint) dtm.data = filled if return_filled_dtm: return dtm sea_dtm = dtm < zmax if method == "mode": sea_dtm = sea_dtm.astype(int) else: sea_dtm = sea_dtm.astype(float) if ds is not None: sea = dims.structured_da_to_ds(sea_dtm, ds, method=method, nodata=nodata) if (sea == nodata).any(): logger.info( "The dtm data does not cover the entire model domain." " Assuming cells outside dtm-cover to be sea." ) sea = sea.where(sea != nodata, 1) return sea return sea_dtm
[docs] def get_gdr_configuration() -> dict: """Get configuration for GDR data. Note: Currently only includes configuration for bathymetry data. Other datasets can be added in the future. See https://geo.rijkswaterstaat.nl/arcgis/rest/services/GDR/ for available data. Returns ------- config : dict configuration dictionary containing urls and layer numbers for GDR data. """ config = {} config["bodemhoogte_1m"] = { "url": ( "https://geo.rijkswaterstaat.nl/arcgis/rest/services/GDR/" "bodemhoogte_index/FeatureServer" ), "layer": 1, } # NOTE: the 20m resolution is no longer available from the GDR service via a # geodataframe containing the url. config["bodemhoogte_20m"] = { "url": ( "https://downloads.rijkswaterstaatdata.nl/bodemhoogte_20mtr/" "bodemhoogte_20mtr.tif" ) } return config
[docs] def get_bathymetry_gdf( resolution: Literal["20m", "1m"] = "20m", extent: Optional[list[float]] = None, config: Optional[dict] = None, ) -> gpd.GeoDataFrame: """Get bathymetry dataframe from RWS. .. deprecated:: 0.10.0 `get_bathymetry_gdf` will be removed in nlmod 1.0.0, it is replaced by `download_bathymetry_gdf` because of new naming convention https://github.com/gwmod/nlmod/issues/47 Note that the 20m resolution does not contain bathymetry data for the major rivers. If you need the bathymetry of the major rivers, use the 1m resolution. Parameters ---------- resolution : str, optional resolution of the bathymetry data, "1m" or "20m". The default is "20m". extent : tuple, optional extent of the model domain. The default is None. config : dict, optional configuration dictionary containing urls and layer numbers for GDR data. The default is None, which uses the default configuration provided by the function `get_gdr_configuration()`. """ warnings.warn( "'get_bathymetry_gdf' is deprecated and will be removed in a future version. " "Use 'nlmod.read.rws.download_bathymetry_gdf' to project the buisdrainage on the model grid", DeprecationWarning, ) return download_bathymetry_gdf(resolution, extent, config)
[docs] def download_bathymetry_gdf( resolution: Literal["20m", "1m"] = "20m", extent: Optional[list[float]] = None, config: Optional[dict] = None, ) -> gpd.GeoDataFrame: """Get bathymetry dataframe from RWS. Note that the 20m resolution does not contain bathymetry data for the major rivers. If you need the bathymetry of the major rivers, use the 1m resolution. Parameters ---------- resolution : str, optional resolution of the bathymetry data, "1m" or "20m". The default is "20m". extent : tuple, optional extent of the model domain. The default is None. config : dict, optional configuration dictionary containing urls and layer numbers for GDR data. The default is None, which uses the default configuration provided by the function `get_gdr_configuration()`. """ if config is None: config = get_gdr_configuration() if resolution == "1m": url = config[f"bodemhoogte_{resolution}"]["url"] layer = config[f"bodemhoogte_{resolution}"]["layer"] return arcrest(url, layer, extent=extent) else: gdf = gpd.GeoDataFrame(index=[0], columns=["geotiff"]) gdf.loc[0, "geotiff"] = config[f"bodemhoogte_{resolution}"]["url"] return gdf
[docs] @cache.cache_netcdf() def get_bathymetry( extent: list[float], resolution: Literal["20m", "1m"] = "20m", res: Optional[float] = None, method: Union[str, Callable, None] = None, chunks: Optional[Union[str, dict[str, int]]] = "auto", config: Optional[dict] = None, ) -> xr.DataArray: """Get bathymetry data from RWS. .. deprecated:: 0.10.0 `get_bathymetry` will be removed in nlmod 1.0.0, it is replaced by `download_bathymetry` because of new naming convention https://github.com/gwmod/nlmod/issues/47 Bathymetry is available at 20m resolution and at 1m resolution. The 20m resolution is available for large water bodies, but not in the major rivers. The 1m dataset covers the major waterbodies across all of the Netherlands. Parameters ---------- extent : tuple extent of the model domain resolution : str, optional resolution of the bathymetry data, "1m" or "20m". The default is "20m". res : float, optional resolution of the output data array. The default is None, which uses resolution of the input datasets. Resampling method is provided by the method kwarg. method : str, optional Rasterio resampling method. The default is None. Pre-defined method are "first", "last", "min", "nearest", "sum" or "count". But custom callables are also supported. See the rasterio documentation for more information. chunks : dict, optional chunks for the output data array. The default is "auto", which lets xarray/dask pick the chunksize. Set to None to avoid chunking. config : dict, optional configuration dictionary containing urls and layer numbers for GDR data. The default is None, which uses the default configuration provided by the function `get_gdr_configuration()`. Returns ------- bathymetry : xr.DataArray bathymetry data """ warnings.warn( "'get_bathymetry' is deprecated and will be removed in a future version. " "Use 'nlmod.read.rws.download_bathymetry' to download bathymetry data", DeprecationWarning, ) return download_bathymetry(extent, resolution, res, method, chunks, config)
[docs] @cache.cache_netcdf() def download_bathymetry( extent: list[float], resolution: Literal["20m", "1m"] = "20m", res: Optional[float] = None, method: Union[str, Callable, None] = None, chunks: Optional[Union[str, dict[str, int]]] = "auto", config: Optional[dict] = None, ) -> xr.DataArray: """Download bathymetry data from RWS. Bathymetry is available at 20m resolution and at 1m resolution. The 20m resolution is available for large water bodies, but not in the major rivers. The 1m dataset covers the major waterbodies across all of the Netherlands. Parameters ---------- extent : tuple extent of the model domain resolution : str, optional resolution of the bathymetry data, "1m" or "20m". The default is "20m". res : float, optional resolution of the output data array. The default is None, which uses resolution of the input datasets. Resampling method is provided by the method kwarg. method : str, optional Rasterio resampling method. The default is None. Pre-defined method are "first", "last", "min", "nearest", "sum" or "count". But custom callables are also supported. See the rasterio documentation for more information. chunks : dict, optional chunks for the output data array. The default is "auto", which lets xarray/dask pick the chunksize. Set to None to avoid chunking. config : dict, optional configuration dictionary containing urls and layer numbers for GDR data. The default is None, which uses the default configuration provided by the function `get_gdr_configuration()`. Returns ------- bathymetry : xr.DataArray bathymetry data """ gdf = download_bathymetry_gdf(resolution=resolution, extent=extent, config=config) xmin, xmax, ymin, ymax = extent dataarrays = [] for _, row in tqdm( gdf.iterrows(), desc="Downloading bathymetry", total=gdf.index.size ): url = row["geotiff"] ds = xr.open_dataset(url, engine="rasterio") ds = ds.assign_coords({"y": ds["y"].round(0), "x": ds["x"].round(0)}) da = ( ds["band_data"] .sel(band=1, x=slice(xmin, xmax), y=slice(ymax, ymin)) .drop_vars("band") ) if chunks: da = da.chunk(chunks) dataarrays.append(da) if len(dataarrays) > 1: da = merge_arrays( dataarrays, bounds=[xmin, ymin, xmax, ymax], res=res, method=method, ) else: da = dataarrays[0] if res is not None: da = da.rio.reproject( da.rio.crs, res=res, resampling=method, ) return da