Source code for nlmod.gis

import logging
import os

import geopandas as gpd
import numpy as np

from nlmod.dims.grid import get_affine_mod_to_world, polygons_from_ds
from nlmod.dims.layers import calculate_thickness
from nlmod.epsg28992 import EPSG_28992

logger = logging.getLogger(__name__)



[docs]
def vertex_da_to_gdf(
    model_ds, data_variables, polygons=None, dealing_with_time="mean", crs=EPSG_28992
):
    """Convert one or more DataArrays from a vertex model dataset to a Geodataframe.

    Parameters
    ----------
    model_ds : xr.DataSet
        xarray with model data
    data_variables : list, tuple or set
        data_variables in model_ds that will be stored as attributes in the
        shapefile.
    polygons : list of shapely Polygons, optional
        geometries used for the GeoDataframe, if None the polygons are created
        from the data variable 'vertices' in model_ds. The default is None.
    dealing_with_time : str, optional
        when there is time variant data in the model dataset this function
        becomes very slow. For now only the time averaged data will be
        saved in the geodataframe. Later this can be extended with multiple
        possibilities. The default is 'mean'.
    crs : str, optional
        coordinate reference system for the geodataframe. The default
        is EPSG:28992 (RD).

    Raises
    ------
    ValueError
        for DataArrays with unexpected dimension names.
    NotImplementedError
        for DataArrays with more than 2 dimensions or dealing_with_time!='mean'

    Returns
    -------
    gdf : geopandas.GeoDataframe
        geodataframe of one or more DataArrays.
    """
    assert (
        model_ds.gridtype == "vertex"
    ), f"expected model dataset with gridtype vertex, got {model_ds.gridtype}"

    if isinstance(data_variables, str):
        data_variables = [data_variables]

    # create dictionary with column names and values of the geodataframe
    dv_dic = {}
    for da_name in data_variables:
        da = model_ds[da_name]
        no_dims = len(da.dims)
        if no_dims == 1:
            dv_dic[da_name] = da.values
        elif no_dims == 2:
            if da.dims == ("layer", "icell2d"):
                for i, da_lay in enumerate(da):
                    dv_dic[f"{da_name}_lay{i}"] = da_lay
            elif "time" in da.dims:
                da_mean = da.mean(dim="time")
                if dealing_with_time == "mean":
                    dv_dic[f"{da_name}_mean"] = da_mean.values
                else:
                    raise NotImplementedError(
                        "Can only use the mean of a DataArray with dimension time, "
                        "use dealing_with_time='mean'"
                    )
            else:
                logger.warning(
                    "expected dimensions ('layer', 'icell2d') for data variable "
                    f"{da_name}, got {da.dims}"
                )
        else:
            logger.warning(
                f"expected one or two dimensions for data variable "
                f"{da_name}, got {no_dims} dimensions"
            )

    # create geometries
    if polygons is None:
        polygons = polygons_from_ds(model_ds)

    # construct geodataframe
    gdf = gpd.GeoDataFrame(dv_dic, geometry=polygons, crs=crs)

    return gdf




[docs]
def struc_da_to_gdf(
    model_ds, data_variables, polygons=None, dealing_with_time="mean", crs=EPSG_28992
):
    """Convert one or more DataArrays from a structured model dataset to a Geodataframe.

    Parameters
    ----------
    model_ds : xr.DataSet
        xarray with model data
    data_variables : list, tuple or set
        data_variables in model_ds that will be stored as attributes in the
        shapefile.
    polygons : list of shapely Polygons, optional
        geometries used for the GeoDataframe, if None the polygons are created
        from the data variable 'vertices' in model_ds. The default is None.
    crs : str, optional
        coordinate reference system for the geodataframe. The default
        is EPSG:28992 (RD).

    Raises
    ------
    ValueError
        for DataArrays with unexpected dimension names.
    NotImplementedError
        for DataArrays with more than 2 dimensions or dealing_with_time!='mean'

    Returns
    -------
    gdf : geopandas.GeoDataframe
        geodataframe of one or more DataArrays.
    """
    assert (
        model_ds.gridtype == "structured"
    ), f"expected model dataset with gridtype vertex, got {model_ds.gridtype}"

    if isinstance(data_variables, str):
        data_variables = [data_variables]

    # create dictionary with column names and values of the geodataframe
    dv_dic = {}
    for da_name in data_variables:
        da = model_ds[da_name]
        no_dims = len(da.dims)
        if no_dims == 2:
            dv_dic[da_name] = da.values.flatten("F")
        elif no_dims == 3:
            if da.dims == ("layer", "y", "x"):
                for i, da_lay in enumerate(da):
                    dv_dic[f"{da_name}_lay{i}"] = da_lay.values.flatten("F")
            elif "time" in da.dims:
                da_mean = da.mean(dim="time")
                if dealing_with_time == "mean":
                    dv_dic[f"{da_name}_mean"] = da_mean.values.flatten("F")
                else:
                    raise NotImplementedError(
                        "Can only use the mean of a DataArray with dimension time, use dealing_with_time='mean'"
                    )
            else:
                raise ValueError(
                    f"expected dimensions ('layer', 'y', 'x'), got {da.dims}"
                )
        else:
            raise NotImplementedError(
                f"expected two or three dimensions got {no_dims} for data variable {da_name}"
            )

    # create geometries
    if polygons is None:
        polygons = polygons_from_ds(model_ds)

    # construct geodataframe
    gdf = gpd.GeoDataFrame(dv_dic, geometry=polygons, crs=crs)

    return gdf




[docs]
def dataarray_to_shapefile(model_ds, data_variables, fname, polygons=None):
    """Save one or more DataArrays from a model dataset as a shapefile.

    Parameters
    ----------
    model_ds : xr.DataSet
        xarray with model data
    data_variables : list, tuple or set
        data_variables in model_ds that will be stored as attributes in the
        shapefile.
    fname : str
        filename of the shapefile.
    polygons : list of shapely Polygons, optional
        geometries used for the GeoDataframe, if None the polygons are created
        from the data variable 'vertices' in model_ds. The default is None.

    Returns
    -------
    None.
    """
    if model_ds.gridtype == "vertex":
        gdf = vertex_da_to_gdf(model_ds, data_variables, polygons=polygons)
    else:
        gdf = struc_da_to_gdf(model_ds, data_variables, polygons=polygons)
    gdf.to_file(fname)




[docs]
def ds_to_vector_file(
    model_ds,
    gisdir=None,
    driver="GPKG",
    combine_dic=None,
    exclude=("x", "y", "time_steps", "area", "vertices", "rch_name", "icvert"),
    crs=EPSG_28992,
):
    """Save all data variables in a model dataset to multiple shapefiles.

    Parameters
    ----------
    model_ds : xr.DataSet
        xarray with model data
    gisdir : str, optional
        gis directory to save shapefiles, if None a subdirectory 'gis' of the
        model_ws (which is an attribute of model_ds) is used. The default is
        None.
    driver : str, optional
        determines if the data variables are saved as seperate "ESRI Shapefile"
        or a single "GPKG" (geopackage). The default is geopackage.
    combine_dic : dictionary of str:set(), optional
        The items in this dictionary are data variables in model_ds that
        should be combined in one shapefile. The key defines the name of the
        shapefile. If None the default combine_dic is used. The default is
        None.
    exclude : tuple of str, optional
        data variables that are not exported to shapefiles. The default is
        ('x', 'y', 'time_steps', 'area', 'vertices').
    crs : str, optional
        coordinate reference system for the vector file. The default
        is EPSG:28992 (RD).

    Returns
    -------
    fnames : str or list of str
        filename(s) of exported geopackage or shapefiles.
    """
    # get default combination dictionary
    if combine_dic is None:
        combine_dic = {
            "topbot": {"top", "botm"},
            "sea": {"northsea", "bathymetry"},
        }

    # create gis directory and filenames
    if gisdir is None:
        gisdir = os.path.join(model_ds.model_ws, "gis")
    if not os.path.exists(gisdir):
        os.mkdir(gisdir)

    if driver == "GPKG":
        fname_gpkg = os.path.join(gisdir, f"{model_ds.model_name}.gpkg")
    elif driver == "ESRI Shapefile":
        fnames = []
    else:
        raise ValueError(f"invalid driver -> {driver}")

    # get all data variables in the model dataset
    da_names = set(model_ds)

    # exclude some data variables
    for da_name in da_names:
        # add data variables with only time variant data to exclude list
        if ("time",) == model_ds[da_name].dims:
            exclude += (da_name,)
        # add data variables with vertex data to exclude list
        elif "iv" in model_ds[da_name].dims:
            exclude += (da_name,)
        # add data variables with vertex data to exclude list
        elif "nvert" in model_ds[da_name].dims:
            exclude += (da_name,)

    # exclude some names from export
    da_names -= set(exclude)

    # create list of polygons
    polygons = polygons_from_ds(model_ds)

    # combine some data variables in one shapefile
    for key, item in combine_dic.items():
        if set(item).issubset(da_names):
            if model_ds.gridtype == "structured":
                gdf = struc_da_to_gdf(model_ds, item, polygons=polygons, crs=crs)
            elif model_ds.gridtype == "vertex":
                gdf = vertex_da_to_gdf(model_ds, item, polygons=polygons, crs=crs)
            if driver == "GPKG":
                gdf.to_file(fname_gpkg, layer=key, driver=driver)
            else:
                fname = os.path.join(gisdir, f"{key}.shp")
                gdf.to_file(fname, driver=driver)
                fnames.append(fname)
            da_names -= item
        else:
            logger.info(
                f"could not add {item} into to geopackage because 1 or more of the data variables do not exist"
            )

    # create unique shapefiles for the other data variables
    for da_name in da_names:
        if model_ds.gridtype == "structured":
            gdf = struc_da_to_gdf(model_ds, (da_name,), polygons=polygons, crs=crs)
        elif model_ds.gridtype == "vertex":
            gdf = vertex_da_to_gdf(model_ds, (da_name,), polygons=polygons, crs=crs)
        if driver == "GPKG":
            gdf.to_file(fname_gpkg, layer=da_name, driver=driver)
        else:
            fname = os.path.join(gisdir, f"{da_name}.shp")
            gdf.to_file(fname, driver=driver)
            fnames.append(fname)

    # return filename(s)
    if driver == "GPKG":
        return fname_gpkg
    else:
        return fnames




[docs]
def ds_to_ugrid_nc_file(
    model_ds,
    fname=None,
    variables=None,
    dummy_var="mesh_topology",
    xv="xv",
    yv="yv",
    face_node_connectivity="icvert",
    split_layer_dimension=True,
    split_time_dimension=False,
    for_imod_qgis_plugin=False,
):
    """Save a model dataset to a UGRID NetCDF file, so it can be opened as a Mesh Layer
    in qgis.

    Parameters
    ----------
    model_ds : xr.DataSet
        xarray Dataset with model data
    fname : str, optional
        filename of the UGRID NetCDF-file, preferably with the extension .nc. When fname
        is None,only a ugird-ready Dataset is created, without saving this Dataset to
        file. The defaults is None.
    variables : str or list of str, optional
        THe variables to be saved in the NetCDF file. The default is None,
        which means all variables will be saved in the file.
    dummy_var : str, optional
        The name of the new dummy-variable that contains the gridinformation in
        its attributes. The default is 'mesh_topology'.
    xv : str, optional
        The name of the variable that contains the x-coordinate of the vertices
        that together form the edges of the faces. The default is 'xv'.
    yv : str, optional
        The name of the variable that contains the y-coordinate of the vertices
        that together form the edges of the faces. The default is 'yv'.
    face_node_connectivity : str, optional
        The name of the variable that contains the indexes of the vertices for
        each face. The default is 'icvert'.
    split_layer_dimension : bool, optional
        Splits the layer dimension into seperate variables when True. The defaults is
        True.
    split_time_dimension : bool, optional
        Splits the time dimension into seperate variables when True. The defaults is
        False.
    for_imod_qgis_plugin : bool, optional
        When True, set some properties of the netcdf file to improve compatibility with
        the iMOD-QGIS plugin. Layers are renamed to 'layer_i' until 'layer_n', a
        variable 'top' is added for each layer, and the variable 'botm' is renamed to
        'bottom'. The default is False.

    Returns
    -------
    ds : xr.DataSet
        The dataset that was saved to a NetCDF-file. Can be used for debugging.
    """
    assert model_ds.gridtype == "vertex", "Only vertex grids are supported for now"

    # copy the dataset, so we do not alter the original one
    ds = model_ds.copy()

    if "angrot" in ds.attrs and ds.attrs["angrot"] != 0.0:
        # rotate the model coordinates to real coordinates
        affine = get_affine_mod_to_world(ds)
        ds[xv], ds[yv] = affine * (ds[xv], ds[yv])

    # add a dummy variable with the required grid-information
    ds[dummy_var] = 0
    ds[dummy_var].attrs["node_coordinates"] = f"{xv} {yv}"
    ds[dummy_var].attrs["cf_role"] = "mesh_topology"
    ds[dummy_var].attrs["topology_dimension"] = 2
    ds[dummy_var].attrs["face_node_connectivity"] = face_node_connectivity

    # drop the first vertex of each face, as the faces should be open in ugrid
    nvert_per_cell_dim = ds[face_node_connectivity].dims[1]
    ds = ds[{nvert_per_cell_dim: ds[nvert_per_cell_dim][1:]}]
    # make sure vertices (nodes) in faces are sprecified in counterclokcwise-
    # direction. Flopy specifies them in clockwise direction, so we need to
    # reverse the direction.
    data = np.flip(ds[face_node_connectivity].data, 1)
    nodata = ds[face_node_connectivity].attrs.get("nodata")
    if nodata is not None:
        # move the nodata values from the first columns to the last
        data_new = np.full(data.shape, nodata)
        for i in range(data.shape[0]):
            mask = data[i, :] != nodata
            data_new[i, : mask.sum()] = data[i, mask]
        data = data_new
    ds[face_node_connectivity].data = data
    ds[face_node_connectivity].attrs["cf_role"] = "face_node_connectivity"
    ds[face_node_connectivity].attrs["start_index"] = 0

    if for_imod_qgis_plugin and "botm" in ds:
        ds["top"] = ds["botm"] + calculate_thickness(ds)
        ds = ds.rename({"botm": "bottom"})

    # set for each of the variables that they describe the faces
    if variables is None:
        variables = list(ds.keys())
    if isinstance(variables, str):
        variables = [variables]
    for var in variables:
        if var in [dummy_var, face_node_connectivity]:
            continue
        ds[var].attrs["location"] = "face"

    # Make sure time is encoded as a float for MDAL.
    # Copied from imod-python.
    for var in ds.coords:
        if np.issubdtype(ds[var].dtype, np.datetime64):
            ds[var].encoding["dtype"] = np.float64

    # Convert boolean layers to integer and int64 to int32
    for var in variables:
        if np.issubdtype(ds[var].dtype, bool):
            ds[var].encoding["dtype"] = np.int32
        elif np.issubdtype(ds[var].dtype, str) or np.issubdtype(ds[var].dtype, object):
            # convert the string to an index of unique strings
            index = np.unique(ds[var], return_inverse=True)[1]
            ds[var] = ds[var].dims, index
        if np.issubdtype(ds[var].dtype, np.int64):
            ds[var].encoding["dtype"] = np.int32

    # Breaks down variables with a layer dimension into separate variables.
    if split_layer_dimension:
        if for_imod_qgis_plugin:
            ds, variables = _break_down_dimension(
                ds, variables, "layer", add_dim_name=True, add_one_based_index=True
            )
        else:
            ds, variables = _break_down_dimension(ds, variables, "layer")
    if split_time_dimension:
        # Breaks down variables with a time dimension into separate variables.
        ds, variables = _break_down_dimension(ds, variables, "time")

    # only keep the selected variables
    ds = ds[variables + [dummy_var, xv, yv, face_node_connectivity]]
    if fname is not None:
        # and save to file
        ds.to_netcdf(fname)
    return ds




[docs]
def _break_down_dimension(
    ds, variables, dim, add_dim_name=False, add_one_based_index=False
):
    """Internal method to split a dimension of a variable into multiple variables.

    Copied and altered from imod-python.
    """
    keep_vars = []
    for var in variables:
        if dim in ds[var].dims:
            stacked = ds[var]
            for i, value in enumerate(stacked[dim].values):
                name = var
                if add_dim_name:
                    name = f"{name}_{dim}"
                if add_one_based_index:
                    name = f"{name}_{i + 1}"
                else:
                    name = f"{name}_{value}"

                ds[name] = stacked.sel({dim: value}, drop=True)
                if "long_name" in ds[name].attrs:
                    long_name = ds[name].attrs["long_name"]
                    ds[name].attrs["long_name"] = f"{long_name} {value}"
                if "standard_name" in ds[name].attrs:
                    standard_name = ds[name].attrs["standard_name"]
                    ds[name].attrs["standard_name"] = f"{standard_name}_{value}"
                keep_vars.append(name)
        else:
            keep_vars.append(var)
    if dim in ds.coords:
        ds = ds.drop_vars(dim)

    return ds, keep_vars