Source code for nlmod.dims.grid

"""Module containing model grid functions.

-   project data on different grid types
-   obtain various types of reclists from a grid that
    can be used as input for a MODFLOW package
-   fill, interpolate and resample grid data
"""

# ruff: noqa: F401
import logging
import os
import warnings

import flopy
import geopandas as gpd
import numpy as np
import pandas as pd
import shapely
import xarray as xr
from affine import Affine
from flopy.discretization.structuredgrid import StructuredGrid
from flopy.discretization.vertexgrid import VertexGrid
from flopy.utils.gridgen import Gridgen
from flopy.utils.gridintersect import GridIntersect
from packaging import version
from scipy.interpolate import griddata
from shapely.affinity import affine_transform
from shapely.geometry import Point, Polygon
from shapely.ops import unary_union

from .. import cache, util
from ..util import tqdm
from .layers import (
    fill_nan_top_botm_kh_kv,
    get_first_active_layer,
    get_idomain,
    remove_inactive_layers,
)
from .rdp import rdp
from .shared import (
    GridTypeDims,
    get_area,
    get_delc,
    get_delr,
    is_layered,
    is_rotated,
    is_structured,
    is_vertex,
)  # noqa: F401

logger = logging.getLogger(__name__)



[docs]
def snap_extent(extent, delr, delc):
    """Snap the extent in such a way that an integer number of columns and rows fit in
    the extent. The new extent is always equal to, or bigger than the original extent.

    Parameters
    ----------
    extent : list, tuple or np.array
        original extent (xmin, xmax, ymin, ymax)
    delr : int or float,
        cell size along rows, equal to dx
    delc : int or float,
        cell size along columns, equal to dy

    Returns
    -------
    extent : list, tuple or np.array
        adjusted extent
    """
    extent = list(extent).copy()

    logger.debug(f"redefining extent: {extent}")

    if delr <= 0 or delc <= 0:
        raise ValueError("delr and delc should be positive values")

    # if xmin can be divided by delr do nothing, otherwise rescale xmin
    if not extent[0] % delr == 0:
        extent[0] -= extent[0] % delr

    # get number of columns and xmax
    ncol = int(np.ceil((extent[1] - extent[0]) / delr))
    extent[1] = extent[0] + (ncol * delr)  # round xmax up to close grid

    # if ymin can be divided by delc do nothing, otherwise rescale ymin
    if not extent[2] % delc == 0:
        extent[2] -= extent[2] % delc

    # get number of rows and ymax
    nrow = int(np.ceil((extent[3] - extent[2]) / delc))
    extent[3] = extent[2] + (nrow * delc)  # round ymax up to close grid

    logger.debug(f"new extent is {extent} and has {nrow} rows and {ncol} columns")

    return extent




[docs]
def xy_to_icell2d(xy, ds):
    """Get the icell2d value of a point defined by its x and y coordinates.

    Parameters
    ----------
    xy : list, tuple
        coordinates of ta point.
    ds : xr.Dataset
        model dataset.

    Returns
    -------
    icell2d : int
        number of the icell2d value of a cell containing the xy point.
    """
    logger.warning(
        "nlmod.grid.xy_to_icell2d is deprecated. "
        "Use nlmod.grid.get_icell2d_from_xy instead"
    )
    msg = "xy_to_icell2d can only be applied to a vertex grid"
    assert ds.gridtype == "vertex", msg
    icell2d = (np.abs(ds.x.data - xy[0]) + np.abs(ds.y.data - xy[1])).argmin().item()

    return icell2d




[docs]
def get_icell2d_from_xy(x, y, ds, gi=None, rotated=True):
    """Get the icell2d value of a point defined by its x and y coordinates.

    Parameters
    ----------
    x : float
        The x-coordinate of the point.
    y : float
        The y-coordinate of the point.
    ds : xr.Dataset
        The model dataset.
    gi : flopy.utils.GridIntersect, optional
        Can be supplied to speed up the calculation, as the generation of the
        GridIntersect-instance can take some time. The default is None.
    rotated : bool, optional
        If the model grid has a rotation, and rotated is False, x and y are in model
        coordinates. Otherwise x and y are in real world coordinates. The default is
        True.

    Raises
    ------
    ValueError
        Raises a ValueError if the point is outside of the model grid.

    Returns
    -------
    icell2d : int
        The icell2d-number of the model cell containing the point (zero-based)

    """
    msg = "get_icell2d_from_xy can only be applied to a vertex grid"
    assert ds.gridtype == "vertex", msg
    if gi is None:
        gi = flopy.utils.GridIntersect(modelgrid_from_ds(ds, rotated=rotated))
    cellids = gi.intersects(Point(x, y), dataframe=True)["cellid"].values
    if len(cellids) < 1:
        raise (ValueError(f"Point ({x}, {y}) is outside of the model grid"))
    icell2d = cellids[0]
    return icell2d




[docs]
def xy_to_row_col(xy, ds):
    """Get the row and column values of a point defined by its x and y coordinates.

    Parameters
    ----------
    xy : list, tuple
        coordinates of ta point.
    ds : xr.Dataset
        model dataset.

    Returns
    -------
    row : int
        number of the row value of a cell containing the xy point.
    col : int
        number of the column value of a cell containing the xy point.
    """
    logger.warning(
        "nlmod.grid.xy_to_row_col is deprecated. "
        "Use nlmod.grid.get_row_col_from_xy instead"
    )
    msg = "xy_to_row_col can only be applied to a structured grid"
    assert ds.gridtype == "structured", msg
    row = np.abs(ds.y.data - xy[1]).argmin()
    col = np.abs(ds.x.data - xy[0]).argmin()
    return row, col




[docs]
def get_row_col_from_xy(x, y, ds, rotated=True, gi=None):
    """Get the row and column of a point defined by a x and y coordinate.

    Parameters
    ----------
    x : float
        The x-coordinate of the point.
    y : float
        The y-coordinate of the point.
    ds : xr.Dataset
        The model dataset.
    rotated : bool, optional
        If the model grid has a rotation, and rotated is False, x and y are in model
        coordinates. Otherwise x and y are in real world coordinates. The default is
        True.
    gi : flopy.utils.GridIntersect, optional
        Can be supplied to use a GridIntersect-class instead of our own calculation

    Raises
    ------
    ValueError
        Raises a ValueError if the point is outside of the model grid.

    Returns
    -------
    row : int
        The row-number of the model cell containing the point (zero-based)
    col : int
        The column-number of the model cell containing the point (zero-based)
    """
    msg = "get_row_col_from_xy can only be applied to a structured grid"
    assert ds.gridtype == "structured", msg
    if gi is not None:
        cellids = gi.intersects(Point(x, y), dataframe=True)[["row", "col"]].values
        if len(cellids) < 1:
            raise (ValueError(f"Point ({x}, {y}) is outside of the model grid"))
        row, col = cellids[0]
        return row, col
    if rotated and ("angrot" in ds.attrs) and (ds.attrs["angrot"] != 0.0):
        # calculate the x and y in model coordinates
        affine = get_affine_world_to_mod(ds)
        x, y = affine * (x, y)
    if x < ds.extent[0] or x > ds.extent[1] or y < ds.extent[2] or y > ds.extent[3]:
        raise (ValueError(f"Point ({x}, {y}) is outside of the model grid"))

    y_bot = ds.y - get_delc(ds) / 2
    row = np.where(y >= y_bot)[0][0]

    x_left = ds.x - get_delr(ds) / 2
    col = np.where(x >= x_left)[0][-1]
    return row, col




[docs]
def xyz_to_cid(xyz, ds=None, modelgrid=None):
    """Get the icell2d value of a point defined by its x and y coordinates.

    Parameters
    ----------
    xyz : list, tuple
        coordinates of a point.
    ds : xr.Dataset
        model dataset.
    modelgrid : StructuredGrid, VertexGrid, optional
        A flopy grid-object


    Returns
    -------
    cid : tuple
        (layer, cid) for vertex grid, (layer, row, column) for structured grid.
    """
    if modelgrid is None:
        modelgrid = modelgrid_from_ds(ds)

    cid = modelgrid.intersect(x=xyz[0], y=xyz[1], z=xyz[2])

    return cid




[docs]
def get_node_structured(lay, row, col, shape):
    """Get the node number of a structured grid.

    Parameters
    ----------
    lay : int
        layer number.
    row : int
        row number.
    col : int
        column number.
    shape : tuple
        shape of the model grid (nlay, nrow, ncol).

    Returns
    -------
    node : int
        node number.
    """
    return lay * shape[1] * shape[2] + row * shape[2] + col




[docs]
def get_node_vertex(lay, icell2d, shape):
    """Get the node number of a vertex grid.

    Parameters
    ----------
    lay : int
        layer number.
    icell2d : int
        icell2d number
    shape : tuple
        shape of the model grid (nlay, ncpl).

    Returns
    -------
    node : int
        node number
    """
    return lay * shape[1] + icell2d




[docs]
def node_to_lrc(node, shape):
    """Convert a node number to (layer,) row and column.

    Layer is only returned if shape is 3D.

    Parameters
    ----------
    node : int
        node number.
    shape : tuple
        shape of the model grid.

    Returns
    -------
    lrc : tuple
        layer, row and column.
    """
    if len(shape) == 3:
        _, nrow, ncol = shape
        lay = node // (nrow * ncol)
        row = (node - lay * (nrow * ncol)) // ncol
        col = node - lay * (nrow * ncol) - (row * ncol)
        lrc = (lay, row, col)
    elif len(shape) == 2:
        nrow, ncol = shape
        row = node // ncol
        col = node - row * ncol
        lrc = (row, col)
    else:
        raise ValueError("shape should be 2D or 3D")
    return lrc




[docs]
def modelgrid_from_ds(ds, rotated=True, nlay=None, top=None, botm=None, **kwargs):
    """Get flopy modelgrid from ds.

    Parameters
    ----------
    ds : xr.Dataset
        model dataset.

    Returns
    -------
    modelgrid : StructuredGrid, VertexGrid
        grid information.
    """
    if rotated and ("angrot" in ds.attrs) and (ds.attrs["angrot"] != 0.0):
        xoff = ds.attrs["xorigin"]
        yoff = ds.attrs["yorigin"]
        angrot = ds.attrs["angrot"]
    else:
        if is_structured(ds):
            xoff = ds.extent[0]
            yoff = ds.extent[2]
        else:
            xoff = 0.0
            yoff = 0.0
        angrot = 0.0
    if top is None and "top" in ds:
        top = ds["top"].data
    if botm is None and "botm" in ds:
        botm = ds["botm"].data
    if nlay is None:
        if "layer" in ds:
            nlay = len(ds.layer)
        elif botm is not None:
            nlay = len(botm)

    if nlay is not None and botm is not None and nlay < len(botm):
        botm = botm[:nlay]

    kwargs = dict(
        xoff=xoff, yoff=yoff, angrot=angrot, nlay=nlay, top=top, botm=botm, **kwargs
    )
    if is_structured(ds):
        if not isinstance(ds.extent, (tuple, list, np.ndarray)):
            raise TypeError(
                f"extent should be a list, tuple or numpy array, not {type(ds.extent)}"
            )
        modelgrid = StructuredGrid(
            delc=get_delc(ds),
            delr=get_delr(ds),
            **kwargs,
        )
    elif is_vertex(ds):
        vertices = get_vertices_from_ds(ds)
        cell2d = get_cell2d_from_ds(ds)
        modelgrid = VertexGrid(
            vertices=vertices,
            cell2d=cell2d,
            **kwargs,
        )
    return modelgrid




[docs]
def modelgrid_to_vertex_ds(mg, ds, nodata=-1):
    """Add information about the calculation-grid to a model dataset."""
    warnings.warn(
        "'modelgrid_to_vertex_ds' is deprecated and will be removed in a"
        "future version, please use 'modelgrid_to_ds' instead",
        DeprecationWarning,
    )

    # add modelgrid to ds
    ds["xv"] = ("iv", mg.verts[:, 0])
    ds["yv"] = ("iv", mg.verts[:, 1])

    cell2d = mg.cell2d
    ncvert_max = np.max([x[3] for x in cell2d])
    icvert = np.full((mg.ncpl, ncvert_max), nodata)
    for i in range(mg.ncpl):
        icvert[i, : cell2d[i][3]] = cell2d[i][4:]
    ds["icvert"] = ("icell2d", "icv"), icvert
    ds["icvert"].attrs["nodata"] = nodata
    return ds




[docs]
def modelgrid_to_ds(mg=None, grbfile=None):
    """Create Dataset from flopy modelgrid object.

    Parameters
    ----------
    mg : flopy.discretization.Grid
        flopy modelgrid object
    grbfile : str
        path to a binary grid file

    Returns
    -------
    ds : xr.Dataset
        Dataset containing grid information
    """
    if mg is None and grbfile is not None:
        mg = flopy.mf6.utils.MfGrdFile(grbfile).modelgrid
    elif mg is None and grbfile is None:
        raise ValueError("Either 'mg' or 'grbfile' should be specified!")
    if mg.grid_type == "structured":
        x, y = mg.xyedges
        from .base import _get_structured_grid_ds

        ds = _get_structured_grid_ds(
            xedges=x,
            yedges=y,
            nlay=mg.nlay,
            botm=mg.botm,
            top=mg.top,
            xorigin=mg.xoffset,
            yorigin=mg.yoffset,
            angrot=mg.angrot,
            attrs=None,
            crs=None,
        )
    elif mg.grid_type == "vertex":
        from .base import _get_vertex_grid_ds

        ds = _get_vertex_grid_ds(
            x=mg.xcellcenters,
            y=mg.ycellcenters,
            xv=mg.verts[:, 0],
            yv=mg.verts[:, 1],
            cell2d=mg.cell2d,
            extent=mg.extent,
            nlay=mg.nlay,
            angrot=mg.angrot,
            xorigin=np.concatenate(mg.xvertices).min(),
            yorigin=np.concatenate(mg.yvertices).min(),
            botm=mg.botm,
            top=mg.top,
            attrs=None,
            crs=None,
        )
    else:
        raise NotImplementedError(f"Grid type '{mg.grid_type}' not supported!")

    return ds




[docs]
def get_dims_coords_from_modelgrid(mg):
    """Get dimensions and coordinates from modelgrid.

    Used to build new xarray DataArrays with appropriate dimensions and coordinates.

    Parameters
    ----------
    mg : flopy.discretization.Grid
        flopy modelgrid object

    Returns
    -------
    dims : tuple of str
        tuple containing dimensions
    coords : dict
        dictionary containing spatial coordinates derived from modelgrid

    Raises
    ------
    ValueError
        for unsupported grid types
    """
    if mg.grid_type == "structured":
        layers = np.arange(mg.nlay)
        x, y = mg.xycenters  # local coordinates
        if mg.angrot == 0.0:
            x += mg.xoffset  # convert to global coordinates
            y += mg.yoffset  # convert to global coordinates
        coords = {"layer": layers, "y": y, "x": x}
        dims = ("layer", "y", "x")
    elif mg.grid_type == "vertex":
        layers = np.arange(mg.nlay)
        y = mg.ycellcenters
        x = mg.xcellcenters
        coords = {"layer": layers, "y": ("icell2d", y), "x": ("icell2d", x)}
        dims = ("layer", "icell2d")
    elif mg.grid_type == "unstructured":
        coords = {
            "x": ("node", mg.xcellcenters),
            "y": ("node", mg.ycellcenters),
        }
        dims = ("node",)
    else:
        raise ValueError(f"grid type '{mg.grid_type}' not supported.")
    return dims, coords




[docs]
def gridprops_to_vertex_ds(gridprops, ds, nodata=-1):
    """Gridprops is a dictionary containing keyword arguments needed to generate a flopy
    modelgrid instance.
    """
    _, xv, yv = zip(*gridprops["vertices"])
    ds["xv"] = ("iv", np.array(xv))
    ds["yv"] = ("iv", np.array(yv))

    cell2d = gridprops["cell2d"]
    ncvert_max = np.max([x[3] for x in cell2d])
    icvert = np.full((gridprops["ncpl"], ncvert_max), nodata)
    for i in range(gridprops["ncpl"]):
        icvert[i, : cell2d[i][3]] = cell2d[i][4:]
    ds["icvert"] = ("icell2d", "icv"), icvert
    ds["icvert"].attrs["nodata"] = nodata
    return ds




[docs]
def get_vertices_from_ds(ds):
    """Get the vertices-list from a model dataset.

    Flopy needs needs this list to build a disv-package
    """
    vertices = list(zip(ds["iv"].data, ds["xv"].data, ds["yv"].data))
    return vertices




[docs]
def get_cell2d_from_ds(ds):
    """Get the cell2d-list from a model dataset.

    Flopy needs this list to build a disv-package
    """
    icell2d = ds["icell2d"].data
    x = ds["x"].data
    y = ds["y"].data
    icvert = ds["icvert"].data
    if "nodata" in ds["icvert"].attrs:
        nodata = ds["icvert"].attrs["nodata"]
    else:
        nodata = -1
        icvert = icvert.copy()
        icvert[np.isnan(icvert)] = nodata
        icvert = icvert.astype(int)
    cell2d = []
    for i, cid in enumerate(icell2d):
        mask = icvert[i] != nodata
        cell2d.append((cid, x[i], y[i], mask.sum(), *icvert[i, mask]))
    return cell2d




[docs]
def get_cellids_from_xy(x, y, ds=None, modelgrid=None, gi=None):
    """Map points to grid cells.

    Note this is faster and more convenient than GridIntersect, because it
    provides the corresponding cellid for each point (instead of aggregating
    points per cell).

    Parameters
    ----------
    x : np.ndarray
        x-coordinates of points.
    y : np.ndarray
        y-coordinates of points.
    ds : xr.Dataset, optional
        Model dataset containing grid information. Must provide one of ds or modelgrid.
    modelgrid : StructuredGrid or VertexGrid, optional
        Model grid object. Must provide one of ds or modelgrid.
    gi : GridIntersect, optional
        GridIntersect instance, when passed saves some time building grid polygons.

    Returns
    -------
    cellids : pd.Series
        series with mapping between points and cellid of grid cell in
        which points are located.
    """
    if ds is None and modelgrid is None:
        raise ValueError("Either ds or modelgrid must be provided.")
    elif ds is not None:
        modelgrid = modelgrid_from_ds(ds)

    # build geometries and cellids for grid
    if gi is None:
        gi = flopy.utils.GridIntersect(modelgrid)

    # spatial join points with grid and add resulting cellid to obs_ds
    spatial_join = gpd.GeoDataFrame(geometry=gpd.points_from_xy(x, y)).sjoin(
        gpd.GeoDataFrame({"cellid": gi.cellids}, geometry=gi.geoms),
        how="left",
    )
    # deal with edge cases, sort by index and cellid, then pick lowest cellid
    cellids = (
        spatial_join.reset_index()
        .sort_values(["index", "cellid"])
        .groupby("index")
        .first()["cellid"]
    )
    cellids.index.name = "point_id"
    return cellids




[docs]
def refine(
    ds,
    model_ws=None,
    refinement_features=None,
    exe_name=None,
    remove_nan_layers=True,
    model_coordinates=False,
    version_tag=None,
):
    """Refine the grid (discretization by vertices, disv), using Gridgen.

    Parameters
    ----------
    ds : xr.Dataset
        A structured model Dataset.
    model_ws : str, optional
        The working directory for GridGen. Get from ds when model_ws is None.
        The default is None.
    refinement_features : list of tuples of length 2 or 3, optional
        List of tuples containing refinement features. Each tuple must be of
        the form (GeoDataFrame, level) or (geometry, shape_type, level). When
        refinement_features is None, no refinement is added, but the structured model
        Dataset is transformed to a Vertex Dataset. The default is None.
    exe_name : str, optional
        Filepath to the gridgen executable. The file path within nlmod is chose
        if exe_name is None. The default is None.
    remove_nan_layers : bool, optional
        if True layers that are inactive everywhere are removed from the model.
        If False nan layers are kept which might be usefull if you want
        to keep some layers that exist in other models. The default is True.
    model_coordinates : bool, optional
        When model_coordinates is True, the features supplied in refinement_features are
        already in model-coordinates. Only used when a grid is rotated. The default is
        False.
    version_tag : str, default None
        GitHub release ID: for example "18.0" or "latest". If version_tag is provided,
        the most recent installation location of MODFLOW is found in flopy metadata
        that respects `version_tag`. If not found, the executables are downloaded.
        Not compatible with exe_name.

    Returns
    -------
    xr.Dataset
        A Vertex model Dataset
    """
    assert ds.gridtype == "structured", "Can only refine a structured grid"
    logger.info("create vertex grid using gridgen")

    if exe_name is None:
        exe_name = util.get_exe_path(exe_name="gridgen", version_tag=version_tag)
    else:
        exe_name = util.get_exe_path(exe_name=exe_name, version_tag=version_tag)

    if model_ws is None:
        model_ws = os.path.join(ds.model_ws, "gridgen")
        os.makedirs(model_ws, exist_ok=True)

    if version.parse(flopy.__version__) < version.parse("3.3.6"):
        sim = flopy.mf6.MFSimulation()
        gwf = flopy.mf6.MFModel(sim)
        dis = flopy.mf6.ModflowGwfdis(
            gwf,
            nrow=len(ds.y),
            ncol=len(ds.x),
            delr=get_delr(ds),
            delc=get_delc(ds),
            xorigin=ds.extent[0],
            yorigin=ds.extent[2],
        )
        g = Gridgen(dis, model_ws=model_ws, exe_name=exe_name)
    else:
        # create a modelgrid with only one layer, to speed up Gridgen
        modelgrid = modelgrid_from_ds(ds, rotated=False, nlay=1)
        g = Gridgen(modelgrid, model_ws=model_ws, exe_name=exe_name)

    if model_coordinates:
        if not is_rotated(ds):
            msg = "The supplied shapes need to be in realworld coordinates"
            raise ValueError(msg)

    if refinement_features is not None:
        for refinement_feature in refinement_features:
            if len(refinement_feature) == 3:
                # the feature is a file or a list of geometries
                fname, geom_type, level = refinement_feature
                if not model_coordinates and is_rotated(ds):
                    msg = "Converting files to model coordinates not supported"
                    raise NotImplementedError(msg)
                g.add_refinement_features(fname, geom_type, level, layers=[0])
            elif len(refinement_feature) == 2:
                # the feature is a geodataframe
                gdf, level = refinement_feature
                if not model_coordinates and is_rotated(ds):
                    affine = get_affine_world_to_mod(ds)
                    gdf = affine_transform_gdf(gdf, affine)
                geom_types = gdf.geom_type.str.replace("Multi", "")
                geom_types = geom_types.str.replace("String", "")
                geom_types = geom_types.str.lower()
                for geom_type in geom_types.unique():
                    if flopy.__version__ == "3.3.5" and geom_type == "line":
                        # a bug in flopy that is fixed in the dev branch
                        msg = (
                            "geom_type line is buggy in flopy 3.3.5. "
                            "See https://github.com/modflowpy/flopy/issues/1405"
                        )
                        raise ValueError(msg)
                    mask = geom_types == geom_type
                    # features = [gdf[mask].unary_union]
                    features = list(gdf[mask].geometry.explode(index_parts=True))
                    g.add_refinement_features(features, geom_type, level, layers=[0])
    g.build()
    gridprops = g.get_gridprops_disv()
    gridprops["area"] = g.get_area()
    ds = ds_to_gridprops(ds, gridprops=gridprops)
    if remove_nan_layers:
        ds = remove_inactive_layers(ds)
    return ds




[docs]
def ds_to_gridprops(ds_in, gridprops, method="nearest", icvert_nodata=-1):
    """Resample a xarray dataset of a structured grid to a new dataset with a vertex
    grid.

    Returns a dataset with resampled variables and the untouched variables.

    Parameters
    ----------
    ds_in : xr.Dataset
        dataset with dimensions (layer, y, x). y and x are from the original
        structured grid
    gridprops : dictionary
        dictionary with grid properties output from gridgen.  Used as the
        definition of the vertex grid.
    method : str, optional
        type of interpolation used to resample. The default is 'nearest'.
    icvert_nodata : int, optional
        integer to represent nodata-values in cell2d array. Defaults to -1.

    Returns
    -------
    ds_out : xr.Dataset
        dataset with resampled variables and the untouched variables.
    """
    logger.info("resample model Dataset to vertex modelgrid")

    assert isinstance(ds_in, xr.core.dataset.Dataset)

    xyi, _ = get_xyi_icell2d(gridprops)
    x = xr.DataArray(xyi[:, 0], dims=("icell2d",))
    y = xr.DataArray(xyi[:, 1], dims=("icell2d",))

    if method in ["nearest", "linear"]:
        # resample the entire dataset in one line. Leaves not_interp_vars untouched
        ds_out = ds_in.interp(x=x, y=y, method=method, kwargs={"fill_value": None})

    else:
        # apply method to numeric data variables
        interp_vars = []
        not_interp_vars = []
        for key, var in ds_in.items():
            if "x" in var.dims or "y" in var.dims:
                if np.issubdtype(var.dtype, np.number):
                    interp_vars.append(key)
                else:
                    logger.info(
                        f"Data variable {key} has spatial coordinates but it cannot be refined "
                        "because of its non-numeric dtype. It is not available in the output Dataset."
                    )
            else:
                not_interp_vars.append(key)

        ds_out = ds_in[not_interp_vars]
        ds_out.coords.update({"layer": ds_in.layer, "x": x, "y": y})

        # add other variables
        from .resample import structured_da_to_ds

        for not_interp_var in not_interp_vars:
            ds_out[not_interp_var] = structured_da_to_ds(
                da=ds_in[not_interp_var], ds=ds_out, method=method, nodata=np.nan
            )
    if is_rotated(ds_out):
        affine = get_affine_mod_to_world(ds_out)
        ds_out["xc"], ds_out["yc"] = affine * (ds_out.x, ds_out.y)

    if "area" in gridprops:
        if "area" in ds_out:
            ds_out = ds_out.drop_vars("area")

        # only keep the first layer of area
        area = gridprops["area"][: len(ds_out["icell2d"])]
        ds_out["area"] = ("icell2d", area)

    # add information about the vertices
    ds_out = gridprops_to_vertex_ds(gridprops, ds_out, nodata=icvert_nodata)

    # then finally change the gridtype in the attributes
    ds_out.attrs["gridtype"] = "vertex"
    return ds_out




[docs]
def get_xyi_icell2d(gridprops=None, ds=None):
    """Get x and y coordinates of the cell mids from the cellids in the grid properties.

    Parameters
    ----------
    gridprops : dictionary, optional
        dictionary with grid properties output from gridgen. If gridprops is
        None xyi and icell2d will be obtained from ds.
    ds : xr.Dataset
        dataset with model data. Should have dimension (layer, icell2d).

    Returns
    -------
    xyi : numpy.ndarray
        array with x and y coördinates of cell centers, shape(len(icell2d), 2).
    icell2d : numpy.ndarray
        array with cellids, shape(len(icell2d))
    """
    if gridprops is not None:
        xc_gwf = [cell2d[1] for cell2d in gridprops["cell2d"]]
        yc_gwf = [cell2d[2] for cell2d in gridprops["cell2d"]]
        xyi = np.vstack((xc_gwf, yc_gwf)).T
        icell2d = np.array([c[0] for c in gridprops["cell2d"]])
    elif ds is not None:
        xyi = np.array(list(zip(ds.x.values, ds.y.values)))
        icell2d = ds.icell2d.values
    else:
        raise ValueError("either gridprops or ds should be specified")

    return xyi, icell2d




[docs]
def update_ds_from_layer_ds(ds, layer_ds, method="nearest", **kwargs):
    """Add variables from a layer Dataset to a model Dataset. Keep de grid- information
    from the model Dataset (x and y or icell2d), but update the layer dimension when
    neccesary.

    Parameters
    ----------
    ds : xr.Dataset
        dataset with model data. Can have dimension (layer, y, x) or
        (layer, icell2d).
    layer_ds : xr.Dataset
        dataset with layer data.
    method : str
        The method used for resampling layer_ds to the grid of ds.
    **kwargs : keyword arguments
        keyword arguments are passed to the fill_nan_top_botm_kh_kv-method.

    Returns
    -------
    ds : xr.Dataset
        Dataset with variables from layer_ds.
    """
    if not layer_ds.layer.equals(ds.layer):
        # update layers in ds
        drop_vars = []
        for var in ds.data_vars:
            if "layer" in ds[var].dims:
                if var not in layer_ds.data_vars:
                    logger.info(
                        f"Variable {var} is dropped, as it has dimension layer, "
                        "but is not defined in layer_ds"
                    )
                drop_vars.append(var)
        if len(drop_vars) > 0:
            ds = ds.drop_vars(drop_vars)
        ds = ds.assign_coords({"layer": layer_ds.layer})
    if method in ["nearest", "linear"]:
        if is_rotated(ds):
            x = ds.xc
            y = ds.yc
        else:
            x = ds.x
            y = ds.y
        layer_ds = layer_ds.interp(x=x, y=y, method=method, kwargs={"fill_value": None})
        for var in layer_ds.data_vars:
            ds[var] = layer_ds[var]
    else:
        from .resample import structured_da_to_ds

        for var in layer_ds.data_vars:
            ds[var] = structured_da_to_ds(layer_ds[var], ds, method=method)
    from .base import extrapolate_ds

    ds = extrapolate_ds(ds)
    ds = fill_nan_top_botm_kh_kv(ds, **kwargs)
    return ds




[docs]
def col_to_list(col_in, ds, cellids):
    """Convert array data in ds to a list of values for specific cells.

    This function is typically used to create a rec_array with stress period
    data for the modflow packages. Can be used for structured and
    vertex grids.

    Parameters
    ----------
    col_in : xr.DataArray, str, int or float
        if col_in is a str type it is the name of the variable in ds (if it exists).
        if col_in is an int or a float it is a value that will be used for all
        cells in cellids.
    ds : xr.Dataset
        dataset with model data. Can have dimension (layer, y, x) or
        (layer, icell2d).
    cellids : tuple of numpy arrays
        tuple with indices of the cells that will be used to create the list
        with values. There are 3 options:
            1.   cellids contains (layers, rows, columns)
            2.   cellids contains (rows, columns) or (layers, icell2ds)
            3.   cellids contains (icell2ds)

    Raises
    ------
    ValueError
        raised if the cellids are in the wrong format.

    Returns
    -------
    col_lst : list
        raster values from ds presented in a list per cell.
    """
    if isinstance(col_in, str) and col_in in ds:
        col_in = ds[col_in]
    if isinstance(col_in, xr.DataArray):
        if len(cellids) == 3:
            # 3d grid
            col_lst = [
                col_in.data[lay, row, col]
                for lay, row, col in zip(cellids[0], cellids[1], cellids[2])
            ]
        elif len(cellids) == 2:
            # 2d grid or vertex 3d grid
            col_lst = [
                col_in.data[row, col] for row, col in zip(cellids[0], cellids[1])
            ]
        elif len(cellids) == 1:
            # 2d vertex grid
            col_lst = col_in.data[cellids[0]]
        else:
            raise ValueError(f"could not create a column list for col_in={col_in}")
    else:
        col_lst = [col_in] * len(cellids[0])

    return col_lst




[docs]
def lrc_to_reclist(
    layers, rows, columns, cellids, ds, col1=None, col2=None, col3=None, aux=None
):
    """Create a reclist for stress period data from a set of cellids.

    Used for structured grids.


    Parameters
    ----------
    layers : list or numpy.ndarray
        list with the layer for each cell in the reclist.
    rows : list or numpy.ndarray
        list with the rows for each cell in the reclist.
    columns : list or numpy.ndarray
        list with the columns for each cell in the reclist.
    cellids : tuple of numpy arrays
        tuple with indices of the cells that will be used to create the list
        with values.
    ds : xr.Dataset
        dataset with model data. Can have dimension (layer, y, x) or
        (layer, icell2d).
    col1 : str, int or float, optional
        1st column of the reclist, if None the reclist will be a list with
        ((layer,row,column)) for each row.

        col1 should be the following value for each package (can also be the
            name of a timeseries):
            rch: recharge [L/T]
            ghb: head [L]
            drn: drain level [L]
            chd: head [L]

    col2 : str, int or float, optional
        2nd column of the reclist, if None the reclist will be a list with
        ((layer,row,column), col1) for each row.

        col2 should be the following value for each package (can also be the
            name of a timeseries):
            ghb: conductance [L^2/T]
            drn: conductance [L^2/T]

    col3 : str, int or float, optional
        3th column of the reclist, if None the reclist will be a list with
        ((layer,row,column), col1, col2) for each row.

        col3 should be the following value for each package (can also be the
            name of a timeseries):

    aux : str or list of str
        list of auxiliary variables to include in reclist

    Raises
    ------
    ValueError
        Question: will this error ever occur?.

    Returns
    -------
    reclist : list of tuples
        every row consist of ((layer,row,column), col1, col2, col3).
    """
    cols = []

    if col1 is not None:
        cols.append(col_to_list(col1, ds, cellids))
    if col2 is not None and len(cols) == 1:
        cols.append(col_to_list(col2, ds, cellids))
    elif col2 is not None and len(cols) != 1:
        raise ValueError("col2 is set, but col1 is not!")
    if col3 is not None and len(cols) == 2:
        cols.append(col_to_list(col3, ds, cellids))
    elif col3 is not None and len(cols) != 2:
        raise ValueError("col3 is set, but col1 and/or col2 are not!")

    if aux is not None:
        if isinstance(aux, (str, int, float)):
            aux = [aux]

        for i_aux in aux:
            if isinstance(i_aux, str):
                if "layer" in ds[i_aux].dims and len(cellids) != 3:
                    cols.append(col_to_list(i_aux, ds, (np.array(layers),) + cellids))
                else:
                    cols.append(col_to_list(i_aux, ds, cellids))
            else:
                cols.append(col_to_list(i_aux, ds, cellids))

    reclist = list(zip(zip(layers, rows, columns), *cols))
    return reclist




[docs]
def lcid_to_reclist(
    layers,
    cellids,
    ds,
    col1=None,
    col2=None,
    col3=None,
    aux=None,
):
    """Create a reclist for stress period data from a set of cellids.

    Used for vertex grids.


    Parameters
    ----------
    layers : list or numpy.ndarray
        list with the layer for each cell in the reclist.
    cellids : tuple of numpy arrays
        tuple with indices of the cells that will be used to create the list
        with values for a column. There are 2 options:
            1. cellids contains (layers, cids)
            2. cellids contains (cids)
    ds : xr.Dataset
        dataset with model data. Should have dimensions (layer, icell2d).
    col1 : str, int or float, optional
        1st column of the reclist, if None the reclist will be a list with
        ((layer,icell2d)) for each row. col1 should be the following value for
        each package (can also be the name of a timeseries):
        -   rch: recharge [L/T]
        -   ghb: head [L]
        -   drn: drain level [L]
        -   chd: head [L]
        -   riv: stage [L]

    col2 : str, int or float, optional
        2nd column of the reclist, if None the reclist will be a list with
        ((layer,icell2d), col1) for each row. col2 should be the following
        value for each package (can also be the name of a timeseries):
        -   ghb: conductance [L^2/T]
        -   drn: conductance [L^2/T]
        -   riv: conductacnt [L^2/T]

    col3 : str, int or float, optional
        3th column of the reclist, if None the reclist will be a list with
        ((layer,icell2d), col1, col2) for each row. col3 should be the following
        value for each package (can also be the name of a timeseries):
        -   riv: bottom [L]

    aux : str or list of str
        list of auxiliary variables to include in reclist

    Raises
    ------
    ValueError
        Question: will this error ever occur?.

    Returns
    -------
    reclist : list of tuples
        every row consist of ((layer, icell2d), col1, col2, col3)
        grids.
    """
    cols = []

    if col1 is not None:
        cols.append(col_to_list(col1, ds, cellids))
    if col2 is not None and len(cols) == 1:
        cols.append(col_to_list(col2, ds, cellids))
    elif col2 is not None and len(cols) != 1:
        raise ValueError("col2 is set, but col1 is not!")
    if col3 is not None and len(cols) == 2:
        cols.append(col_to_list(col3, ds, cellids))
    elif col3 is not None and len(cols) != 2:
        raise ValueError("col3 is set, but col1 and/or col2 are not!")

    if aux is not None:
        if isinstance(aux, (str, int, float)):
            aux = [aux]

        for i_aux in aux:
            if isinstance(i_aux, str):
                if "layer" in ds[i_aux].dims and len(cellids) != 2:
                    cols.append(col_to_list(i_aux, ds, (np.array(layers),) + cellids))
                else:
                    cols.append(col_to_list(i_aux, ds, cellids))
            else:
                cols.append(col_to_list(i_aux, ds, cellids))

    reclist = list(zip(zip(layers, cellids[-1]), *cols))
    return reclist




[docs]
def cols_to_reclist(ds, cellids, *args, cellid_column=0):
    """Create a reclist for stress period data from a set of cellids.

    Parameters
    ----------
    ds : xr.Dataset
        dataset with model data. Should have dimensions (layer, icell2d).
    cellids : tuple of length 2 or 3
        tuple with indices of the cells that will be used to create the list. For a
        structured grid, cellids represents (layer, row, column). For a vertex grid
        cellid reprsents (layer, icell2d).
    args : xr.DataArray, str, int or float
        the args parameter represents the data to be used as the columns in the reclist.
        See col_to_list of the allowed values.
    cellid_column : int, optional
        Adds the cellid ((layer, row, col) or (layer, icell2d)) to the reclist in this
        column number. Do not add cellid when cellid_column is None. The default is 0.
    """
    cols = [col_to_list(col, ds, cellids) for col in args]
    if cellid_column is not None:
        cols.insert(cellid_column, list(zip(*cellids)))
    return list(zip(*cols))




[docs]
def da_to_reclist(
    ds,
    mask,
    col1=None,
    col2=None,
    col3=None,
    layer=0,
    aux=None,
    first_active_layer=False,
    only_active_cells=True,
):
    """Create a reclist for stress period data from a model dataset.

    Used for vertex grids.


    Parameters
    ----------
    ds : xr.Dataset
        dataset with model data and dimensions (layer, icell2d)
    mask : xr.DataArray for booleans
        True for the cells that will be used in the rec list.
    col1 : str, int or float, optional
        1st column of the reclist, if None the reclist will be a list with
        (cellid,) for each row.

        col1 should be the following value for each package (can also be the
            name of a timeseries):
            rch: recharge [L/T]
            ghb: head [L]
            drn: drain level [L]
            chd: head [L]

    col2 : str, int or float, optional
        2nd column of the reclist, if None the reclist will be a list with
        (cellid, col1) for each row.

        col2 should be the following value for each package (can also be the
            name of a timeseries):
            ghb: conductance [L^2/T]
            drn: conductance [L^2/T]

    col3 : str, int or float, optional
        3th column of the reclist, if None the reclist will be a list with
        (cellid, col1, col2) for each row.

        col3 should be the following value for each package (can also be the
            name of a timeseries):
            riv: bottom [L]
    aux : str or list of str, optional
        list of auxiliary variables to include in reclist
    layer : int, optional
        layer used in the reclist. Not used if layer is in the dimensions of
        mask or if first_active_layer is True. The default is 0
    first_active_layer : bool, optional
        If True an extra mask is applied to use the first active layer of each
        cell in the grid. Not used if layer is in the dimensions of mask. The
        default is False.
    only_active_cells : bool, optional
        If True an extra mask is used to only include cells with an idomain
        of 1. The default is True.

    Returns
    -------
    reclist : list of tuples
        every row consists of ((layer, icell2d), col1, col2, col3).
    """
    if "layer" in mask.dims:
        if only_active_cells:
            idomain = get_idomain(ds)
            cellids = np.where((mask) & (idomain == 1))
            ignore_cells = int(np.sum((mask) & (idomain != 1)))
            if ignore_cells > 0:
                logger.info(
                    f"ignore {ignore_cells} out of {np.sum(mask.values)} cells "
                    "because idomain is inactive"
                )
        else:
            cellids = np.where(mask)

        if "icell2d" in mask.dims:
            layers = cellids[0]
            return lcid_to_reclist(layers, cellids, ds, col1, col2, col3, aux=aux)
        else:
            layers = cellids[0]
            rows = cellids[1]
            columns = cellids[2]
            return lrc_to_reclist(
                layers, rows, columns, cellids, ds, col1, col2, col3, aux=aux
            )
    else:
        if first_active_layer:
            fal = get_first_active_layer(ds)
            cellids = np.where((mask.squeeze()) & (fal != fal.attrs["nodata"]))
            layers = col_to_list(fal, ds, cellids)
        elif only_active_cells:
            idomain = get_idomain(ds)
            cellids = np.where((mask) & (idomain[layer] == 1))
            ignore_cells = int(np.sum((mask) & (idomain[layer] != 1)))
            if ignore_cells > 0:
                logger.info(
                    f"ignore {ignore_cells} out of {np.sum(mask.values)} cells because idomain is inactive"
                )
            layers = col_to_list(layer, ds, cellids)
        else:
            cellids = np.where(mask)
            layers = col_to_list(layer, ds, cellids)

        if "icell2d" in mask.dims:
            return lcid_to_reclist(layers, cellids, ds, col1, col2, col3, aux=aux)
        else:
            rows = cellids[-2]
            columns = cellids[-1]

            return lrc_to_reclist(
                layers, rows, columns, cellids, ds, col1, col2, col3, aux=aux
            )




[docs]
def polygon_to_area(modelgrid, polygon, da, gridtype="structured"):
    """Create a grid with the surface area in each cell based on a polygon value.

    Parameters
    ----------
    modelgrid : flopy.discretization.structuredgrid.StructuredGrid
        grid.
    polygon : shapely.geometry.polygon.Polygon
        polygon feature.
    da : xr.DataArray
        data array that is filled with polygon data

    Returns
    -------
    area_array : xr.DataArray
        area of polygon within each modelgrid cell
    """
    if polygon.geom_type == "Polygon":
        pass
    elif polygon.geom_type == "MultiPolygon":
        warnings.warn(
            "function not tested for MultiPolygon type, can have unexpected results"
        )
    else:
        raise TypeError(
            f'input geometry should by of type "Polygon" not {polygon.geom_type}'
        )

    ix = GridIntersect(modelgrid)
    df = ix.intersect(polygon, geo_dataframe=True)

    if gridtype == "structured":
        area_array = util.get_da_from_da_ds(da, dims=("y", "x"), data=0.0)
        for row, col, area in zip(df["row"], df["col"], df["areas"]):
            area_array[row, col] = area
    elif gridtype == "vertex":
        area_array = util.get_da_from_da_ds(da, dims=("icell2d",), data=0.0)
        area_array[df["cellid"].values] = df["areas"]

    return area_array




[docs]
def gdf_to_data_array_struc(
    gdf, gwf, field="VALUE", agg_method=None, interp_method=None
):
    """Project vector data on a structured grid. Aggregate data if multiple geometries
    are in a single cell.

    Parameters
    ----------
    gdf : geopandas.GeoDataframe
        vector data can only contain a single geometry type.
    gwf : flopy groundwater flow model
        model with a structured grid.
    field : str, optional
        column name in the geodataframe. The default is 'VALUE'.
    interp_method : str or None, optional
        method to obtain values in cells without geometry by interpolating
        between cells with values. Options are 'nearest' and 'linear'.
    agg_method : str, optional
        aggregation method to handle multiple geometries in one cell, options
        are:
        - max, min, mean,
        - length_weighted (lines), max_length (lines),
        - area_weighted (polygon), max_area (polygon).
        The default is 'max'.

    Returns
    -------
    da : xr DataArray
        The DataArray with the projected vector data.
    """
    warnings.warn(
        "The method gdf_to_data_array_struc is deprecated. Please use gdf_to_da instead.",
        DeprecationWarning,
    )

    x = gwf.modelgrid.get_xcellcenters_for_layer(0)[0]
    y = gwf.modelgrid.get_ycellcenters_for_layer(0)[:, 0]
    da = xr.DataArray(np.nan, dims=("y", "x"), coords={"y": y, "x": x})

    # interpolate data
    if interp_method is not None:
        arr = interpolate_gdf_to_array(gdf, gwf, field=field, method=interp_method)
        da.values = arr

        return da

    gdf_cellid = gdf_to_grid(gdf, gwf)

    if gdf_cellid.cellid.duplicated().any():
        # aggregate data
        if agg_method is None:
            raise ValueError(
                "multiple geometries in one cell please define aggregation method"
            )
        gdf_agg = aggregate_vector_per_cell(gdf_cellid, {field: agg_method}, gwf)
    else:
        # aggregation not neccesary
        gdf_agg = gdf_cellid[[field]]
        gdf_agg.set_index(
            pd.MultiIndex.from_tuples(gdf_cellid.cellid.values), inplace=True
        )

    for ind, row in gdf_agg.iterrows():
        da.values[ind[0], ind[1]] = row[field]

    return da




[docs]
def gdf_to_da(
    gdf, ds, column, agg_method=None, fill_value=np.nan, min_total_overlap=0.0, ix=None
):
    """Project vector data on a grid. Aggregate data if multiple geometries are in a
    single cell. Supports structured and vertex grids. This method replaces
    gdf_to_data_array_struc.

    Parameters
    ----------
    gdf : geopandas.GeoDataframe
        vector data can only contain a single geometry type.
    ds : xr.Dataset
        model Datset
    column : str
        column name in the geodataframe.
    agg_method : str, optional
        aggregation method to handle multiple geometries in one cell, options
        are:
        - max, min, mean, sum
        - length_weighted (lines), max_length (lines),
        - area_weighted (polygon), max_area (polygon).
        The default is 'max'.
    fill_value : float or int, optional
        The value to fill in da outside gdf. The default is np.NaN
    min_total_overlap: float, optional
        Only assign cells with a gdf-area larger than min_total_overlap * cell-area. The
        default is 0.0
    ix : GridIntersect, optional
        If not provided it is computed from ds.

    Returns
    -------
    da : xr DataArray
        The DataArray with the projected vector data.
    """
    da = util.get_da_from_da_ds(ds, dims=ds.top.dims, data=fill_value)

    gdf_cellid = gdf_to_grid(gdf, ds, ix=ix)

    if gdf_cellid.size == 0:
        return da

    if min_total_overlap > 0:
        gdf_cellid["area"] = gdf_cellid.area
        area_sum = gdf_cellid[["cellid", "area"]].groupby("cellid").sum()
        min_area = min_total_overlap * ds["area"].data[area_sum.index]
        cellids = area_sum.index[area_sum["area"] > min_area]
        gdf_cellid = gdf_cellid[gdf_cellid["cellid"].isin(cellids)]

    if gdf_cellid.cellid.duplicated().any():
        # aggregate data
        if agg_method is None:
            raise ValueError(
                "multiple geometries in one cell please define aggregation method"
            )
        modelgrid = None
        if agg_method in ["nearest"]:
            modelgrid = modelgrid_from_ds(ds)
        gdf_agg = aggregate_vector_per_cell(
            gdf_cellid, {column: agg_method}, modelgrid=modelgrid
        )
    else:
        # aggregation not neccesary
        gdf_agg = gdf_cellid[[column]]
        if isinstance(gdf_cellid.cellid.iloc[0], tuple):
            gdf_agg.set_index(
                pd.MultiIndex.from_tuples(gdf_cellid.cellid.values), inplace=True
            )
        else:
            gdf_agg.set_index(gdf_cellid.cellid.values, inplace=True)

    if ds.gridtype == "structured":
        ixs, iys = zip(*gdf_agg.index.values)
        da.values[ixs, iys] = gdf_agg[column]
    elif ds.gridtype == "vertex":
        da[gdf_agg.index] = gdf_agg[column]

    return da




[docs]
def interpolate_gdf_to_array(gdf, gwf, field="values", method="nearest"):
    """Interpolate data from a point gdf.

    Parameters
    ----------
    gdf : geopandas.GeoDataframe
        vector data can only contain a single geometry type.
    gwf : flopy groundwater flow model
        model with a structured grid.
    field : str, optional
        column name in the geodataframe. The default is 'values'.
    method : str or None, optional
        method to obtain values in cells without geometry by interpolating
        between cells with values. Options are 'nearest' and 'linear'.

    Returns
    -------
    arr : np.array
        numpy array with interpolated data.
    """
    # check geometry
    geom_types = gdf.geometry.type.unique()
    if geom_types[0] != "Point":
        raise NotImplementedError("can only use interpolation with point geometries")

    # check field
    if field not in gdf.columns:
        raise ValueError(f"Missing column in DataFrame: {field}")

    points = np.array([[g.x, g.y] for g in gdf.geometry])
    values = gdf[field].values
    xi = np.vstack(
        (
            gwf.modelgrid.xcellcenters.flatten(),
            gwf.modelgrid.ycellcenters.flatten(),
        )
    ).T
    vals = griddata(points, values, xi, method=method)
    arr = np.reshape(vals, (gwf.modelgrid.nrow, gwf.modelgrid.ncol))

    return arr




[docs]
def _agg_max_area(gdf, col):
    return gdf.loc[gdf.area.idxmax(), col]




[docs]
def _agg_area_weighted(gdf, col):
    aw = (gdf.area * gdf[col]).sum(skipna=True) / gdf.area.sum(skipna=True)
    return aw




[docs]
def _agg_max_length(gdf, col):
    return gdf.loc[gdf.length.idxmax(), col]




[docs]
def _agg_length_weighted(gdf, col):
    nanmask = gdf[col].isna()
    aw = (gdf.length * gdf[col]).sum(skipna=True) / gdf.loc[~nanmask].length.sum()
    return aw




[docs]
def _agg_nearest(gdf, col, modelgrid):
    if modelgrid.grid_type == "structured":
        cid = gdf["cellid"].values[0]
        cellcenter = Point(
            modelgrid.xcellcenters[0, cid[1]], modelgrid.ycellcenters[cid[0], 0]
        )
        val = gdf.iloc[gdf.distance(cellcenter).argmin()].loc[col]
    elif modelgrid.grid_type == "vertex":
        cid = gdf["cellid"].values[0]
        cellcenter = Point(modelgrid.xcellcenters[cid], modelgrid.ycellcenters[cid])
        val = gdf.iloc[gdf.distance(cellcenter).argmin()].loc[col]

    return val




[docs]
def _get_aggregates_values(group, fields_methods, modelgrid=None):
    agg_dic = {}
    for field, method in fields_methods.items():
        # aggregation is only necesary if group shape is greater than 1
        if (group.shape[0] == 1) or (method == "first"):
            agg_dic[field] = group[field].values[0]
        elif method == "max":
            agg_dic[field] = group[field].max()
        elif method == "min":
            agg_dic[field] = group[field].min()
        elif method == "mean":
            agg_dic[field] = group[field].mean()
        elif method == "sum":
            agg_dic[field] = group[field].sum()
        elif method == "nearest":
            agg_dic[field] = _agg_nearest(group, field, modelgrid)
        elif method == "length_weighted":  # only for lines
            agg_dic[field] = _agg_length_weighted(group, field)
        elif method == "max_length":  # only for lines
            agg_dic[field] = _agg_max_length(group, field)
        elif method == "area_weighted":  # only for polygons
            agg_dic[field] = _agg_area_weighted(group, field)
        elif method == "max_area":  # only for polygons
            agg_dic[field] = _agg_max_area(group, field)
        elif method == "center_grid":  # only for polygons
            raise NotImplementedError
        else:
            raise ValueError(f"Method '{method}' not recognized!")

    return agg_dic




[docs]
def aggregate_vector_per_cell(gdf, fields_methods, modelgrid=None):
    """Aggregate vector features per cell.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing points, lines or polygons per grid cell.
    fields_methods: dict
        fields (keys) in the Geodataframe with their aggregation method (items)
        aggregation methods can be:
        max, min, mean, sum, length_weighted (lines), max_length (lines),
        area_weighted (polygon), max_area (polygon), and functions supported by
        pandas.*GroupBy.agg().
    modelgrid : flopy Groundwater flow modelgrid
        only necesary if one of the field methods is 'nearest'

    Returns
    -------
    celldata : pd.DataFrame
        DataFrame with aggregated surface water parameters per grid cell
    """
    # check geometry types
    geom_types = gdf.geometry.type.unique()
    if len(geom_types) > 1:
        if len(geom_types) == 2 and (
            set(geom_types) == set(["LineString", "MultiLineString"])
            or set(geom_types) == set(["Polygon", "MultiPolygon"])
        ):
            pass
        else:
            raise TypeError("cannot aggregate geometries of different types")
    if bool({"length_weighted", "max_length"} & set(fields_methods.values())):
        if ("LineString" in geom_types) or ("MultiLineString" in geom_types):
            pass
        else:
            raise TypeError("can only use length methods with line geometries")
    if bool({"area_weighted", "max_area"} & set(fields_methods.values())):
        if ("Polygon" in geom_types) or ("MultiPolygon" in geom_types):
            pass
        else:
            raise TypeError("can only use area methods with polygon geometries")

    # check fields
    missing_cols = set(fields_methods.keys()).difference(gdf.columns)
    if len(missing_cols) > 0:
        raise ValueError(f"Missing columns in DataFrame: {missing_cols}")

    # aggregate data
    gr = gdf.groupby(by="cellid")
    celldata = pd.DataFrame(index=gr.groups.keys())

    for field, method in fields_methods.items():
        if method == "area_weighted":
            gdf_copy = gdf.copy()
            gdf_copy["area_times_field"] = gdf_copy["area"] * gdf_copy[field]

            # skipna is not implemented by groupby therefore we use min_count=1
            celldata[field] = gdf_copy.groupby(by="cellid")["area_times_field"].sum(
                min_count=1
            ) / gdf_copy.groupby(by="cellid")["area"].sum(min_count=1)

        elif method in (
            "nearest",
            "length_weighted",
            "max_length",
            "max_area",
            "center_grid",
        ):
            for cid, group in tqdm(gr, desc="Aggregate vector data"):
                agg_dic = _get_aggregates_values(group, {field: method}, modelgrid)

                for key, item in agg_dic.items():
                    celldata.loc[cid, key] = item

        else:
            celldata[field] = gr[field].agg(method)

    return celldata




[docs]
def gdf_to_bool_da(
    gdf,
    ds,
    ix=None,
    buffer=0.0,
    contains_centroid=False,
    min_area_fraction=None,
    **kwargs,
):
    """Return True if grid cell is partly in polygons, False otherwise.

    This function returns True for grid cells that are partly in the polygons. If
    contains_centroid is True, only cells are returned where the centroid is in the
    polygon. If min_area_fraction is set, only cells are returned where the area of the
    intersection is larger than min_area_fraction * cell_area.

    ix can be provided to speed up the process. If not provided it is computed from ds.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame or shapely.geometry
        shapes that will be rasterised.
    ds : xr.Dataset
        Dataset with model data.
    ix : flopy.utils.GridIntersect, optional
        If not provided it is computed from ds. Speeds up the the function
    buffer : float, optional
        The distance to buffer around the geometries in meters. A positive distance
        produces a dilation, a negative distance an erosion. The default is 0.
    contains_centroid :  bool, optional
        if True, only store intersection result if cell centroid is
        contained within intersection shape, only used if shape type is
        "polygon"
    min_area_fraction : float, optional
        float defining minimum intersection area threshold, if intersection
        area is smaller than min_frac_area * cell_area, do not store
        intersection result.
    **kwargs : keyword arguments
        keyword arguments are passed to the intersect-method.

    Returns
    -------
    da : xr.DataArray
        True if polygon is in cell, False otherwise.
    """
    if ds.gridtype == "structured":
        da = util.get_da_from_da_ds(ds, dims=("y", "x"), data=False)
    elif ds.gridtype == "vertex":
        da = util.get_da_from_da_ds(ds, dims=("icell2d",), data=False)
    else:
        msg = "gdf_to_bool_da() only support structured or vertex gridtypes"
        raise ValueError(msg)

    if isinstance(gdf, gpd.GeoDataFrame):
        if len(gdf) == 0:
            return da
        elif len(gdf) == 1:
            multipolygon = gdf.geometry.values[0]
        else:
            multipolygon = unary_union(gdf.geometry)
    elif isinstance(gdf, shapely.geometry.base.BaseGeometry):
        multipolygon = gdf
    else:
        msg = "gdf_to_bool_da() only support GeoDataFrame or shapely"
        raise TypeError(msg)

    if buffer != 0.0:
        multipolygon = multipolygon.buffer(buffer)

    # Rotate the polygon instead of the modelgrid
    if ix is None:
        modelgrid = modelgrid_from_ds(ds, rotated=False)
        ix = GridIntersect(modelgrid)

    grid_rotation = ds.attrs.get("angrot", 0.0)
    ix_rotation = ix.mfgrid.angrot

    if grid_rotation != 0.0 and ix_rotation == 0.0:
        affine = get_affine_world_to_mod(ds).to_shapely()
        multipolygon = affine_transform(multipolygon, affine)

    if kwargs or contains_centroid or min_area_fraction is not None:
        df = ix.intersect(
            multipolygon,
            contains_centroid=contains_centroid,
            min_area_fraction=min_area_fraction,
            geo_dataframe=True,
            **kwargs,
        )
    else:
        df = ix.intersects(multipolygon, dataframe=True)

    if df.shape[0] > 0 and ds.gridtype == "structured":
        for _, row in df.iterrows():
            da[row["row"], row["col"]] = True
    elif df.shape[0] > 0 and ds.gridtype == "vertex":
        da[df["cellid"].values] = True

    return da




[docs]
def gdf_to_bool_ds(
    gdf,
    ds,
    da_name,
    keep_coords=None,
    ix=None,
    buffer=0.0,
    contains_centroid=False,
    min_area_fraction=None,
    **kwargs,
):
    """Return True if grid cell is partly in polygons, False otherwise.

    This function returns True for grid cells that are partly in the polygons. If
    contains_centroid is True, only cells are returned where the centroid is in the
    polygon. If min_area_fraction is set, only cells are returned where the area of the
    intersection is larger than min_area_fraction * cell_area.

    ix can be provided to speed up the process. If not provided it is computed from ds.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame or shapely.geometry
        shapes that will be rasterised.
    ds : xr.Dataset
        Dataset with model data.
    da_name : str
        The name of the variable with boolean data in the ds_out
    keep_coords : tuple or None, optional
        the coordinates in ds the you want keep in your empty ds. If None all
        coordinates are kept from original ds. The default is None.
    ix : flopy.utils.GridIntersect, optional
        If not provided it is computed from ds. Speeds up the the function
    buffer : float, optional
        The distance to buffer around the geometries in meters. A positive distance
        produces a dilation, a negative distance an erosion. The default is 0.
    contains_centroid :  bool, optional
        if True, only store intersection result if cell centroid is
        contained within intersection shape, only used if shape type is
        "polygon"
    min_area_fraction : float, optional
        float defining minimum intersection area threshold, if intersection
        area is smaller than min_frac_area * cell_area, do not store
        intersection result.
    **kwargs : keyword arguments
        keyword arguments are passed to the intersect_*-methods.

    Returns
    -------
    ds_out : xr.Dataset
        True if polygon is in cell, False otherwise.
    """
    ds_out = util.get_ds_empty(ds, keep_coords=keep_coords)
    ds_out[da_name] = gdf_to_bool_da(
        gdf,
        ds,
        ix=ix,
        buffer=buffer,
        contains_centroid=contains_centroid,
        min_area_fraction=min_area_fraction,
        **kwargs,
    )

    return ds_out




[docs]
def gdf_to_count_da(gdf, ds, ix=None, buffer=0.0, **kwargs):
    """Counts in how many polygons a coordinate of ds appears.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame or shapely.geometry
        shapes that will be rasterised.
    ds : xr.Dataset
        Dataset with model data.
    ix : flopy.utils.GridIntersect, optional
        If not provided it is computed from ds.
    buffer : float, optional
        buffer around the geometries. The default is 0.
    **kwargs : keyword arguments
        keyword arguments are passed to the intersect_*-methods.

    Returns
    -------
    da : xr.DataArray
        1 if polygon is in cell, 0 otherwise. Grid dimensions according to ds.
    """
    if is_rotated(ds):
        # transform gdf into model coordinates
        affine = get_affine_world_to_mod(ds)
        gdf = affine_transform_gdf(gdf, affine)

    # build list of gridcells
    if ix is None:
        modelgrid = modelgrid_from_ds(ds, rotated=False)
        ix = GridIntersect(modelgrid)

    if ds.gridtype == "structured":
        da = util.get_da_from_da_ds(ds, dims=("y", "x"), data=0)
    elif ds.gridtype == "vertex":
        da = util.get_da_from_da_ds(ds, dims=("icell2d",), data=0)
    else:
        raise ValueError("function only support structured or vertex gridtypes")

    if isinstance(gdf, gpd.GeoDataFrame):
        geoms = gdf.geometry.values
    elif isinstance(gdf, shapely.geometry.base.BaseGeometry):
        geoms = [gdf]

    for geom in geoms:
        if buffer > 0.0:
            df = ix.intersects(geom.buffer(buffer), dataframe=True, **kwargs)
        else:
            df = ix.intersects(geom, dataframe=True, **kwargs)

        if len(df) == 0:
            continue

        if ds.gridtype == "structured":
            da.values[df["row"].values, df["col"].values] += 1
        elif ds.gridtype == "vertex":
            da[df["cellid"].values] += 1

    return da




[docs]
def gdf_to_count_ds(gdf, ds, da_name, keep_coords=None, ix=None, buffer=0.0, **kwargs):
    """Counts in how many polygons a coordinate of ds appears.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        polygon shapes with surface water.
    ds : xr.Dataset
        Dataset with model data.
    da_name : str
        The name of the variable with boolean data in the ds_out
    keep_coords : tuple or None, optional
        the coordinates in ds the you want keep in your empty ds. If None all
        coordinates are kept from original ds. The default is None.
    ix : flopy.utils.GridIntersect, optional
        If not provided it is computed from ds.
    buffer : float, optional
        buffer around the geometries. The default is 0.
    **kwargs : keyword arguments
        keyword arguments are passed to the intersect_*-methods.

    Returns
    -------
    ds_out : xr.Dataset
        Dataset with a single DataArray, this DataArray is 1 if polygon is in
        cell, 0 otherwise. Grid dimensions according to ds and mfgrid.
    """
    ds_out = util.get_ds_empty(ds, keep_coords=keep_coords)
    ds_out[da_name] = gdf_to_count_da(gdf, ds, ix=ix, buffer=buffer, **kwargs)

    return ds_out




[docs]
@cache.cache_pickle
def gdf_to_grid(
    gdf,
    ml=None,
    ix=None,
    desc="Intersecting with grid",
    silent=False,
    **kwargs,
):
    """Intersect a geodataframe with the grid of a MODFLOW model.

    Note: This method is a wrapper around the GridIntersect method in flopy.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        A GeoDataFrame that needs to be cut by the grid. The GeoDataFrame can consist of
        multiple types (Point, LineString, Polygon and the Multi-variants).
    ml : flopy.modflow.Modflow or flopy.mf6.ModflowGwf or xr.Dataset, optional
        The flopy model or xarray dataset that defines the grid. When a Dataset is
        supplied, and the grid is rotated, the geodataframe is transformed in model
        coordinates. The default is None.
    ix : flopy.utils.GridIntersect, optional
        GridIntersect, if not provided the modelgrid in ml is used.
    desc : string, optional
        The description of the progressbar. The default is 'Intersecting with grid'.
    silent : bool, optional
        Do not show a progressbar when silent is True. The default is False.
    **kwargs : keyword arguments
        keyword arguments are passed to the intersect_*-methods.

    Returns
    -------
    gdfg : geopandas.GeoDataFrame
        The GeoDataFrame with the geometries per grid-cell.
    """
    if ml is None and ix is None:
        raise (ValueError("Either specify ml or ix"))

    if gdf.index.has_duplicates or gdf.columns.has_duplicates:
        raise ValueError("gdf should not have duplicate columns or index.")

    if ml is not None:
        if isinstance(ml, xr.Dataset):
            ds = ml
            modelgrid = modelgrid_from_ds(ds, rotated=False)
            if is_rotated(ds):
                # transform gdf into model coordinates
                affine = get_affine_world_to_mod(ds)
                gdf = affine_transform_gdf(gdf, affine)
        else:
            modelgrid = ml.modelgrid
            if modelgrid.angrot != 0:
                raise NotImplementedError(
                    "please use a model dataset instead of a model"
                )

    if ix is None:
        ix = flopy.utils.GridIntersect(modelgrid)
    shps = []
    geometry = gdf.geometry.name
    for _, shp in tqdm(gdf.iterrows(), total=gdf.shape[0], desc=desc, disable=silent):
        df = ix.intersect(shp[geometry], geo_dataframe=True, **kwargs)
        for _, row in df.iterrows():
            shpn = shp.copy()
            shpn["cellid"] = row["cellids"]
            shpn[geometry] = row["geometry"]
            if shp[geometry].geom_type == ["LineString", "MultiLineString"]:
                shpn["length"] = row["lengths"]
            elif shp[geometry].geom_type in ["Polygon", "MultiPolygon"]:
                shpn["area"] = row["areas"]
            shps.append(shpn)
    if len(shps) == 0:
        # Unable to determine the column names, because no geometries intersect with the grid
        logger.info("No geometries intersect with the grid")
        columns = gdf.columns.to_list() + ["area", "length", "cellid"]
    else:
        columns = None  # adopt from shps

    gdfg = gpd.GeoDataFrame(shps, columns=columns, geometry=geometry, crs=gdf.crs)
    gdfg.index.name = gdf.index.name
    return gdfg




[docs]
def gdf_area_to_da(
    gdf,
    ds,
    ix=None,
    index_name=None,
    desc="Intersecting with grid",
    silent=False,
    sparse=True,
    **kwargs,
):
    """
    Calculate the area of overlap between polygons in a GeoDataFrame and a model grid.

    This function computes the area of each geometry in `gdf` that falls within
    each model grid cell defined in the xarray `ds` Dataset. The result is returned
    as an xarray DataArray, with dimensions based on the model grid and the index
    of the input GeoDataFrame.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing polygon geometries to be intersected with the model
        grid.
    ds : xr.Dataset
        The xarray dataset that defines the grid. When the grid is rotated, the
        geodataframe is transformed in model coordinates.
    ix : flopy.utils.GridIntersect, optional
        GridIntersect, if not provided the modelgrid is determined from ds. The default
        is None.
    index_name : str, optional
        Name to use for the index dimension of the resulting DataArray. If None,
        uses the name of `gdf.index`, or defaults to 'index'.
    desc : string, optional
        The description of the progressbar. The default is 'Intersecting with grid'.
    silent : bool, optional
        Do not show a progressbar when silent is True. The default is False.
    sparse : bool, default True
        If True, attempts to return a sparse array. Falls back to dense if `sparse`
        is not available.
    **kwargs : dict
        Additional keyword arguments passed to `ix.intersect`.

    Returns
    -------
    xarray.DataArray
        A DataArray with shape (y, x, index) for structured grids or (icell2d, index)
        for unstructured grids. Each value represents the area of a given geometry
        within a specific grid cell.

    Notes
    -----
    - Uses `flopy.utils.GridIntersect` under the hood for spatial intersection.
    - For rotated grids, geometries are transformed into model space using an affine transformation.
    - If the `sparse` package is installed and `sparse=True`, a `sparse.COO` array is returned.
    - Suitable for use in spatial weighting or disaggregation tasks.

    Raises
    ------
    ImportWarning
        If `sparse=True` but the `sparse` library is not installed, a warning is issued
        and a dense array is used instead.

    """
    modelgrid = modelgrid_from_ds(ds, rotated=False)
    if is_rotated(ds):
        # transform gdf into model coordinates
        affine = get_affine_world_to_mod(ds)
        gdf = affine_transform_gdf(gdf, affine)
    if ix is None:
        ix = flopy.utils.GridIntersect(modelgrid)

    if index_name is None:
        index_name = gdf.index.name
        if index_name is None:
            index_name = "index"
    structured = is_structured(ds)
    if structured:
        dims = ["y", "x", index_name]
    else:
        dims = ["icell2d", index_name]
    coords = {key: ds.coords[key] for key in dims[:-1]}
    coords[index_name] = gdf.index
    shape = [len(coords[dim]) for dim in dims]

    geometry = gdf.geometry.name
    data = np.zeros(shape)
    for irow, index in tqdm(
        enumerate(gdf.index), total=gdf.shape[0], desc=desc, disable=silent
    ):
        df = ix.intersect(gdf.at[index, geometry], geo_dataframe=True, **kwargs)
        if structured:
            for row, col, area in zip(df["row"], df["col"], df["areas"]):
                data[(row, col, irow)] += area
        else:
            data[list(df["cellid"]), irow] = df["areas"]

    if sparse:
        try:
            from sparse import COO

            data = COO.from_numpy(data)
        except ImportError:
            logger.warning("sparse not installed. Using a normal numpy array.")
    area = xr.DataArray(data, dims=dims, coords=coords)
    return area




[docs]
def get_thickness_from_topbot(top, bot):
    """Get thickness from data arrays with top and bots.

    Parameters
    ----------
    top : xr.DataArray
        raster with top of each cell. dimensions should be (y,x) or (icell2d).
    bot : xr.DataArray
        raster with bottom of each cell. dimensions should be (layer, y,x) or
        (layer, icell2d).

    Returns
    -------
    thickness : xr.DataArray
        raster with thickness of each cell. dimensions should be (layer, y,x)
        or (layer, icell2d).
    """
    warnings.warn(
        "The method get_thickness_from_topbot is deprecated. Please use nlmod.layers.calculate_thickness instead.",
        DeprecationWarning,
    )

    if np.ndim(top) > 2:
        raise NotImplementedError("function works only for 2d top")

    # get thickness
    if bot.ndim == 3:
        thickness = util.get_da_from_da_ds(bot, dims=("layer", "y", "x"))
    elif bot.ndim == 2:
        thickness = util.get_da_from_da_ds(bot, dims=("layer", "icell2d"))
    else:
        raise ValueError("function only support structured or vertex gridtypes")

    for lay, botlay in enumerate(bot):
        if lay == 0:
            thickness[lay] = top - botlay
        else:
            thickness[lay] = bot[lay - 1] - botlay

    return thickness




[docs]
def get_vertices_arr(ds, modelgrid=None, vert_per_cid=4, epsilon=0, rotated=False):
    """Get vertices of a vertex modelgrid from a ds or the modelgrid. Only return the 4
    corners of each cell and not the corners of adjacent cells thus limiting the
    vertices per cell to 4 points.

    This method uses the xvertices and yvertices attributes of the modelgrid.
    When no modelgrid is supplied, a modelgrid-object is created from ds.

    Parameters
    ----------
    ds : xr.Dataset
        model dataset, attribute grid_type should be 'vertex'
    modelgrid : flopy.discretization.vertexgrid.VertexGrid
        vertex grid with attributes xvertices and yvertices.
    vert_per_cid : int or None:
        number of vertices per cell:
        - 4 return the 4 vertices of each cell
        - 5 return the 4 vertices of each cell + one duplicate vertex
        (sometimes useful if you want to create polygons)
        - anything else, the maximum number of vertices. For locally refined
        cells this includes all the vertices adjacent to the cell.

        if vert_per_cid is 4 or 5 vertices are removed using the
        Ramer-Douglas-Peucker Algorithm -> https://github.com/fhirschmann/rdp.
    epsilon : int or float, optional
        epsilon in the rdp algorithm. I (Onno) think this is: the maximum
        distance between a line and a point for which the point is considered
        to be on the line. The default is 0.

    Returns
    -------
    vertices_arr : numpy array
         Vertex coördinates per cell with dimensions(cid, no_vert, 2).
    """
    warnings.warn(
        "this function is deprecated and will eventually be removed, "
        "please use 'modelgrid_from_ds' and 'modelgrid.map_polygons' in the future.",
        DeprecationWarning,
    )
    if modelgrid is None:
        modelgrid = modelgrid_from_ds(ds, rotated=rotated)
    xvert = modelgrid.xvertices
    yvert = modelgrid.yvertices
    if vert_per_cid == 4:
        coord_list = []
        for xv, yv in zip(xvert, yvert):
            coords = rdp(list(zip(xv, yv)), epsilon=epsilon)[:-1]
            if len(coords) > 4:
                raise RuntimeError(
                    "unexpected number of coördinates, you probably want to change epsilon"
                )
            coord_list.append(coords)
        vertices_arr = np.array(coord_list)
    elif vert_per_cid == 5:
        coord_list = []
        for xv, yv in zip(xvert, yvert):
            coords = rdp(list(zip(xv, yv)), epsilon=epsilon)
            if len(coords) > 5:
                raise RuntimeError(
                    "unexpected number of coördinates, you probably want to change epsilon"
                )
            coord_list.append(coords)
        vertices_arr = np.array(coord_list)
    else:
        raise NotImplementedError()

    return vertices_arr




[docs]
def get_vertices(ds, vert_per_cid=4, epsilon=0, rotated=False):
    """Get vertices of a vertex modelgrid from a ds or the modelgrid. Only return the 4
    corners of each cell and not the corners of adjacent cells thus limiting the
    vertices per cell to 4 points.

    This method uses the xvertices and yvertices attributes of the modelgrid.
    When no modelgrid is supplied, a modelgrid-object is created from ds.

    Parameters
    ----------
    ds : xr.Dataset
        model dataset, attribute grid_type should be 'vertex'
    modelgrid : flopy.discretization.vertexgrid.VertexGrid
        vertex grid with attributes xvertices and yvertices.
    vert_per_cid : int or None:
        number of vertices per cell:
        - 4 return the 4 vertices of each cell
        - 5 return the 4 vertices of each cell + one duplicate vertex
        (sometimes useful if you want to create polygons)
        - anything else, the maximum number of vertices. For locally refined
        cells this includes all the vertices adjacent to the cell.

        if vert_per_cid is 4 or 5 vertices are removed using the
        Ramer-Douglas-Peucker Algorithm -> https://github.com/fhirschmann/rdp.
    epsilon : int or float, optional
        epsilon in the rdp algorithm. I (Onno) think this is: the maximum
        distance between a line and a point for which the point is considered
        to be on the line. The default is 0.

    Returns
    -------
    vertices_da : xr.DataArray
         Vertex coördinates per cell with dimensions(cid, no_vert, 2).
    """
    warnings.warn(
        "get_vertices is deprecated and will eventually be removed, "
        "please use 'modelgrid_from_ds' and 'modelgrid.map_polygons'.",
        DeprecationWarning,
    )

    vertices_arr = get_vertices_arr(
        ds,
        vert_per_cid=vert_per_cid,
        epsilon=epsilon,
        rotated=rotated,
    )

    vertices_da = xr.DataArray(
        vertices_arr,
        dims=("icell2d", "vert_per_cid", "xy"),
        coords={"xy": ["x", "y"]},
    )

    return vertices_da




[docs]
@cache.cache_netcdf(coords_2d=True)
def mask_model_edge(ds, idomain=None):
    """Get data array which is 1 for every active cell (defined by idomain) at the
    boundaries of the model (xmin, xmax, ymin, ymax). Other cells are 0.

    Parameters
    ----------
    ds : xr.Dataset
        dataset with model data.
    idomain : xr.DataArray, optional
        idomain used to get active cells and shape of DataArray. Calculate from ds when
        None. The default is None.

    Returns
    -------
    ds_out : xr.Dataset
        dataset with edge mask array
    """
    ds = ds.copy()  # avoid side effects

    # add constant head cells at model boundaries
    if is_rotated(ds):
        raise NotImplementedError("model edge not yet calculated for rotated grids")

    # get mask with grid edges
    xmin = ds["x"] == ds["x"].min()
    xmax = ds["x"] == ds["x"].max()
    ymin = ds["y"] == ds["y"].min()
    ymax = ds["y"] == ds["y"].max()

    ds_out = util.get_ds_empty(ds)

    if ds.gridtype == "structured":
        mask2d = ymin | ymax | xmin | xmax

        # assign 1 to cells that are on the edge and have an active idomain
        if idomain is None:
            idomain = get_idomain(ds)
        ds_out["edge_mask"] = xr.zeros_like(idomain)
        for lay in ds.layer:
            ds_out["edge_mask"].loc[lay] = np.where(
                mask2d & (idomain.loc[lay] == 1), 1, 0
            )

    elif ds.gridtype == "vertex":
        polygons_grid = polygons_from_ds(ds)
        gdf_grid = gpd.GeoDataFrame(geometry=polygons_grid)
        extent_edge = gdf_grid.union_all().exterior
        cids_edge = gdf_grid.loc[gdf_grid.touches(extent_edge)].index
        ds_out["edge_mask"] = util.get_da_from_da_ds(
            ds, dims=("layer", "icell2d"), data=0
        )

        for lay in ds.layer:
            ds_out["edge_mask"].loc[lay, cids_edge] = 1

    return ds_out




[docs]
def polygons_from_ds(ds):
    """Create polygons of each cell in a model dataset.

    Parameters
    ----------
    ds : xr.Dataset
        Dataset with model data.

    Raises
    ------
    ValueError
        for wrong gridtype or inconsistent grid definition.

    Returns
    -------
    polygons : list of shapely Polygons
        list with polygon of each raster cell.
    """
    if ds.gridtype == "structured":
        delr = get_delr(ds)
        delc = get_delc(ds)

        xmins = ds.x - (delr * 0.5)
        xmaxs = ds.x + (delr * 0.5)
        ymins = ds.y - (delc * 0.5)
        ymaxs = ds.y + (delc * 0.5)
        polygons = [
            Polygon(
                [
                    (xmins[i], ymins[j]),
                    (xmins[i], ymaxs[j]),
                    (xmaxs[i], ymaxs[j]),
                    (xmaxs[i], ymins[j]),
                ]
            )
            for i in range(len(xmins))
            for j in range(len(ymins))
        ]

    elif ds.gridtype == "vertex":
        modelgrid = modelgrid_from_ds(ds)
        polygons = [Polygon(v.vertices) for v in modelgrid.map_polygons]
    else:
        raise ValueError(
            f"gridtype must be 'structured' or 'vertex', not {ds.gridtype}"
        )
    if is_rotated(ds):
        # rotate the model coordinates to real coordinates
        affine = get_affine_mod_to_world(ds).to_shapely()
        polygons = [affine_transform(polygon, affine) for polygon in polygons]

    return polygons




[docs]
def _get_attrs(ds):
    if isinstance(ds, dict):
        return ds
    else:
        return ds.attrs




[docs]
def get_extent_polygon(ds, rotated=True):
    """Get the model extent, as a shapely Polygon."""
    attrs = _get_attrs(ds)
    polygon = util.extent_to_polygon(attrs["extent"])
    if rotated and "angrot" in ds.attrs and attrs["angrot"] != 0.0:
        affine = get_affine_mod_to_world(ds)
        polygon = affine_transform(polygon, affine.to_shapely())
    return polygon




[docs]
def get_extent_gdf(ds, rotated=True, crs="EPSG:28992"):
    """Get the model extent as a GeoDataFrame with a polygon.

    Parameters
    ----------
    ds : xr.Dataset
        model dataset.
    rotated : bool, optional
        if True, the extent is corrected for angrot. The default is True.
    crs : str, optional
        Coordinate reference system. The default is "EPSG:28992".

    Returns
    -------
    gdf : geopandas.GeoDataFrame
        GeoDataFrame containing the model extent as a polygon geometry.
    """
    polygon = get_extent_polygon(ds, rotated=rotated)
    return gpd.GeoDataFrame(geometry=[polygon], crs=crs)




[docs]
def affine_transform_gdf(gdf, affine):
    """Apply an affine transformation to a geopandas GeoDataFrame."""
    if isinstance(affine, Affine):
        affine = affine.to_shapely()
    gdfm = gdf.copy()
    gdfm.geometry = gdf.affine_transform(affine)
    return gdfm




[docs]
def get_extent(ds, rotated=True, xmargin=0.0, ymargin=0.0):
    """Get the model extent, corrected for angrot if necessary.

    Parameters
    ----------
    ds : xr.Dataset
        model dataset.
    rotated : bool, optional
        if True, the extent is corrected for angrot. The default is True.
    xmargin : float, optional
        margin to add to the x-extent. The default is 0.0.
    ymargin : float, optional
        margin to add to the y-extent. The default is 0.0.

    Returns
    -------
    extent : list
        [xmin, xmax, ymin, ymax]
    """
    attrs = _get_attrs(ds)
    extent = attrs["extent"]
    extent = [
        extent[0] - xmargin,
        extent[1] + xmargin,
        extent[2] - ymargin,
        extent[3] + ymargin,
    ]
    if rotated and "angrot" in attrs and attrs["angrot"] != 0.0:
        affine = get_affine_mod_to_world(ds)
        xc = np.array([extent[0], extent[1], extent[1], extent[0]])
        yc = np.array([extent[2], extent[2], extent[3], extent[3]])
        xc, yc = affine * (xc, yc)
        extent = [xc.min(), xc.max(), yc.min(), yc.max()]
    return extent




[docs]
def get_affine_mod_to_world(ds):
    """Get the affine-transformation from model to real-world coordinates."""
    attrs = _get_attrs(ds)
    xorigin = attrs["xorigin"]
    yorigin = attrs["yorigin"]
    angrot = attrs["angrot"]
    return Affine.translation(xorigin, yorigin) * Affine.rotation(angrot)




[docs]
def get_affine_world_to_mod(ds):
    """Get the affine-transformation from real-world to model coordinates."""
    attrs = _get_attrs(ds)
    xorigin = attrs["xorigin"]
    yorigin = attrs["yorigin"]
    angrot = attrs["angrot"]
    return Affine.rotation(-angrot) * Affine.translation(-xorigin, -yorigin)




[docs]
def get_affine(ds, sx=None, sy=None):
    """Get the affine-transformation, from pixel to real-world coordinates."""
    attrs = _get_attrs(ds)
    if sx is None:
        sx = get_delr(ds)
        assert len(np.unique(sx)) == 1, "Affine-transformation needs a constant delr"
        sx = sx[0]
    if sy is None:
        sy = get_delc(ds)
        assert len(np.unique(sy)) == 1, "Affine-transformation needs a constant delc"
        sy = -sy[0]

    if "angrot" in attrs:
        xorigin = attrs["xorigin"]
        yorigin = attrs["yorigin"]
        angrot = -attrs["angrot"]
        # xorigin and yorigin represent the lower left corner, while for the transform we
        # need the upper left
        dy = attrs["extent"][3] - attrs["extent"][2]
        xoff = xorigin + dy * np.sin(angrot * np.pi / 180)
        yoff = yorigin + dy * np.cos(angrot * np.pi / 180)
        return (
            Affine.translation(xoff, yoff)
            * Affine.scale(sx, sy)
            * Affine.rotation(angrot)
        )
    else:
        xoff = attrs["extent"][0]
        yoff = attrs["extent"][3]
        return Affine.translation(xoff, yoff) * Affine.scale(sx, sy)