Source code for nlmod.read.regis

import datetime as dt
import logging
import os
import warnings

import numpy as np
import pandas as pd
import xarray as xr
from packaging.version import parse as parse_version

from .. import cache
from ..dims.layers import calculate_thickness, remove_layer_dim_from_top
from . import geotop

logger = logging.getLogger(__name__)

REGIS_URL = "https://www.dinodata.nl/opendap/REGIS/REGIS.nc"



[docs]
@cache.cache_netcdf()
def get_combined_layer_models(
    extent,
    regis_botm_layer="AKc",
    use_regis=True,
    use_geotop=True,
    remove_nan_layers=True,
    geotop_layers="HLc",
    geotop_k=None,
    gt_layered=None,
):
    """Combine layer models into a single layer model.

    Possibilities so far include:
        - use_regis -> full model based on regis
        - use_regis and use_geotop -> holoceen of REGIS is filled with geotop


    Parameters
    ----------
    extent : list, tuple or np.array
        desired model extent (xmin, xmax, ymin, ymax)
    regis_botm_layer : binary str, optional
        regis layer that is used as the bottom of the model. This layer is
        included in the model. the Default is 'AKc' which is the bottom
        layer of regis. call nlmod.read.regis.get_layer_names() to get a list of
        regis names.
    use_regis : bool, optional
        True if part of the layer model should be REGIS. The default is True.
    use_geotop : bool, optional
        True if part of the layer model should be geotop. The default is True.
    remove_nan_layers : bool, optional
        When True, layers which contain only NaNs for the botm array are removed.
        The default is True.
    geotop_layers : str or list of strings
        The regis layers to be replaced by geotop-layers
    geotop_k : pd.DataFrame, optional
        The DataFrame with information about kh and kv of the GeoTOP-data. This
        DataFrame must at least contain columns 'lithok' and 'kh'.
    gt_layered : xarray.Dataset
        A layered representation of the geotop-dataset. By supplying this parameter, the
        user can change the GeoTOP-layering, which is usually defined by
        nlmod.read.geotop.to_model_layers(gt).

    Returns
    -------
    combined_ds : xarray dataset
        combination of layer models.

    Raises
    ------
    ValueError
        if an invalid combination of layers is used.
    """
    if use_regis:
        regis_ds = download_regis(
            extent, regis_botm_layer, remove_nan_layers=remove_nan_layers
        )
    else:
        raise ValueError("layer models without REGIS not supported")

    if use_geotop:
        geotop_ds = geotop.download_geotop(extent)

    if use_regis and use_geotop:
        combined_ds = add_geotop_to_regis_layers(
            regis_ds,
            geotop_ds,
            layers=geotop_layers,
            geotop_k=geotop_k,
            remove_nan_layers=remove_nan_layers,
            gt_layered=gt_layered,
        )

    elif use_regis:
        combined_ds = regis_ds
    else:
        raise ValueError("combination of model layers not supported")

    return combined_ds




[docs]
@cache.cache_netcdf()
def get_regis(
    extent,
    botm_layer="AKc",
    variables=("top", "botm", "kh", "kv"),
    remove_nan_layers=True,
    drop_layer_dim_from_top=True,
    probabilities=False,
    nodata=-9999,
    rename_layers_to_version_2_2_2=True,
):
    """Get a regis dataset projected on the modelgrid.

    .. deprecated:: 0.10.0
        `get_regis` will be removed in nlmod 1.0.0, it is replaced by
        `download_regis` because of new naming convention
        https://github.com/gwmod/nlmod/issues/47

    Parameters
    ----------
    extent : list, tuple or np.array
        desired model extent (xmin, xmax, ymin, ymax)
    botm_layer : str, optional
        regis layer that is used as the bottom of the model. This layer is
        included in the model. the Default is "AKc" which is the bottom
        layer of regis. call nlmod.read.regis.get_layer_names() to get a list
        of regis names.
    variables : tuple or list, optional
        The variables to keep from the regis Dataset. Possible entries in the list are
        'top', 'botm', 'kD', 'c', 'kh', 'kv', 'sdh' and 'sdv'. The default is
        ("top", "botm", "kh", "kv").
    remove_nan_layers : bool, optional
        When True, layers that do not occur in the requested extent (layers that contain
        only NaN values for the botm array) are removed. The default is True.
    drop_layer_dim_from_top : bool, optional
        When True, fill NaN values in top and botm and drop the layer dimension from
        top. This will transform top and botm to the data model in MODFLOW. An advantage
        of this data model is that the layer model is consistent by definition, with no
        possibilities of gaps between layers. The default is True.
    probabilities : bool, optional
        if True, also download probability data. The default is False.
    nodata : int or float, optional
        When nodata is not None, set values equal to nodata to nan. The default is
        -9999.
    rename_layers_to_version_2_2_2 : bool, toptional
        From version 2.2.3 of regis, the names of stratigraphic layers change, compared
        to previous versions. If rename_layers_to_version_2_2_2 is True, the layer-names are
        renamed back to their original names. The default is True.

    Returns
    -------
    regis_ds : xarray dataset
        dataset with regis data projected on the modelgrid.
    """

    warnings.warn(
        "'get_regis' is deprecated and will eventually be removed, "
        "please use 'nlmod.read.regis.download_regis()' in the future.",
        DeprecationWarning,
    )

    return download_regis(
        extent,
        botm_layer,
        variables,
        remove_nan_layers,
        drop_layer_dim_from_top,
        probabilities,
        nodata,
        rename_layers_to_version_2_2_2,
    )




[docs]
@cache.cache_netcdf()
def download_regis(
    extent,
    botm_layer="AKc",
    variables=("top", "botm", "kh", "kv"),
    remove_nan_layers=True,
    drop_layer_dim_from_top=True,
    probabilities=False,
    nodata=-9999,
    rename_layers_to_version_2_2_2=True,
):
    """Download a regis dataset within an extent.

    Parameters
    ----------
    extent : list, tuple or np.array
        desired model extent (xmin, xmax, ymin, ymax)
    botm_layer : str, optional
        regis layer that is used as the bottom of the model. This layer is
        included in the model. the Default is "AKc" which is the bottom
        layer of regis. call nlmod.read.regis.get_layer_names() to get a list
        of regis names.
    variables : tuple or list, optional
        The variables to keep from the regis Dataset. Possible entries in the list are
        'top', 'botm', 'kD', 'c', 'kh', 'kv', 'sdh' and 'sdv'. The default is
        ("top", "botm", "kh", "kv").
    remove_nan_layers : bool, optional
        When True, layers that do not occur in the requested extent (layers that contain
        only NaN values for the botm array) are removed. The default is True.
    drop_layer_dim_from_top : bool, optional
        When True, fill NaN values in top and botm and drop the layer dimension from
        top. This will transform top and botm to the data model in MODFLOW. An advantage
        of this data model is that the layer model is consistent by definition, with no
        possibilities of gaps between layers. The default is True.
    probabilities : bool, optional
        if True, also download probability data. The default is False.
    nodata : int or float, optional
        When nodata is not None, set values equal to nodata to nan. The default is
        -9999.
    rename_layers_to_version_2_2_2 : bool, toptional
        From version 2.2.3 of regis, the names of stratigraphic layers change, compared
        to previous versions. If rename_layers_to_version_2_2_2 is True, the layer-names are
        renamed back to their original names. The default is True.

    Returns
    -------
    regis_ds : xarray dataset
        dataset with regis data projected on the modelgrid.
    """
    ds = xr.open_dataset(REGIS_URL, decode_times=False, decode_coords="all")
    if "crs" in ds.coords:
        # remove the crs coordinate, as rioxarray does not recognise the crs
        # and we set the crs at the end of this method by hand
        ds = ds.drop_vars("crs")

    # set x and y dimensions to cell center
    ds["x"] = ds.x_bounds.mean("bounds")
    ds["y"] = ds.y_bounds.mean("bounds")

    # slice extent
    ds = ds.sel(x=slice(extent[0], extent[1]), y=slice(extent[2], extent[3]))

    if len(ds.x) == 0 or len(ds.y) == 0:
        msg = "No data found. Please supply valid extent in the Netherlands in RD-coordinates"
        raise (ValueError(msg))

    ds["layer"] = get_layer_names(
        ds=ds, rename_layers_to_version_2_2_2=rename_layers_to_version_2_2_2
    )

    # make sure y is descending
    if (ds["y"].diff("y") > 0).all():
        ds = ds.isel(y=slice(None, None, -1))

    # slice layers
    if botm_layer is not None and botm_layer in ds.layer:
        ds = ds.sel(layer=slice(botm_layer))

    # rename bottom to botm, as it is called in FloPy
    ds = ds.rename_vars({"bottom": "botm"})

    # slice data vars
    if variables is None:
        variables = list(ds.data_vars)
    else:
        if isinstance(variables, str):
            variables = [variables]
        if probabilities:
            variables = variables + ("sdh", "sdv")
        ds = ds[list(variables)]

    # since version REGIS v02r2s2 (22.07.2024) NaN values are replaced by -9999
    # we set these values to NaN again
    if nodata is not None:
        for var in variables:
            ds[var] = ds[var].where(ds[var] != nodata)

    if remove_nan_layers:
        # only keep layers with at least one active cell
        if "botm" in ds:
            mask_layer = ~(np.isnan(ds["botm"])).all(ds["botm"].dims[1:])
        else:
            var = variables[0]
            mask_layer = ~(np.isnan(ds[var])).all(ds[var].dims[1:])
            for var in variables[1:]:
                mask_layer = mask_layer | ~(np.isnan(ds[var])).all(ds[var].dims[1:])
        ds = ds.sel(layer=mask_layer)

        if len(ds.layer) == 0:
            msg = "No data found. Please supply valid extent in the Netherlands in RD-coordinates"
            raise (Exception(msg))

    if drop_layer_dim_from_top and "botm" in ds and "top" in ds:
        ds = remove_layer_dim_from_top(ds)

    ds.attrs["gridtype"] = "structured"
    ds.attrs["extent"] = extent
    for datavar in ds:
        ds[datavar].attrs["source"] = "REGIS"
        ds[datavar].attrs["url"] = REGIS_URL
        ds[datavar].attrs["date"] = dt.datetime.now().strftime("%Y%m%d")
        if datavar in ["top", "botm"]:
            ds[datavar].attrs["units"] = "mNAP"
        elif datavar in ["kh", "kv"]:
            ds[datavar].attrs["units"] = "m/day"
        elif datavar in ["c"]:
            ds[datavar].attrs["units"] = "day"
        elif datavar in ["kD"]:
            ds[datavar].attrs["units"] = "m2/day"

    # set the crs to dutch rd-coordinates
    ds.rio.write_crs(28992, inplace=True)

    return ds




[docs]
def add_geotop_to_regis_layers(
    rg,
    gt,
    layers="HLc",
    geotop_k=None,
    remove_nan_layers=True,
    anisotropy=1.0,
    gt_layered=None,
):
    """Combine geotop and regis in such a way that the one or more layers in Regis are
    replaced by the geo_eenheden of geotop.

    Parameters
    ----------
    rg : xarray.DataSet
        regis dataset
    gt : xarray.DataSet
        geotop dataset
    layers : str or list of strings
        The regis layers to be replaced by geotop-layers
    geotop_k : pd.DataFrame, optional
        The DataFrame with information about kh and kv of the GeoTOP-data. This
        DataFrame must at least contain columns 'lithok' and 'kh'.
    remove_nan_layers : bool, optional
        When True, layers with only 0 or NaN thickness are removed. The default is True.
    anisotropy : float, optional
        The anisotropy value (kh/kv) used when there are no kv values in df. The
        default is 1.0.
    gt_layered : xarray.Dataset
        A layered representation of the geotop-dataset. By supplying this parameter, the
        user can change the GeoTOP-layering, which is usueally defined by
        nlmod.read.geotop.to_model_layers(gt).

    Returns
    -------
    gt: xr.DataSet
        combined dataset
    """
    if isinstance(layers, str):
        layers = [layers]

    # make sure geotop dataset contains kh and kv
    if "kh" not in gt or "kv" not in gt:
        if "kv" in gt:
            logger.info(
                f"Calculating kh of geotop by multiplying kv with an anisotropy of {anisotropy}"
            )
            gt["kh"] = gt["kv"] * anisotropy
        elif "kh" in gt:
            logger.info(
                f"Calculating kv of geotop by dividing kh by an anisotropy of {anisotropy}"
            )
            gt["kv"] = gt["kh"] / anisotropy
        else:
            # add kh and kv to gt
            if geotop_k is None:
                geotop_k = geotop.get_lithok_props()
            gt = geotop.add_kh_and_kv(gt, geotop_k, anisotropy=anisotropy)

    # copy the regis-dataset, before altering it
    rg = rg.copy(deep=True)
    if "layer" in rg["top"].dims:
        msg = "Top in rg has a layer dimension. add_geotop_to_regis_layers will remove the layer dimension from top in rg."
        logger.warning(msg)
    else:
        # temporarily add layer dimension to top in rg
        rg["top"] = rg["botm"] + calculate_thickness(rg)

    for layer in layers:
        if gt_layered is not None:
            gtl = gt_layered.copy(deep=True)
        else:
            # transform geotop data into layers
            gtl = geotop.to_model_layers(gt)

        # temporarily add layer dimension to top in gtl
        gtl["top"] = gtl["botm"] + calculate_thickness(gtl)

        # only keep the part of layers inside the regis layer
        top = rg["top"].loc[layer]
        bot = rg["botm"].loc[layer]
        gtl["top"] = gtl["top"].where(gtl["top"].isnull() | (gtl["top"] < top), top)
        gtl["top"] = gtl["top"].where(gtl["top"].isnull() | (gtl["top"] > bot), bot)
        gtl["botm"] = gtl["botm"].where(gtl["botm"].isnull() | (gtl["botm"] < top), top)
        gtl["botm"] = gtl["botm"].where(gtl["botm"].isnull() | (gtl["botm"] > bot), bot)

        if remove_nan_layers:
            # drop layers with a remaining thickness of 0 (or NaN) everywhere
            th = calculate_thickness(gtl)
            gtl = gtl.sel(layer=(th > 0).any(th.dims[1:]))

        # add kh and kv from gt to gtl
        if "kh" not in gtl or "kv" not in gtl:
            gtl = geotop.aggregate_to_ds(gt, gtl)

        # add gtl-layers to rg-layers
        lay = np.where(rg.layer == layer)[0][0]
        layer_order = np.concatenate([rg.layer[:lay], gtl.layer, rg.layer[lay + 1 :]])

        # call xr.concat with rg first, so we keep attributes of rg
        rg = xr.concat((rg, gtl), "layer")
        # we will then make sure the layer order is right
        rg = rg.reindex({"layer": layer_order})

    # remove the layer dimension from top again
    rg = remove_layer_dim_from_top(rg)
    return rg




[docs]
def extract_version_from_title(version_string):
    """
    Extract version number in format X.Y.Z from a string like "REGIS vXXrYsZ".

    Parameters
    ----------
    version_string : str
        The input string containing version information in format "REGIS vXXrYsZ".

    Returns
    -------
    packaging.version.Version
        Extracted version in format "X.Y.Z".

    Examples
    --------
    >>> extract_version("REGIS v02r2s3")
    <Version('2.2.3')>
    """
    # Extract digits from the string after 'v', 'r', and 's'
    parts = version_string.split()
    code = parts[1]  # Get 'vXXrYsZ' part

    # Extract the numbers after v, r, and s
    major = (
        code[1:3].lstrip("0") or "0"
    )  # Remove leading zeros, but keep at least one digit
    minor = code[code.find("r") + 1 : code.find("s")].lstrip("0") or "0"
    patch = code[code.find("s") + 1 :].lstrip("0") or "0"

    # Combine into version format
    version = f"{major}.{minor}.{patch}"
    return parse_version(version)




[docs]
def get_layer_names(ds=None, rename_layers_to_version_2_2_2=True):
    """Get all the available regis layer names.

    Parameters
    ----------
    ds : xarray.Dataset, optional
        The regis dataset. If None, a connection is made to the REGIS server.
        The default is None.
    rename_layers_to_version_2_2_2 : bool, optional
        If True, the layer names are renamed to their pre-v2.2.3 names. The default is
        True.

    Returns
    -------
    layer_names : np.array
        array with names of all the regis layers.
    """
    if ds is None:
        ds = xr.open_dataset(REGIS_URL, decode_times=False, decode_coords=False)

    regis_version = extract_version_from_title(ds.attrs["title"])

    layer_names = ds.layer.values.astype(str)

    if rename_layers_to_version_2_2_2 and regis_version >= parse_version("2.2.3"):
        df = get_table_name_changes()
        return (
            df.set_index("Nieuwe code").loc[layer_names]["Oude code"].values.astype(str)
        )
    else:
        return layer_names




[docs]
def get_legend(kind="REGIS"):
    """Get a legend (DataFrame) with the colors of REGIS and/or GeoTOP layers.

    These colors can be used when plotting cross-sections.
    """
    allowed_kinds = ["REGIS", "GeoTOP", "combined"]
    if kind not in allowed_kinds:
        raise (ValueError(f"Only allowed values for kind are {allowed_kinds}"))
    if kind in ["REGIS", "combined"]:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        fname = os.path.join(dir_path, "..", "data", "regis", "regis_2_2.gleg")
        leg_regis = read_gleg(fname)
        if kind == "REGIS":
            return leg_regis
    if kind in ["GeoTOP", "combined"]:
        dir_path = os.path.dirname(os.path.realpath(__file__))
        fname = os.path.join(dir_path, "..", "data", "geotop", "geotop.gleg")
        leg_geotop = read_gleg(fname)
        if kind == "GeoTOP":
            return leg_geotop
    # return a combination of regis and geotop
    leg = pd.concat((leg_regis, leg_geotop))
    # drop duplicates, keeping first occurrences (from regis)
    leg = leg.loc[~leg.index.duplicated(keep="first")]
    return leg




[docs]
def get_legend_lithoclass():
    dir_path = os.path.dirname(os.path.realpath(__file__))
    fname = os.path.join(dir_path, "..", "data", "geotop", "Lithoklasse.voleg")
    leg = read_voleg(fname)
    return leg




[docs]
def get_legend_lithostratigraphy():
    dir_path = os.path.dirname(os.path.realpath(__file__))
    fname = os.path.join(dir_path, "..", "data", "geotop", "Lithostratigrafie.voleg")
    leg = read_voleg(fname)
    return leg




[docs]
def read_gleg(fname):
    leg = pd.read_csv(
        fname,
        sep="\t",
        header=None,
        names=["naam", "beschrijving", "r", "g", "b", "a", "x"],
    )
    leg["naam"] = leg["naam"].str.replace("-", "")
    leg.set_index("naam", inplace=True)
    clrs = np.array(leg.loc[:, ["r", "g", "b"]])
    clrs = [tuple(rgb / 255.0) for rgb in clrs]
    leg["color"] = clrs
    leg = leg.drop(["x", "r", "g", "b", "a"], axis=1)
    return leg




[docs]
def read_voleg(fname):
    leg = pd.read_csv(
        fname,
        sep="\t",
        header=None,
        names=["code", "naam", "r", "g", "b", "a", "beschrijving"],
    )
    leg.set_index("code", inplace=True)
    clrs = np.array(leg.loc[:, ["r", "g", "b"]])
    clrs = [tuple(rgb / 255.0) for rgb in clrs]
    leg["color"] = clrs
    leg = leg.drop(["r", "g", "b", "a"], axis=1)
    return leg




[docs]
def get_table_name_changes():
    """
    Get the table with name changes of REGIS.

    Returns
    -------
    df : pd.DataFrame
        A DataFrame containsing old and new names.

    """
    dir_path = os.path.dirname(os.path.realpath(__file__))
    fname = "Tabellen.bij.naamgevingsreleases.REGIS.II.csv"
    fname = os.path.join(dir_path, "..", "data", "regis", fname)
    df = pd.read_csv(fname)

    # remove (REGIS II) for the header of the first column, after "Naam"
    df.columns = df.columns.str.replace(" (REGIS II)", "")

    # drop the lines after the first empty row
    first_empty_row = np.where(df.iloc[:, 0].isna())[0][0]
    df = df.iloc[:first_empty_row]
    return df