{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Gridding vector data \n",
    "Vector data can be points, lines or polygons often saved as shapefiles and visualised using GIS software. A common operation is to project vector data on a modelgrid. For example, to add a surface water line to a grid. In this section we present some functions in `nlmod` to project vector data on a modelgrid and to aggregate vector data to model cells."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import geopandas as gpd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import xarray as xr\n",
    "from IPython.display import display\n",
    "from shapely.geometry import LineString, Point\n",
    "from shapely.geometry import Polygon as shp_polygon\n",
    "\n",
    "import nlmod"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nlmod.show_versions()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Grid types\n",
    "\n",
    "We create the same two grids as in the 'Resampling raster data' notebook."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# structured grid\n",
    "ds = nlmod.get_ds([950, 1250, 20050, 20350], delr=100)\n",
    "# vertex grid\n",
    "dsv = nlmod.grid.refine(\n",
    "    ds, refinement_features=[([Point(1200, 20200)], \"point\", 1)], model_ws=\"06_gridding_vector_data\"\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Vector to grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "point_geom = [\n",
    "    Point(x, y) for x, y in zip([1000, 1200, 1225, 1300], [20200, 20175, 20175, 20425])\n",
    "]\n",
    "point_gdf = gpd.GeoDataFrame({\"values\": [1, 52, 66, 24]}, geometry=point_geom)\n",
    "line_geom = [\n",
    "    LineString([point_geom[0], point_geom[1]]),\n",
    "    LineString([point_geom[2], point_geom[3]]),\n",
    "    LineString([point_geom[0], point_geom[3]]),\n",
    "]\n",
    "line_gdf = gpd.GeoDataFrame({\"values\": [1, 52, 66]}, geometry=line_geom)\n",
    "pol_geom = [\n",
    "    shp_polygon(\n",
    "        [\n",
    "            [p.x, p.y]\n",
    "            for p in [point_geom[0], point_geom[1], point_geom[2], point_geom[3]]\n",
    "        ]\n",
    "    ),\n",
    "    shp_polygon(\n",
    "        [\n",
    "            [p.x, p.y]\n",
    "            for p in [point_geom[0], point_geom[1], point_geom[2], Point(1200, 20300)]\n",
    "        ]\n",
    "    ),\n",
    "]\n",
    "pol_gdf = gpd.GeoDataFrame({\"values\": [166, 5]}, geometry=pol_geom)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "nlmod.grid.modelgrid_from_ds(ds).plot(ax=ax)\n",
    "point_gdf.plot(ax=ax, color=\"green\")\n",
    "line_gdf.plot(ax=ax, color=\"purple\")\n",
    "pol_gdf.plot(ax=ax, alpha=0.6)\n",
    "\n",
    "ax.set_xlim(ax.get_xlim()[0], 1400)\n",
    "ax.set_ylim(ax.get_ylim()[0], 20500)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Points"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Aggregation methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(ncols=4, figsize=(20, 5))\n",
    "\n",
    "da1 = nlmod.grid.gdf_to_da(point_gdf, ds, \"values\", agg_method=\"min\")\n",
    "da2 = nlmod.grid.gdf_to_da(point_gdf, ds, \"values\", agg_method=\"max\")\n",
    "da3 = nlmod.grid.gdf_to_da(point_gdf, ds, \"values\", agg_method=\"mean\")\n",
    "\n",
    "vmin = min(da1.min(), da2.min(), da3.min())\n",
    "vmax = max(da1.max(), da2.max(), da3.max())\n",
    "\n",
    "da1.plot(ax=axes[0], vmin=vmin, vmax=vmax)\n",
    "axes[0].set_title(\"aggregation min\")\n",
    "axes[0].axis(\"scaled\")\n",
    "\n",
    "da2.plot(ax=axes[1], vmin=vmin, vmax=vmax)\n",
    "axes[1].set_title(\"aggregation max\")\n",
    "axes[1].axis(\"scaled\")\n",
    "\n",
    "da3.plot(ax=axes[2], vmin=vmin, vmax=vmax)\n",
    "axes[2].set_title(\"aggregation mean\")\n",
    "axes[2].axis(\"scaled\")\n",
    "\n",
    "point_gdf.plot(\"values\", ax=axes[3], vmin=vmin, vmax=vmax, legend=True)\n",
    "nlmod.grid.modelgrid_from_ds(ds).plot(ax=axes[3])\n",
    "axes[3].set_title(\"points\")\n",
    "axes[3].axis(\"scaled\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Interpolation methods"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(ncols=3, figsize=(15, 5))\n",
    "ds.attrs[\"model_ws\"] = \"\"\n",
    "sim = nlmod.sim.sim(ds)\n",
    "gwf = nlmod.gwf.gwf(ds, sim)\n",
    "dis = nlmod.gwf.dis(ds, gwf)\n",
    "da1 = nlmod.grid.gdf_to_da(point_gdf, ds, column=\"values\", agg_method=\"nearest\")\n",
    "da2 = xr.DataArray(np.nan, dims=(\"y\", \"x\"), coords={\"y\": ds.y, \"x\": ds.x})\n",
    "da2.values = nlmod.grid.interpolate_gdf_to_array(\n",
    "    point_gdf, gwf, field=\"values\", method=\"linear\"\n",
    ")\n",
    "\n",
    "vmin = min(da1.min(), da2.min())\n",
    "vmax = max(da1.max(), da2.max())\n",
    "\n",
    "da1.plot(ax=axes[0], vmin=vmin, vmax=vmax)\n",
    "axes[0].set_title(\"interpolation nearest\")\n",
    "axes[0].axis(\"scaled\")\n",
    "\n",
    "da2.plot(ax=axes[1], vmin=vmin, vmax=vmax)\n",
    "axes[1].set_title(\"interpolation linear\")\n",
    "axes[1].axis(\"scaled\")\n",
    "\n",
    "\n",
    "point_gdf.plot(\"values\", ax=axes[2], vmin=vmin, vmax=vmax, legend=True)\n",
    "nlmod.grid.modelgrid_from_ds(ds).plot(ax=axes[2])\n",
    "axes[2].set_title(\"points\")\n",
    "axes[2].axis(\"scaled\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Lines"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(ncols=4, figsize=(20, 5))\n",
    "\n",
    "da1 = nlmod.grid.gdf_to_da(line_gdf, ds, \"values\", agg_method=\"max_length\")\n",
    "da2 = nlmod.grid.gdf_to_da(line_gdf, ds, \"values\", agg_method=\"length_weighted\")\n",
    "da3 = nlmod.grid.gdf_to_da(line_gdf, ds, \"values\", agg_method=\"mean\")\n",
    "\n",
    "vmin = min(da1.min(), da2.min(), da3.min())\n",
    "vmax = max(da1.max(), da2.max(), da3.max())\n",
    "\n",
    "da1.plot(ax=axes[0], vmin=vmin, vmax=vmax)\n",
    "axes[0].set_title(\"aggregation max_length\")\n",
    "axes[0].axis(\"scaled\")\n",
    "\n",
    "da2.plot(ax=axes[1], vmin=vmin, vmax=vmax)\n",
    "axes[1].set_title(\"aggregation length_weighted\")\n",
    "axes[1].axis(\"scaled\")\n",
    "\n",
    "da3.plot(ax=axes[2], vmin=vmin, vmax=vmax)\n",
    "axes[2].set_title(\"aggregation mean\")\n",
    "axes[2].axis(\"scaled\")\n",
    "\n",
    "line_gdf.plot(\"values\", ax=axes[3], vmin=vmin, vmax=vmax, legend=True)\n",
    "nlmod.grid.modelgrid_from_ds(ds).plot(ax=axes[3])\n",
    "axes[3].set_title(\"lines\")\n",
    "axes[3].axis(\"scaled\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Polygons"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, axes = plt.subplots(ncols=4, figsize=(20, 5))\n",
    "\n",
    "da1 = nlmod.grid.gdf_to_da(pol_gdf, ds, \"values\", agg_method=\"max_area\")\n",
    "da2 = nlmod.grid.gdf_to_da(pol_gdf, ds, \"values\", agg_method=\"area_weighted\")\n",
    "da3 = nlmod.grid.gdf_to_da(pol_gdf, ds, \"values\", agg_method=\"nearest\")\n",
    "\n",
    "vmin = min(da1.min(), da2.min(), da3.min())\n",
    "vmax = max(da1.max(), da2.max(), da3.max())\n",
    "\n",
    "da1.plot(ax=axes[0], vmin=vmin, vmax=vmax)\n",
    "axes[0].set_title(\"aggregation max_area\")\n",
    "axes[0].axis(\"scaled\")\n",
    "\n",
    "da2.plot(ax=axes[1], vmin=vmin, vmax=vmax)\n",
    "axes[1].set_title(\"aggregation area_weighted\")\n",
    "axes[1].axis(\"scaled\")\n",
    "\n",
    "da3.plot(ax=axes[2], vmin=vmin, vmax=vmax)\n",
    "axes[2].set_title(\"aggregation nearest\")\n",
    "axes[2].axis(\"scaled\")\n",
    "\n",
    "pol_gdf.plot(\"values\", ax=axes[3], vmin=vmin, vmax=vmax, legend=True)\n",
    "nlmod.grid.modelgrid_from_ds(ds).plot(ax=axes[3])\n",
    "axes[3].set_title(\"polygons\")\n",
    "axes[3].axis(\"scaled\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Intersect vector data with grid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "gdf_point_grid = nlmod.grid.gdf_to_grid(point_gdf, ds)\n",
    "gdf_line_grid = nlmod.grid.gdf_to_grid(line_gdf, ds)\n",
    "gdf_pol_grid = nlmod.grid.gdf_to_grid(pol_gdf, ds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "\n",
    "gdf_point_grid.plot(ax=ax, color=\"green\")\n",
    "gdf_line_grid[\"ind\"] = range(gdf_line_grid.shape[0])\n",
    "gdf_line_grid.plot(\"ind\", ax=ax, cmap=\"jet\")\n",
    "gdf_pol_grid[\"ind\"] = range(gdf_pol_grid.shape[0])\n",
    "gdf_pol_grid.plot(\"ind\", ax=ax, alpha=0.6)\n",
    "\n",
    "nlmod.grid.modelgrid_from_ds(ds).plot(ax=ax)\n",
    "ax.set_xlim(ax.get_xlim()[0], 1300)\n",
    "ax.set_ylim(ax.get_ylim()[0], 20400)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Aggregate parameters per model cell\n",
    "\n",
    "Aggregation options:\n",
    "\n",
    "- point: max, min, mean\n",
    "- line: max, min, length_weighted, max_length\n",
    "- polygon: max, min, area_weighted, area_max\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# point\n",
    "display(gdf_point_grid)\n",
    "nlmod.grid.aggregate_vector_per_cell(gdf_point_grid, {\"values\": \"max\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# line\n",
    "display(gdf_line_grid)\n",
    "nlmod.grid.aggregate_vector_per_cell(gdf_line_grid, {\"values\": \"length_weighted\"})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# polygon\n",
    "display(gdf_pol_grid)\n",
    "nlmod.grid.aggregate_vector_per_cell(gdf_pol_grid, {\"values\": \"area_weighted\"})"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Grid to reclist\n",
    "For some modflow packages (drn, riv, ghb, wel) you need to specify stress_period_data to create them using flopy. This stress_period_data consists of lists of records, known as reclists (also called lrcd (\"layer, row, column-data\") for a structured grid), for every time step.\n",
    "\n",
    "The function `da_to_reclist` can be used to convert grid data (both structured and vertex) to a reclist. This function has many arguments:\n",
    "\n",
    "- `mask`, boolean DataArray to determine which cells should be added to the reclist. Can be 2d or 3d.\n",
    "- `layer`, if `mask` is a 2d array the value of `layer` is used in the reclist. If `mask` is 3d or `first_active_layer` is True the `layer` argument is ignored.\n",
    "- `only_active_cells`, if True only add cells with an idomain of 1 to the reclist\n",
    "- `first_active_layer`, if True use the first active layer, obtained from the idomain, as the layer for each cell.\n",
    "- `col1`,`col2` and `col3`, The column data of the reclist.\n",
    "\n",
    "The examples below show the result of each argument."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add layer dimension\n",
    "if \"layer\" not in ds.dims:\n",
    "    ds = ds.expand_dims({\"layer\": range(3)})\n",
    "\n",
    "# create some data arrays\n",
    "rng = np.random.default_rng(12345)\n",
    "ds[\"da1\"] = (\n",
    "    (\"layer\", \"y\", \"x\"),\n",
    "    rng.integers(0, 10, (ds.sizes[\"layer\"], ds.sizes[\"y\"], ds.sizes[\"x\"])),\n",
    ")\n",
    "ds[\"da2\"] = (\"y\", \"x\"), rng.integers(0, 10, (ds.sizes[\"y\"], ds.sizes[\"x\"]))\n",
    "ds[\"da3\"] = (\"y\", \"x\"), rng.integers(0, 10, (ds.sizes[\"y\"], ds.sizes[\"x\"]))\n",
    "\n",
    "# add a nodata value\n",
    "ds.attrs[\"nodata\"] = -999\n",
    "\n",
    "# set the thickness of the first cell to 0, so this cell will become inactive\n",
    "ds[\"top\"][0, 0] = ds[\"botm\"][0, 0, 0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Mask and layer\n",
    "If `mask` is a 2d array, the `layer` argument specifies the layer that is used in the reclist."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# structured 2d grid to reclist\n",
    "mask2d = ds[\"da2\"] == ds[\"da2\"][0, 0]\n",
    "reclist1 = nlmod.grid.da_to_reclist(\n",
    "    ds, mask2d, col1=ds[\"da1\"][0], col2=\"da2\", layer=0, only_active_cells=False\n",
    ")\n",
    "reclist1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "If the `mask` is three dimensional the `layer` argument is ignored."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create a 3dmask\n",
    "mask3d = ds[\"da1\"] == ds[\"da1\"].values[0, 0, 0]\n",
    "\n",
    "# use this mask to create the reclist\n",
    "reclist2 = nlmod.grid.da_to_reclist(\n",
    "    ds, mask3d, col1=\"da1\", col2=100, layer=0, only_active_cells=False\n",
    ")\n",
    "reclist2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Only active cells\n",
    "With `only_active_cells=True` we make sure only active cells end up in the reclist. Which cells are active is based on the `idomain` in the model dataset."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Only return the cells with an active idomain\n",
    "reclist3 = nlmod.grid.da_to_reclist(\n",
    "    ds, mask3d, col1=\"da1\", col2=100, only_active_cells=True\n",
    ")\n",
    "reclist3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# also possible for a 2d grid\n",
    "mask2d = ds[\"da2\"] == ds[\"da2\"][0, 0]\n",
    "reclist1 = nlmod.grid.da_to_reclist(\n",
    "    ds, mask2d, col1=ds[\"da1\"][0], col2=\"da2\", layer=0, only_active_cells=True\n",
    ")\n",
    "reclist1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### First active_layer\n",
    "Use `first_active_layer=True` to add the first active layer to the reclist. The first active layer is obtained from the idomain."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create a reclist with col1 (str), col2 (DataArray), col3 (int)\n",
    "reclist4 = nlmod.grid.da_to_reclist(\n",
    "    ds, mask2d, col1=\"da2\", col2=\"da3\", first_active_layer=True\n",
    ")\n",
    "reclist4"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Reclist columns\n",
    "The `col1`, `col2` and `col3` arguments specify what data should be put into the reclist. The types can be `str`,`xarray.DataArray`,`None` or other. If the value is a `str` the corresponding DataArray from the Dataset is used to get data for the reclist. If the value is an `xarray.DataArray` the DataArray is used. If the value is `None` the column is not added to the reclist and if the value is another type that value is used for every record in the reclist.\n",
    "\n",
    "Be aware that if `mask` is a 3d array, and the DataArrays of the column should also be 3d."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# create a reclist with col1 (str), col2 (DataArray), col3 (int)\n",
    "idomain = nlmod.layers.get_idomain(ds)\n",
    "reclist5 = nlmod.grid.da_to_reclist(\n",
    "    ds, mask3d, col1=idomain, col2=\"da1\", col3=9, layer=0, only_active_cells=False\n",
    ")\n",
    "reclist5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Vertex model to reclist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# add some random DataArray to the vertex dataset\n",
    "da_vert = rng.integers(0, 10, (dsv[\"area\"].shape))\n",
    "dsv[\"da_vert\"] = (\"icell2d\"), da_vert\n",
    "\n",
    "# create rec list from a vertex dataset\n",
    "mask_vert = dsv[\"da_vert\"] == dsv[\"da_vert\"][0]\n",
    "reclist6 = nlmod.grid.da_to_reclist(\n",
    "    dsv, mask_vert, col1=\"da_vert\", col2=2330, only_active_cells=False\n",
    ")\n",
    "reclist6"
   ]
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}