{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Coarsen a dataset\n", "\n", "``coarsen_dataset`` is used to perform block aggregation along specified dimensions. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import ecodata as eco\n", "import xarray as xr" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# ECMWF dataset \n", "filein = eco.get_path(\"ECMWF_subset.nc\")\n", "ds = xr.load_dataset(filein)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:      (longitude: 81, latitude: 41, time: 8784)\n",
       "Coordinates:\n",
       "  * longitude    (longitude) float32 -130.0 -129.8 -129.5 ... -110.2 -110.0\n",
       "  * latitude     (latitude) float32 60.0 59.75 59.5 59.25 ... 50.5 50.25 50.0\n",
       "  * time         (time) datetime64[ns] 2008-01-01 ... 2008-12-31T23:00:00\n",
       "Data variables:\n",
       "    spatial_ref  int64 0\n",
       "    u10          (time, latitude, longitude) float32 1.148 1.015 ... 9.102 10.09\n",
       "    v10          (time, latitude, longitude) float32 0.9952 0.3224 ... 1.36 1.25\n",
       "    t2m          (time, latitude, longitude) float32 249.6 249.1 ... 270.1 270.2\n",
       "Attributes:\n",
       "    Conventions:  CF-1.6\n",
       "    history:      2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw...
" ], "text/plain": [ "\n", "Dimensions: (longitude: 81, latitude: 41, time: 8784)\n", "Coordinates:\n", " * longitude (longitude) float32 -130.0 -129.8 -129.5 ... -110.2 -110.0\n", " * latitude (latitude) float32 60.0 59.75 59.5 59.25 ... 50.5 50.25 50.0\n", " * time (time) datetime64[ns] 2008-01-01 ... 2008-12-31T23:00:00\n", "Data variables:\n", " spatial_ref int64 0\n", " u10 (time, latitude, longitude) float32 1.148 1.015 ... 9.102 10.09\n", " v10 (time, latitude, longitude) float32 0.9952 0.3224 ... 1.36 1.25\n", " t2m (time, latitude, longitude) float32 249.6 249.1 ... 270.1 270.2\n", "Attributes:\n", " Conventions: CF-1.6\n", " history: 2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw..." ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Apply block aggregation along specified dimensions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This example will take a block mean across every 5 points in the time dimension, \n", "and every 4 points in the latitude and longitude dimensions:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:      (time: 1756, latitude: 10, longitude: 20)\n",
       "Coordinates:\n",
       "  * longitude    (longitude) float32 -129.6 -128.6 -127.6 ... -111.6 -110.6\n",
       "  * latitude     (latitude) float32 59.62 58.62 57.62 ... 52.62 51.62 50.62\n",
       "  * time         (time) datetime64[ns] 2008-01-01T02:00:00 ... 2008-12-31T17:...\n",
       "Data variables:\n",
       "    spatial_ref  int64 0\n",
       "    u10          (time, latitude, longitude) float32 0.6489 -0.1787 ... -0.5751\n",
       "    v10          (time, latitude, longitude) float32 0.3738 0.3756 ... 6.851\n",
       "    t2m          (time, latitude, longitude) float32 250.3 247.3 ... 265.9 265.7\n",
       "Attributes:\n",
       "    Conventions:  CF-1.6\n",
       "    history:      2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw...
" ], "text/plain": [ "\n", "Dimensions: (time: 1756, latitude: 10, longitude: 20)\n", "Coordinates:\n", " * longitude (longitude) float32 -129.6 -128.6 -127.6 ... -111.6 -110.6\n", " * latitude (latitude) float32 59.62 58.62 57.62 ... 52.62 51.62 50.62\n", " * time (time) datetime64[ns] 2008-01-01T02:00:00 ... 2008-12-31T17:...\n", "Data variables:\n", " spatial_ref int64 0\n", " u10 (time, latitude, longitude) float32 0.6489 -0.1787 ... -0.5751\n", " v10 (time, latitude, longitude) float32 0.3738 0.3756 ... 6.851\n", " t2m (time, latitude, longitude) float32 250.3 247.3 ... 265.9 265.7\n", "Attributes:\n", " Conventions: CF-1.6\n", " history: 2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw..." ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds2 = eco.coarsen_dataset(ds, {'time': 5, 'latitude': 4, 'longitude': 4})\n", "ds2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "If you want to use a function other than mean for a certain dimension, you can pass this using the `coord_func` option:" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:      (time: 1756, latitude: 10, longitude: 20)\n",
       "Coordinates:\n",
       "  * longitude    (longitude) float32 -129.6 -128.6 -127.6 ... -111.6 -110.6\n",
       "  * latitude     (latitude) float32 59.62 58.62 57.62 ... 52.62 51.62 50.62\n",
       "  * time         (time) datetime64[ns] 2008-01-01 ... 2008-12-31T15:00:00\n",
       "Data variables:\n",
       "    spatial_ref  int64 0\n",
       "    u10          (time, latitude, longitude) float32 0.6489 -0.1787 ... -0.5751\n",
       "    v10          (time, latitude, longitude) float32 0.3738 0.3756 ... 6.851\n",
       "    t2m          (time, latitude, longitude) float32 250.3 247.3 ... 265.9 265.7\n",
       "Attributes:\n",
       "    Conventions:  CF-1.6\n",
       "    history:      2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw...
" ], "text/plain": [ "\n", "Dimensions: (time: 1756, latitude: 10, longitude: 20)\n", "Coordinates:\n", " * longitude (longitude) float32 -129.6 -128.6 -127.6 ... -111.6 -110.6\n", " * latitude (latitude) float32 59.62 58.62 57.62 ... 52.62 51.62 50.62\n", " * time (time) datetime64[ns] 2008-01-01 ... 2008-12-31T15:00:00\n", "Data variables:\n", " spatial_ref int64 0\n", " u10 (time, latitude, longitude) float32 0.6489 -0.1787 ... -0.5751\n", " v10 (time, latitude, longitude) float32 0.3738 0.3756 ... 6.851\n", " t2m (time, latitude, longitude) float32 250.3 247.3 ... 265.9 265.7\n", "Attributes:\n", " Conventions: CF-1.6\n", " history: 2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw..." ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ds2 = eco.coarsen_dataset(ds, {'time': 5, 'latitude': 4, 'longitude': 4}, coord_func={\"time\": \"min\"})\n", "ds2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Save the dataset\n", "\n", "The new dataset will be saved to a netcdf file if the outfile argument is provided." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "\n", "
<xarray.Dataset>\n",
       "Dimensions:      (time: 1756, latitude: 10, longitude: 20)\n",
       "Coordinates:\n",
       "  * longitude    (longitude) float32 -129.6 -128.6 -127.6 ... -111.6 -110.6\n",
       "  * latitude     (latitude) float32 59.62 58.62 57.62 ... 52.62 51.62 50.62\n",
       "  * time         (time) datetime64[ns] 2008-01-01T02:00:00 ... 2008-12-31T17:...\n",
       "Data variables:\n",
       "    spatial_ref  int64 0\n",
       "    u10          (time, latitude, longitude) float32 0.6489 -0.1787 ... -0.5751\n",
       "    v10          (time, latitude, longitude) float32 0.3738 0.3756 ... 6.851\n",
       "    t2m          (time, latitude, longitude) float32 250.3 247.3 ... 265.9 265.7\n",
       "Attributes:\n",
       "    Conventions:  CF-1.6\n",
       "    history:      2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw...
" ], "text/plain": [ "\n", "Dimensions: (time: 1756, latitude: 10, longitude: 20)\n", "Coordinates:\n", " * longitude (longitude) float32 -129.6 -128.6 -127.6 ... -111.6 -110.6\n", " * latitude (latitude) float32 59.62 58.62 57.62 ... 52.62 51.62 50.62\n", " * time (time) datetime64[ns] 2008-01-01T02:00:00 ... 2008-12-31T17:...\n", "Data variables:\n", " spatial_ref int64 0\n", " u10 (time, latitude, longitude) float32 0.6489 -0.1787 ... -0.5751\n", " v10 (time, latitude, longitude) float32 0.3738 0.3756 ... 6.851\n", " t2m (time, latitude, longitude) float32 250.3 247.3 ... 265.9 265.7\n", "Attributes:\n", " Conventions: CF-1.6\n", " history: 2022-06-14 00:45:00 GMT by grib_to_netcdf-2.24.3: /opt/ecmw..." ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "outfile = \"../../output/coarse_output.nc\"\n", "eco.coarsen_dataset(ds, \n", " {'time': 5, 'latitude': 4, 'longitude': 4}, \n", " outfile=outfile)" ] } ], "metadata": { "kernelspec": { "display_name": "pmv-dev", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.15" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "5b6f9a2c562b6e60868cbf0c86ab18522a63e76e5fe9fe366e3c27fb9acdc7d5" } } }, "nbformat": 4, "nbformat_minor": 2 }