diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000..b06e6e1 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,4 @@ +[codespell] + +# Ignore really long strings that look like base64-encoded data, e.g. in Jupyter Notebook outputs +ignore-regex = [A-Za-z0-9+/]{100,} diff --git a/02-schedule.md b/02-schedule.md index 61932a4..2ba73b0 100644 --- a/02-schedule.md +++ b/02-schedule.md @@ -36,8 +36,8 @@ Vegetarian options will be available and all ingredients will be listed. ## šŸŒ‡ Afternoon (1:30 - 5:00) | Time | Duration | Topic | Presenter(s) | -| ------- | ---------- | ---------------------------------------------- | --------------- | -| 1:30 PM | 60 minutes | [](modules/04-data-in-the-cloud/index.md) | Max | +| ------- | ---------- | -----------------------------------------------| --------------- | +| 1:30 PM | 60 minutes | [](modules/04-data-in-the-cloud/index.ipynb) | Max | | 2:30 PM | 10 minutes | **Break** | | | 2:40 PM | 60 minutes | [](modules/05-sharing-and-publishing/index.md) | Fernando & Matt | | 3:40 PM | 10 minutes | **Break** | | diff --git a/docker/environment.yml b/docker/environment.yml index 00bf5ee..ac1ef97 100644 --- a/docker/environment.yml +++ b/docker/environment.yml @@ -22,6 +22,14 @@ dependencies: - "scipy" - "xarray" + # Storage + - "s3fs" + - "obstore" + - "fsspec" + - "icechunk" + - "virtualizarr" + - "h5netcdf" + # geo - "gdal" - "geopandas" @@ -51,9 +59,17 @@ dependencies: - "ruff" - "pre-commit" + # Dependency management + - "uv" + - "pixi" + # GitHub utilities - "gh" - "gh-scoped-creds" # Infrastructure - "jupyter-server-proxy" # Enable proxying MyST sites built from CLI, accessed at `$PREFIX/proxy/$PORT` + + - "pip" + - pip: # Stuff not yet on conda forge + - "obspec-utils" diff --git a/modules/04-data-in-the-cloud/aws_regions.jpg b/modules/04-data-in-the-cloud/aws_regions.jpg new file mode 100644 index 0000000..77822e7 Binary files /dev/null and b/modules/04-data-in-the-cloud/aws_regions.jpg differ diff --git 
a/modules/04-data-in-the-cloud/cloud_data_timings.json b/modules/04-data-in-the-cloud/cloud_data_timings.json new file mode 100644 index 0000000..59177bc --- /dev/null +++ b/modules/04-data-in-the-cloud/cloud_data_timings.json @@ -0,0 +1,62 @@ +{ + "cloud": { + "description": "JupyterHub - us-west-2", + "timestamp": "2025-12-11 16:08:23", + "timings": { + "open": { + "fsspec_default_cache": 467.4452876969999, + "fsspec_block_cache": 65.81539951800005, + "obstore": 127.15990847400008, + "virtualzarr_icechunk": 2.579058487000111 + }, + "spatial_subset_load": { + "fsspec_default_cache": 0.6042853349999859, + "fsspec_block_cache": 0.025397076999979618, + "obstore": 0.2673111340000105, + "virtualzarr_icechunk": 2.010724728000241 + }, + "time_slice_load": { + "fsspec_default_cache": 30.614898986999833, + "fsspec_block_cache": 2.269339702999787, + "obstore": 7.793771065999863, + "virtualzarr_icechunk": 2.974931951999679 + }, + "timeseries_load": { + "fsspec_default_cache": 0.2537525479992837, + "fsspec_block_cache": 4.6087899290005225, + "obstore": 8.809160454999983, + "virtualzarr_icechunk": 1.723680136999974 + } + } + }, + "local": { + "description": "MacBook Pro - Durham, NC", + "timestamp": "2025-12-11 12:18:33", + "timings": { + "open": { + "fsspec_default_cache": 1819.1012450830003, + "fsspec_block_cache": 278.71061658300096, + "obstore": 434.0572458329989, + "virtualzarr_icechunk": 2.463514082999609 + }, + "spatial_subset_load": { + "fsspec_default_cache": 0.6070321250008419, + "fsspec_block_cache": 0.052599582999391714, + "obstore": 0.7011319169996568, + "virtualzarr_icechunk": 2.534569625000586 + }, + "time_slice_load": { + "fsspec_default_cache": 136.6284629999991, + "fsspec_block_cache": 4.599118041998736, + "obstore": 26.773726958001134, + "virtualzarr_icechunk": 5.418655583000145 + }, + "timeseries_load": { + "fsspec_default_cache": 0.1992982080009824, + "fsspec_block_cache": 19.891594749999058, + "obstore": 18.95907412500128, + "virtualzarr_icechunk": 
1.2114160420005646 + } + } + } +} diff --git a/modules/04-data-in-the-cloud/index.ipynb b/modules/04-data-in-the-cloud/index.ipynb new file mode 100644 index 0000000..4eebb4d --- /dev/null +++ b/modules/04-data-in-the-cloud/index.ipynb @@ -0,0 +1,3024 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "54832c99", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "---\n", + "authors:\n", + " - name: \"Max Jones\"\n", + " affiliations:\n", + " - \"Development Seed\"\n", + " email: \"max@developmentseed.org\"\n", + " orcid: \"0000-0003-0180-8928\"\n", + " github: \"maxrjones\"\n", + "---\n", + "\n", + "# ā˜ļø 4 - Data in the Cloud 101\n", + "\n", + ":::{tip} 🧭 Where we are going\n", + ":icon: false\n", + "\n", + "By the end of this module, you will be able to:\n", + "\n", + "- Explain the key benefits and pitfalls of working with data on the Cloud\n", + "- Open cloud hosted data in a performant way\n", + "- Scale your analyses using Dask or Cubed\n", + "- Find communities to learn more about cloud native science\n", + "- **Compare performance between local and cloud-based computing**\n", + ":::\n", + "\n", + "\n", + "## Introduction\n", + "\n", + "This notebook is for the workshop ([Open Source Geospatial Workflows in the Cloud](https://geojupyter.github.io/workshop-open-source-geospatial)) presented at the [AGU Fall Meeting 2025](https://agu.confex.com/agu/agu25/meetingapp.cgi/Session/252640).\n", + "\n", + "### šŸ†• Local vs. Cloud Comparison Feature\n", + "\n", + "This notebook supports running in both **local** and **cloud** environments to demonstrate the performance benefits of \"data-proximate computing\". Run it once locally, then again on cloud infrastructure (e.g., AWS us-west-2) to see the dramatic difference in performance when your compute is co-located with your data." 
+ ] + }, + { + "cell_type": "markdown", + "id": "89da0fe3", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "source": [ + "## What is the cloud?\n", + "\n", + "The cloud (as defined by [Cloudflare](https://www.cloudflare.com/)) is a distributed collection of servers that host software and infrastructure, and it is accessed over the Internet. The map below (from [Salesforce](https://trailhead.salesforce.com/content/learn/modules/aws-cloud-technical-professionals/explore-the-aws-global-infrastructure-technical-professionals)) shows [Amazon Web Services (AWS)](https://aws.amazon.com)'s global distribution of data centers, which contain the resources that make up the AWS cloud. Three large cloud providers in the United States are [AWS](https://aws.amazon.com/), [Google Cloud Platform](https://cloud.google.com/), and [Microsoft Azure](https://azure.microsoft.com/en-us), but other cloud providers are larger elsewhere in the world and there are numerous smaller providers available.\n", + "\n", + "![AWS regions](./aws_regions.jpg)" + ] + }, + { + "cell_type": "markdown", + "id": "ad673655", + "metadata": {}, + "source": [ + "## What makes data on the cloud different?\n", + "\n", + "Hosting data on the cloud differs from storing data locally (or on-premises) in a few important ways:\n", + "\n", + "- Redundancy - you can easily replicate your data across multiple servers, which may be distributed across the globe\n", + "- Reliability - cloud providers offer services for reliability, such as automated backups and recovery\n", + "- Scalability - cloud object storage enables nearly limitless simultaneous access across users/connections, without needing to order or decommission servers or hard drives\n", + "- Accessibility - anyone in the world, with proper authorization, can rapidly access data shared on the cloud\n", + "\n", + "Gotchas: There are a couple of considerations to be aware of when working with data on the cloud:\n", + "\n", + "- Pay-as-you-go - 
Most cloud providers use pay-as-you-go pricing, where you only pay for the storage and services that you use. This can potentially reduce costs, especially upfront costs (e.g., you never need to buy a hard drive). However, **you may want to provide indefinite access or you may forget about data in storage; in both cases you may end up continuing to pay for data storage indefinitely**.\n", + "- Time and cost of bringing data to your computer - Hosting the data on the cloud naturally means it's no longer already near your computer's processing resources. Transporting data from the cloud to your computer is expensive, since most cloud providers charge for any data leaving their network, and slow, since the data needs to travel large distances. The primary solution for this is \"data-proximate computing\", which involves running your code on computing resources in the same cloud location as your data. For example, I commonly use NASA data products that are hosted on AWS servers in the 'us-west-2' region, which corresponds to Oregon in the figure above. Following the \"data-proximate computing\" paradigm, I use AWS compute resources that are also in Oregon when working with those data, rather than downloading data to use the computing resources on my laptop in North Carolina. In addition to \"data-proximate computing\", there are many other ways to make working with data on the cloud cheaper and easier. Let's take a look!" + ] + }, + { + "cell_type": "markdown", + "id": "286f73e5", + "metadata": {}, + "source": [ + "## What is cloud-native data?\n", + "\n", + "Cloud-native data are structured for efficient querying across a network. For this 101 tutorial, you can think of \"a network\" as synonymous with \"the internet\". You can learn more about these data in the [CNG data formats guide](https://guide.cloudnativegeo.org/), but here we'll just compare working with data that is optimized for cloud usage against data that isn't."
+ ] + }, + { + "cell_type": "markdown", + "id": "0e6efe60", + "metadata": {}, + "source": [ + "### Setup and Helper Functions\n", + "\n", + "First import the necessary libraries:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "0b4d349c", + "metadata": {}, + "outputs": [], + "source": [ + "import fsspec\n", + "import numpy as np\n", + "import xarray as xr\n", + "from time import perf_counter\n", + "import time # for timestamps only\n", + "import warnings\n", + "import json\n", + "import os\n", + "from pathlib import Path\n", + "\n", + "warnings.filterwarnings(\n", + " \"ignore\",\n", + " message=\"Numcodecs codecs are not in the Zarr version 3 specification*\",\n", + " category=UserWarning\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "34f650d4", + "metadata": {}, + "source": [ + "### šŸ†• Environment Configuration\n", + "\n", + "Configure whether you're running locally or on cloud infrastructure. This notebook will save timings to a JSON file so you can compare results from different environments." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "84534bcd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "šŸ–„ļø Running in 'LOCAL' environment\n", + "šŸ“ Description: MacBook Pro - Durham, NC\n" + ] + } + ], + "source": [ + "# =============================================================================\n", + "# ENVIRONMENT CONFIGURATION - MODIFY THIS FOR YOUR RUN\n", + "# =============================================================================\n", + "\n", + "# Set to \"local\" when running on your local machine (not in the cloud)\n", + "# Set to \"cloud\" when running on cloud infrastructure (e.g., AWS us-west-2)\n", + "ENVIRONMENT = \"local\" # Options: \"local\" or \"cloud\"\n", + "\n", + "# Optional: Add a description for this run (e.g., your location, machine specs)\n", + "RUN_DESCRIPTION = \"MacBook Pro - Durham, NC\"\n", + "\n", + "# File to store timing results for comparison\n", + "TIMINGS_FILE = Path(\"cloud_data_timings.json\")\n", + "\n", + "print(f\"šŸ–„ļø Running in '{ENVIRONMENT.upper()}' environment\")\n", + "print(f\"šŸ“ Description: {RUN_DESCRIPTION}\")" + ] + }, + { + "cell_type": "markdown", + "id": "a7b5e1c1", + "metadata": {}, + "source": [ + "Set up timing dictionary and helper functions:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "44da9087", + "metadata": {}, + "outputs": [], + "source": [ + "# Dictionary to store timings for current run\n", + "timings = {\n", + " 'open': {},\n", + " 'spatial_subset_load': {},\n", + " 'time_slice_load': {},\n", + " 'timeseries_load': {},\n", + "}\n", + "\n", + "# Constants for consistent test parameters\n", + "SPATIAL_SUBSET_KWARGS = {\"time\": \"2001-01-02\", \"lat\": slice(10, 15), \"lon\": slice(-60, -55)}\n", + "TIME_SLICE_KWARGS = {\"time\": \"2001-01-10\"}\n", + "SPATIAL_POINT_KWARGS = {\"lat\": 45, \"lon\": -150, \"method\": \"nearest\"}\n", + "N_FILES = 30\n", + "\n", + "def 
record_timing(category, method, elapsed_time):\n", + " \"\"\"Helper to record timing results (using perf_counter for precision).\"\"\"\n", + " timings[category][method] = elapsed_time\n", + " print(f\" ā±ļø {category} / {method}: {elapsed_time:.2f}s\")\n", + "\n", + "\n", + "def benchmark_method(ds, method_name, n_files=N_FILES):\n", + " \"\"\"Run all benchmark tests on a dataset and record timings.\"\"\"\n", + " \n", + " start = perf_counter()\n", + " data = ds['Tair'].sel(**SPATIAL_SUBSET_KWARGS).load()\n", + " record_timing('spatial_subset_load', method_name, perf_counter() - start)\n", + "\n", + " start = perf_counter()\n", + " data = ds['Tair'].sel(**TIME_SLICE_KWARGS).load()\n", + " record_timing('time_slice_load', method_name, perf_counter() - start)\n", + "\n", + " start = perf_counter()\n", + " data = ds['Tair'].sel(**SPATIAL_POINT_KWARGS).isel(time=slice(0, n_files)).load()\n", + " record_timing('timeseries_load', method_name, perf_counter() - start)\n", + " \n", + " return data\n", + "\n", + "\n", + "def print_summary(method_name):\n", + " \"\"\"Print a summary of timings for a method.\"\"\"\n", + " total = sum(timings[op].get(method_name, 0) for op in timings.keys())\n", + " print(f\"\\n ā”Œ{'─'*50}\")\n", + " print(f\" │ šŸ“‹ Summary for {method_name}\")\n", + " print(f\" ā”œ{'─'*50}\")\n", + " print(f\" │ Open: {timings['open'].get(method_name, 0):>8.2f}s\")\n", + " print(f\" │ Spatial subset: {timings['spatial_subset_load'].get(method_name, 0):>8.2f}s\")\n", + " print(f\" │ Time slice: {timings['time_slice_load'].get(method_name, 0):>8.2f}s\")\n", + " print(f\" │ Time series: {timings['timeseries_load'].get(method_name, 0):>8.2f}s\")\n", + " print(f\" ā”œ{'─'*50}\")\n", + " print(f\" │ TOTAL: {total:>8.2f}s\")\n", + " print(f\" ā””{'─'*50}\\n\")\n", + "\n", + "\n", + "def load_all_timings():\n", + " \"\"\"Load all saved timing results from file.\"\"\"\n", + " if TIMINGS_FILE.exists():\n", + " with open(TIMINGS_FILE, 'r') as f:\n", + " return 
json.load(f)\n", + " return {}\n", + "\n", + "\n", + "def save_current_timings():\n", + " \"\"\"Save current timing results to file.\"\"\"\n", + " all_timings = load_all_timings()\n", + " \n", + " # Create entry for this run\n", + " run_entry = {\n", + " 'description': RUN_DESCRIPTION,\n", + " 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),\n", + " 'timings': timings\n", + " }\n", + " \n", + " # Store under environment key\n", + " all_timings[ENVIRONMENT] = run_entry\n", + " \n", + " with open(TIMINGS_FILE, 'w') as f:\n", + " json.dump(all_timings, f, indent=2)\n", + " \n", + " print(f\"\\nāœ… Timings saved to {TIMINGS_FILE}\")\n", + " print(f\" Environment: {ENVIRONMENT}\")\n", + " print(f\" Timestamp: {run_entry['timestamp']}\")\n", + "\n", + "\n", + "def get_saved_environments():\n", + " \"\"\"Get list of environments with saved timings.\"\"\"\n", + " all_timings = load_all_timings()\n", + " return list(all_timings.keys())" + ] + }, + { + "cell_type": "markdown", + "id": "09d4e400", + "metadata": {}, + "source": [ + "List the files available following this pattern on AWS S3 storage:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a51c6f89", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'s3://nasa-waterinsight/NLDAS3/forcing/daily/200101/NLDAS_FOR0010_D.A20010101.030.beta.nc'" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fs = fsspec.filesystem('s3', anon=True)\n", + "nldas_files = fs.glob('s3://nasa-waterinsight/NLDAS3/forcing/daily/**/*.nc')\n", + "nldas_files = sorted(['s3://'+f for f in nldas_files])\n", + "nldas_files[0]" + ] + }, + { + "cell_type": "markdown", + "id": "252e7449", + "metadata": {}, + "source": [ + "### Opening archival data with fsspec + h5netcdf" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "0c84f267", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + 
"šŸ“Š Testing: fsspec + h5netcdf (default cache)\n", + " ā±ļø open / fsspec_default_cache: 1819.10s\n", + " ā±ļø spatial_subset_load / fsspec_default_cache: 0.61s\n", + " ā±ļø time_slice_load / fsspec_default_cache: 136.63s\n", + " ā±ļø timeseries_load / fsspec_default_cache: 0.20s\n", + "\n", + " ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ šŸ“‹ Summary for fsspec_default_cache\n", + " ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ Open: 1819.10s\n", + " │ Spatial subset: 0.61s\n", + " │ Time slice: 136.63s\n", + " │ Time series: 0.20s\n", + " ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ TOTAL: 1956.54s\n", + " └──────────────────────────────────────────────────\n", + "\n" + ] + } + ], + "source": [ + "print(f\"\\nšŸ“Š Testing: fsspec + h5netcdf (default cache)\")\n", + "start = perf_counter()\n", + "fs = fsspec.filesystem('s3', anon=True)\n", + "file_objs = [fs.open(f) for f in nldas_files[:N_FILES]]\n", + "ds = xr.open_mfdataset(file_objs, engine=\"h5netcdf\", combine=\"nested\", concat_dim=\"time\")\n", + "record_timing('open', 'fsspec_default_cache', perf_counter() - start)\n", + "\n", + "benchmark_method(ds, 'fsspec_default_cache')\n", + "print_summary('fsspec_default_cache')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "6cc41725", + "metadata": {}, + "outputs": [], + "source": [ + "[f.close() for f in file_objs]\n", + "fs.clear_instance_cache()\n", + "del fs, file_objs, ds" + ] + }, + { + "cell_type": "markdown", + "id": "ad2c97f0", + "metadata": {}, + "source": [ + "This took a lot of time to open the file. 
Let's look how we can speed that up by configuring the caching strategy:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bd8645bc", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“Š Testing: fsspec + h5netcdf (block cache)\n", + " ā±ļø open / fsspec_block_cache: 278.71s\n", + " ā±ļø spatial_subset_load / fsspec_block_cache: 0.05s\n", + " ā±ļø time_slice_load / fsspec_block_cache: 4.60s\n", + " ā±ļø timeseries_load / fsspec_block_cache: 19.89s\n", + "\n", + " ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ šŸ“‹ Summary for fsspec_block_cache\n", + " ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ Open: 278.71s\n", + " │ Spatial subset: 0.05s\n", + " │ Time slice: 4.60s\n", + " │ Time series: 19.89s\n", + " ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ TOTAL: 303.25s\n", + " └──────────────────────────────────────────────────\n", + "\n" + ] + } + ], + "source": [ + "print(f\"\\nšŸ“Š Testing: fsspec + h5netcdf (block cache)\")\n", + "start = perf_counter()\n", + "fsspec_caching = {\n", + " \"cache_type\": \"blockcache\",\n", + " \"block_size\": 1024 * 1024 * 8,\n", + "}\n", + "fs = fsspec.filesystem('s3', anon=True)\n", + "file_objs = [fs.open(f, **fsspec_caching) for f in nldas_files[:N_FILES]]\n", + "ds = xr.open_mfdataset(file_objs, engine=\"h5netcdf\", combine=\"nested\", concat_dim=\"time\")\n", + "record_timing('open', 'fsspec_block_cache', perf_counter() - start)\n", + "\n", + "benchmark_method(ds, 'fsspec_block_cache')\n", + "print_summary('fsspec_block_cache')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + 
"id": "cf5239ab", + "metadata": {}, + "outputs": [], + "source": [ + "[f.close() for f in file_objs]\n", + "fs.clear_instance_cache()\n", + "del fs, file_objs, ds" + ] + }, + { + "cell_type": "markdown", + "id": "57a8ae8f", + "metadata": {}, + "source": [ + "### Opening archival data using VirtualiZarr + Icechunk\n", + "\n", + "Now, for the really cool part! Using [VirtualiZarr](https://virtualizarr.readthedocs.io/) + [Icechunk](https://icechunk.io/), we can rapidly open not just that file but all of the files included in the NLDAS3 dataset! In less than 2 seconds, we can have a lazy view of a dataset that contains 24 years of data. People will often use the term \"lazy loading\" when an operation loads metadata from a storage location, but does not load any actual data. Without the cloud-native adaptation virtual Zarr, it's not possible for a software library to determine how much data it should load from disk to get all the necessary metadata. Virtual Zarr is a faster, cheaper, and easier way to work with data on the cloud :rocket:." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "7a0ca8fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import icechunk\n", + "import zarr\n", + "import xarray as xr\n", + "\n", + "zarr.config.set({'threading.max_workers': 32, 'async.concurrency': 128})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "cf0d2e56", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“Š Testing: VirtualiZarr + Icechunk\n", + " ā±ļø open / virtualzarr_icechunk: 2.46s\n", + " ā±ļø spatial_subset_load / virtualzarr_icechunk: 2.53s\n", + " ā±ļø time_slice_load / virtualzarr_icechunk: 5.42s\n", + " ā±ļø timeseries_load / virtualzarr_icechunk: 1.21s\n", + "\n", + " ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ šŸ“‹ Summary for virtualzarr_icechunk\n", + " ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ Open: 2.46s\n", + " │ Spatial subset: 2.53s\n", + " │ Time slice: 5.42s\n", + " │ Time series: 1.21s\n", + " ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€\n", + " │ TOTAL: 11.63s\n", + " └──────────────────────────────────────────────────\n", + "\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
<xarray.Dataset> Size: 51TB\n",
+       "Dimensions:   (time: 8399, lat: 6500, lon: 11700)\n",
+       "Coordinates:\n",
+       "  * time      (time) datetime64[ns] 67kB 2001-01-02 2001-01-03 ... 2024-01-01\n",
+       "  * lat       (lat) float64 52kB 7.005 7.015 7.025 7.035 ... 71.97 71.98 71.99\n",
+       "  * lon       (lon) float64 94kB -169.0 -169.0 -169.0 ... -52.03 -52.01 -52.0\n",
+       "Data variables:\n",
+       "    LWdown    (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    PSurf     (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    Rainf     (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    Wind_N    (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    Qair      (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    Tair      (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    Wind_E    (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    SWdown    (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    Tair_max  (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "    Tair_min  (time, lat, lon) float64 5TB dask.array<chunksize=(1, 500, 900), meta=np.ndarray>\n",
+       "Attributes: (12/17)\n",
+       "    missing_value:          -9999.0\n",
+       "    time_definition:        daily\n",
+       "    shortname:              NLDAS_FOR0010_D_3.0\n",
+       "    title:                  NLDAS Forcing Data L4 Daily 0.01 x 0.01 degree V3...\n",
+       "    version:                3.0 beta\n",
+       "    institution:            NASA GSFC\n",
+       "    ...                     ...\n",
+       "    websites:               https://ldas.gsfc.nasa.gov/nldas/v3/ ; https://li...\n",
+       "    MAP_PROJECTION:         EQUIDISTANT CYLINDRICAL\n",
+       "    SOUTH_WEST_CORNER_LAT:  7.005000114440918\n",
+       "    SOUTH_WEST_CORNER_LON:  -168.9949951171875\n",
+       "    DX:                     0.009999999776482582\n",
+       "    DY:                     0.009999999776482582
" + ], + "text/plain": [ + " Size: 51TB\n", + "Dimensions: (time: 8399, lat: 6500, lon: 11700)\n", + "Coordinates:\n", + " * time (time) datetime64[ns] 67kB 2001-01-02 2001-01-03 ... 2024-01-01\n", + " * lat (lat) float64 52kB 7.005 7.015 7.025 7.035 ... 71.97 71.98 71.99\n", + " * lon (lon) float64 94kB -169.0 -169.0 -169.0 ... -52.03 -52.01 -52.0\n", + "Data variables:\n", + " LWdown (time, lat, lon) float64 5TB dask.array\n", + " PSurf (time, lat, lon) float64 5TB dask.array\n", + " Rainf (time, lat, lon) float64 5TB dask.array\n", + " Wind_N (time, lat, lon) float64 5TB dask.array\n", + " Qair (time, lat, lon) float64 5TB dask.array\n", + " Tair (time, lat, lon) float64 5TB dask.array\n", + " Wind_E (time, lat, lon) float64 5TB dask.array\n", + " SWdown (time, lat, lon) float64 5TB dask.array\n", + " Tair_max (time, lat, lon) float64 5TB dask.array\n", + " Tair_min (time, lat, lon) float64 5TB dask.array\n", + "Attributes: (12/17)\n", + " missing_value: -9999.0\n", + " time_definition: daily\n", + " shortname: NLDAS_FOR0010_D_3.0\n", + " title: NLDAS Forcing Data L4 Daily 0.01 x 0.01 degree V3...\n", + " version: 3.0 beta\n", + " institution: NASA GSFC\n", + " ... 
...\n", + " websites: https://ldas.gsfc.nasa.gov/nldas/v3/ ; https://li...\n", + " MAP_PROJECTION: EQUIDISTANT CYLINDRICAL\n", + " SOUTH_WEST_CORNER_LAT: 7.005000114440918\n", + " SOUTH_WEST_CORNER_LON: -168.9949951171875\n", + " DX: 0.009999999776482582\n", + " DY: 0.009999999776482582" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "print(f\"\\nšŸ“Š Testing: VirtualiZarr + Icechunk\")\n", + "start = perf_counter()\n", + "storage = icechunk.s3_storage(\n", + " bucket='nasa-waterinsight',\n", + " prefix=f\"virtual-zarr-store/NLDAS-3-icechunk\",\n", + " region=\"us-west-2\",\n", + " anonymous=True,\n", + ")\n", + "\n", + "chunk_url = \"s3://nasa-waterinsight/NLDAS3/forcing/daily/\"\n", + "virtual_credentials = icechunk.containers_credentials({\n", + " chunk_url: icechunk.s3_anonymous_credentials()\n", + "})\n", + "\n", + "repo = icechunk.Repository.open(\n", + " storage=storage,\n", + " authorize_virtual_chunk_access=virtual_credentials,\n", + ")\n", + "\n", + "session = repo.readonly_session('main')\n", + "ds = xr.open_zarr(session.store, consolidated=False, zarr_format=3, chunks={})\n", + "record_timing('open', 'virtualzarr_icechunk', perf_counter() - start)\n", + "\n", + "benchmark_method(ds, 'virtualzarr_icechunk')\n", + "print_summary('virtualzarr_icechunk')\n", + "ds" + ] + }, + { + "cell_type": "markdown", + "id": "9c499d4a", + "metadata": {}, + "source": [ + "### Save Timings for This Environment" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "884f4f48", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "āœ… Timings saved to cloud_data_timings.json\n", + " Environment: local\n", + " Timestamp: 2025-12-11 12:18:33\n", + "\n", + "šŸ“ Saved timing data available for: ['cloud', 'local']\n" + ] + } + ], + "source": [ + "# Save the timing results for this environment\n", + "save_current_timings()\n", + "\n", + "# Show what 
environments we have data for\n", + "saved_envs = get_saved_environments()\n", + "print(f\"\\nšŸ“ Saved timing data available for: {saved_envs}\")" + ] + }, + { + "cell_type": "markdown", + "id": "554035bf", + "metadata": {}, + "source": [ + "## šŸ†• Local vs. Cloud Performance Comparison\n", + "\n", + "After running this notebook in both environments, run this section to see the performance differences." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "dbce4bc9", + "metadata": {}, + "outputs": [], + "source": [ + "def create_comparison_report():\n", + " \"\"\"Create a comprehensive comparison between local and cloud runs.\"\"\"\n", + " all_timings = load_all_timings()\n", + " \n", + " if len(all_timings) < 2:\n", + " missing = []\n", + " if 'local' not in all_timings:\n", + " missing.append('local')\n", + " if 'cloud' not in all_timings:\n", + " missing.append('cloud')\n", + " print(f\"āš ļø Need timing data from both environments!\")\n", + " print(f\" Missing: {missing}\")\n", + " print(f\" Available: {list(all_timings.keys())}\")\n", + " print(f\"\\n To complete the comparison:\")\n", + " print(f\" 1. Run this notebook with ENVIRONMENT = 'local' on your local machine\")\n", + " print(f\" 2. Run this notebook with ENVIRONMENT = 'cloud' on cloud infrastructure (e.g., AWS us-west-2)\")\n", + " print(f\" 3. Copy the {TIMINGS_FILE} file between environments or manually combine results\")\n", + " return None\n", + " \n", + " return all_timings\n", + "\n", + "all_timings = create_comparison_report()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "41e19522", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "šŸ“Š Chart saved to 'local_vs_cloud_comparison.png'\n" + ] + } + ], + "source": [ + "def plot_comparison(all_timings):\n", + " \"\"\"Create a visual comparison of local vs cloud performance.\"\"\"\n", + " if all_timings is None:\n", + " return\n", + " \n", + " import matplotlib.pyplot as plt\n", + " \n", + " methods = ['fsspec_default_cache', 'fsspec_block_cache', 'virtualzarr_icechunk']\n", + " method_labels = ['fsspec\\n(default)', 'fsspec\\n(block)', 'VirtualiZarr\\n+ Icechunk']\n", + " \n", + " # Calculate total times for each method\n", + " local_totals = []\n", + " cloud_totals = []\n", + " \n", + " for method in methods:\n", + " local_total = sum(\n", + " all_timings.get('local', {}).get('timings', {}).get(op, {}).get(method, 0)\n", + " for op in ['open', 'spatial_subset_load', 'time_slice_load', 'timeseries_load']\n", + " )\n", + " cloud_total = sum(\n", + " all_timings.get('cloud', {}).get('timings', {}).get(op, {}).get(method, 0)\n", + " for op in ['open', 'spatial_subset_load', 'time_slice_load', 'timeseries_load']\n", + " )\n", + " local_totals.append(local_total)\n", + " cloud_totals.append(cloud_total)\n", + " \n", + " # Create bar chart\n", + " x = np.arange(len(methods))\n", + " width = 0.35\n", + " \n", + " fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))\n", + " \n", + " # Total time comparison\n", + " bars1 = ax1.bar(x - width/2, local_totals, width, label='Local', color='#ff6b6b', alpha=0.8)\n", + " bars2 = ax1.bar(x + width/2, cloud_totals, width, label='Cloud (in-region)', color='#4ecdc4', alpha=0.8)\n", + " \n", + " ax1.set_ylabel('Total Time (seconds)', fontsize=12)\n", + " ax1.set_title('Total Execution Time: Local vs Cloud', fontsize=14, fontweight='bold')\n", + " ax1.set_xticks(x)\n", + " ax1.set_xticklabels(method_labels, fontsize=10)\n", + " ax1.legend()\n", + " ax1.grid(axis='y', 
alpha=0.3)\n", + " \n", + " # Add value labels on bars\n", + " for bar in bars1:\n", + " height = bar.get_height()\n", + " ax1.annotate(f'{height:.1f}s',\n", + " xy=(bar.get_x() + bar.get_width() / 2, height),\n", + " xytext=(0, 3),\n", + " textcoords=\"offset points\",\n", + " ha='center', va='bottom', fontsize=9)\n", + " for bar in bars2:\n", + " height = bar.get_height()\n", + " ax1.annotate(f'{height:.1f}s',\n", + " xy=(bar.get_x() + bar.get_width() / 2, height),\n", + " xytext=(0, 3),\n", + " textcoords=\"offset points\",\n", + " ha='center', va='bottom', fontsize=9)\n", + " \n", + " # Speedup comparison\n", + " speedups = [l/c if c > 0 else 0 for l, c in zip(local_totals, cloud_totals)]\n", + " colors = ['#2ecc71' if s > 1 else '#e74c3c' for s in speedups]\n", + " bars3 = ax2.bar(x, speedups, color=colors, alpha=0.8)\n", + " ax2.axhline(y=1, color='gray', linestyle='--', alpha=0.7, label='No speedup')\n", + " ax2.set_ylabel('Speedup Factor (Local Time / Cloud Time)', fontsize=12)\n", + " ax2.set_title('Cloud Speedup by Method', fontsize=14, fontweight='bold')\n", + " ax2.set_xticks(x)\n", + " ax2.set_xticklabels(method_labels, fontsize=10)\n", + " ax2.grid(axis='y', alpha=0.3)\n", + " \n", + " # Add value labels\n", + " for bar, speedup in zip(bars3, speedups):\n", + " height = bar.get_height()\n", + " ax2.annotate(f'{speedup:.1f}x',\n", + " xy=(bar.get_x() + bar.get_width() / 2, height),\n", + " xytext=(0, 3),\n", + " textcoords=\"offset points\",\n", + " ha='center', va='bottom', fontsize=11, fontweight='bold')\n", + " \n", + " plt.tight_layout()\n", + " plt.savefig('local_vs_cloud_comparison.png', dpi=150, bbox_inches='tight')\n", + " plt.show()\n", + " \n", + " print(\"\\nšŸ“Š Chart saved to 'local_vs_cloud_comparison.png'\")\n", + "\n", + "if all_timings:\n", + " plot_comparison(all_timings)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "a27f1458", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", 
+ "text": [ + "\n", + "================================================================================\n", + "šŸ’” KEY INSIGHTS\n", + "================================================================================\n", + "\n", + "šŸ“ˆ Average speedup from data-proximate computing: 3.1x\n", + "šŸ† Best speedup: fsspec_block_cache (4.2x faster on cloud)\n", + "\n", + "ā±ļø Total time (all methods):\n", + " Local: 2271.4 seconds\n", + " Cloud: 580.9 seconds\n", + " Saved: 1690.5 seconds (74% reduction)\n", + "\n", + "šŸŽÆ Key takeaway: Running your code on cloud infrastructure\n", + " co-located with your data can dramatically improve performance!\n" + ] + } + ], + "source": [ + "def print_key_insights(all_timings):\n", + " \"\"\"Print key insights from the comparison.\"\"\"\n", + " if all_timings is None:\n", + " return\n", + " \n", + " print(\"\\n\" + \"=\"*80)\n", + " print(\"šŸ’” KEY INSIGHTS\")\n", + " print(\"=\"*80)\n", + " \n", + " # Calculate average speedup\n", + " speedups = []\n", + " methods = ['fsspec_default_cache', 'fsspec_block_cache', 'virtualzarr_icechunk']\n", + " \n", + " for method in methods:\n", + " local_total = sum(\n", + " all_timings.get('local', {}).get('timings', {}).get(op, {}).get(method, 0)\n", + " for op in ['open', 'spatial_subset_load', 'time_slice_load', 'timeseries_load']\n", + " )\n", + " cloud_total = sum(\n", + " all_timings.get('cloud', {}).get('timings', {}).get(op, {}).get(method, 0)\n", + " for op in ['open', 'spatial_subset_load', 'time_slice_load', 'timeseries_load']\n", + " )\n", + " if cloud_total > 0:\n", + " speedups.append((method, local_total / cloud_total, local_total, cloud_total))\n", + " \n", + " if speedups:\n", + " avg_speedup = sum(s[1] for s in speedups) / len(speedups)\n", + " max_speedup = max(speedups, key=lambda x: x[1])\n", + " \n", + " print(f\"\\nšŸ“ˆ Average speedup from data-proximate computing: {avg_speedup:.1f}x\")\n", + " print(f\"šŸ† Best speedup: {max_speedup[0]} ({max_speedup[1]:.1f}x 
faster on cloud)\")\n", + " \n", + " # Time saved\n", + " total_local = sum(s[2] for s in speedups)\n", + " total_cloud = sum(s[3] for s in speedups)\n", + " time_saved = total_local - total_cloud\n", + " \n", + " print(f\"\\nā±ļø Total time (all methods):\")\n", + " print(f\" Local: {total_local:.1f} seconds\")\n", + " print(f\" Cloud: {total_cloud:.1f} seconds\")\n", + " print(f\" Saved: {time_saved:.1f} seconds ({(time_saved/total_local)*100:.0f}% reduction)\")\n", + " \n", + " print(\"\\nšŸŽÆ Key takeaway: Running your code on cloud infrastructure\")\n", + " print(\" co-located with your data can dramatically improve performance!\")\n", + "\n", + "if all_timings:\n", + " print_key_insights(all_timings)" + ] + }, + { + "cell_type": "markdown", + "id": "a3e7d59c", + "metadata": {}, + "source": [ + "## Takeaways\n", + "\n", + "- When working on the cloud, try to find computing resources that are \"in-region\" to the data you're working with.\n", + "- File formats matter - consider using virtual Zarr if your data are not already \"cloud-optimized\".\n", + "- File access patterns matter - the default arguments for reading data from the cloud may be very slow! 
You can customize the configuration for better performance.\n", + "- **🆕 Data-proximate computing can provide dramatic speedups compared to working locally!**" + ] + }, + { + "cell_type": "markdown", + "id": "60e8db4b", + "metadata": {}, + "source": [ + "## References\n", + "\n", + "- [Cloud-Optimized Geospatial Formats Guide](https://guide.cloudnativegeo.org/)\n", + "- [Xarray Tutorial - Zarr in Cloud Object Storage](https://tutorial.xarray.dev/intermediate/remote_data/cmip6-cloud.html)\n", + "- [Xarray Tutorial - Access Patterns to Remote Data with fsspec](https://tutorial.xarray.dev/intermediate/remote_data/remote-data.html)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "04-data-in-the-cloud", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/modules/04-data-in-the-cloud/index.md b/modules/04-data-in-the-cloud/index.md deleted file mode 100644 index 0bbc4e3..0000000 --- a/modules/04-data-in-the-cloud/index.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -authors: - - name: "Max Jones" - affiliations: - - "Development Seed" - email: "max@developmentseed.org" - orcid: "0000-0003-0180-8928" - github: "maxrjones" ---- - -# ☁️ 4 - Data in the Cloud 101 - -:::{note} 🏛 Slides -:icon: false -:class: dropdown - - -::: diff --git a/modules/04-data-in-the-cloud/local_vs_cloud_comparison.png b/modules/04-data-in-the-cloud/local_vs_cloud_comparison.png new file mode 100644 index 0000000..74b5ffd Binary files /dev/null and b/modules/04-data-in-the-cloud/local_vs_cloud_comparison.png differ diff --git a/modules/04-data-in-the-cloud/pyproject.toml b/modules/04-data-in-the-cloud/pyproject.toml new file mode 100644 index 0000000..98728bb --- 
/dev/null +++ b/modules/04-data-in-the-cloud/pyproject.toml @@ -0,0 +1,41 @@ +[project] +name = "04-data-in-the-cloud" +version = "0.1.0" +description = "Dependencies for module 4 - Data in the Cloud" +readme = "README.md" +requires-python = ">=3.11,<3.13" +dependencies = [ + "xarray>=2025.7.1", + "pandas>=2.3.1", + "numpy>=2.2.6,<2.3", + "scipy>=1.16.1", + "netCDF4>=1.7.2", + "cftime>=1.6.4", + "bottleneck>=1.5.0", + "dask>=2025.7.0", + "distributed>=2025.7.0", + "matplotlib>=3.10.5", + "cartopy>=0.25.0", + "numbagg>=0.9.0", + "pint>=0.24.4", + "sparse>=0.17.0", + "flox>=0.10.4", + "h5netcdf>=1.6.4,<1.8", + "h5py>=3.14.0,<3.15", + "zarr>=3.1.1,<3.2", + "fsspec>=2025.7.0,<2025.11", + "cubed-xarray>=0.0.8", + "cubed[diagnostics]>=0.23.0", + "icechunk>=1.1.12", + "obspec-utils>=0.2.0", + "obstore>=0.8.2", + "s3fs>=2025.10.0", + "virtualizarr>=2.2.1", +] + +[dependency-groups] +dev = [ + "ipykernel>=7.1.0", + "jupytext>=1.18.1", + "ipython>=9.4.0,<9.5", +] diff --git a/myst.yml b/myst.yml index 6e2a459..784c362 100644 --- a/myst.yml +++ b/myst.yml @@ -30,6 +30,7 @@ project: children: - pattern: "for-instructors/*.md" + site: template: "book-theme" actions: diff --git a/pixi.lock b/pixi.lock index 7c95e3a..be9624a 100644 --- a/pixi.lock +++ b/pixi.lock @@ -8,32 +8,66 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2 - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.10.5-hbd8a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.6-hb03c661_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.11.12-hbd8a1cb_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda - - conda: 
https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-ha97dd6f_2.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.1-hecca717_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-h767d61c_7.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_7.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-h767d61c_7.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_104.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.2.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.2.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.2.0-hb03c661_1.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_16.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.50.4-h0c1763c_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h8f9b012_7.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-h4852527_7.conda - - conda: 
https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-he9a06e4_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.67.0-had1ee68_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.1-h0c1763c_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_16.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-h5347b49_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libuv-1.51.0-hb03c661_1.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda - conda: https://conda.anaconda.org/conda-forge/noarch/mystmd-1.7.0-pyhcf101f3_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/nodejs-24.9.0-heeeca48_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.4-h26f9b46_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.0-h5989046_101_cp314.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/nodejs-24.10.0-h36edbcc_2.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_100_cp314.conda - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/readline-8.2-h8c095d6_2.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_hd72426e_102.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_ha0e22de_103.conda - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda + osx-arm64: + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-hd037594_8.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/c-ares-1.34.6-hc919400_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.11.12-hbd8a1cb_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/icu-75.1-hfee45f7_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlicommon-1.2.0-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlidec-1.2.0-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlienc-1.2.0-hc919400_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-21.1.7-hf598326_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libev-4.33-h93a5062_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.7.3-haf25636_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.5.2-he5f378a_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblzma-5.8.1-h39f12f2_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libmpdec-4.0.0-h5505292_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libnghttp2-1.67.0-hc438710_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.51.1-h9a5124b_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libuv-1.51.0-h6caf38d_1.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.1-h8359307_2.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/mystmd-1.7.0-pyhcf101f3_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-h5e97a16_3.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/nodejs-24.10.0-h64c5147_2.conda + - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.6.0-h5503f6c_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.14.2-h40d2674_100_cp314.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h1d1bf99_2.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h892fb3f_3.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstd-1.5.7-hbf9d68e_6.conda packages: - conda: https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2 sha256: fe51de6107f9edc7aa4f786a70f4a883943bc9d39b3bb7307c04c41410990726 @@ -64,14 +98,42 @@ packages: license_family: BSD size: 260341 timestamp: 1757437258798 -- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.10.5-hbd8a1cb_0.conda - sha256: 3b5ad78b8bb61b6cdc0978a6a99f8dfb2cc789a451378d054698441005ecbdb6 - md5: f9e5fbc24009179e8b0409624691758a +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-hd037594_8.conda + sha256: b456200636bd5fecb2bec63f7e0985ad2097cf1b83d60ce0b6968dffa6d02aa1 + md5: 58fd217444c2a5701a44244faf518206 + depends: + - __osx >=11.0 + license: bzip2-1.0.6 + license_family: BSD + size: 125061 + timestamp: 1757437486465 +- conda: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.6-hb03c661_0.conda + sha256: cc9accf72fa028d31c2a038460787751127317dcfa991f8d1f1babf216bb454e + md5: 920bb03579f15389b9e512095ad995b7 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + license: MIT + license_family: MIT + size: 207882 + timestamp: 1765214722852 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/c-ares-1.34.6-hc919400_0.conda + sha256: 2995f2aed4e53725e5efbc28199b46bf311c3cab2648fc4f10c2227d6d5fa196 + md5: bcb3cba70cf1eec964a03b4ba7775f01 + depends: + - __osx >=11.0 + license: 
MIT + license_family: MIT + size: 180327 + timestamp: 1765215064054 +- conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.11.12-hbd8a1cb_0.conda + sha256: b986ba796d42c9d3265602bc038f6f5264095702dd546c14bc684e60c385e773 + md5: f0991f0f84902f6b6009b4d2350a83aa depends: - __unix license: ISC - size: 155907 - timestamp: 1759649036195 + size: 152432 + timestamp: 1762967197890 - conda: https://conda.anaconda.org/conda-forge/linux-64/icu-75.1-he02047a_0.conda sha256: 71e750d509f5fa3421087ba88ef9a7b9be11c53174af3aa4d06aff4c18b38e8e md5: 8b189310083baabfb622af68fd9d3ae3 @@ -83,70 +145,183 @@ packages: license_family: MIT size: 12129203 timestamp: 1720853576813 -- conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-ha97dd6f_2.conda - sha256: 707dfb8d55d7a5c6f95c772d778ef07a7ca85417d9971796f7d3daad0b615de8 - md5: 14bae321b8127b63cba276bd53fac237 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/icu-75.1-hfee45f7_0.conda + sha256: 9ba12c93406f3df5ab0a43db8a4b4ef67a5871dfd401010fbe29b218b2cbe620 + md5: 5eb22c1d7b3fc4abb50d92d621583137 + depends: + - __osx >=11.0 + license: MIT + license_family: MIT + size: 11857802 + timestamp: 1720853997952 +- conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.45-default_hbd61a6d_104.conda + sha256: 9e191baf2426a19507f1d0a17be0fdb7aa155cdf0f61d5a09c808e0a69464312 + md5: a6abd2796fc332536735f68ba23f7901 depends: - __glibc >=2.17,<3.0.a0 + - zstd >=1.5.7,<1.6.0a0 constrains: - - binutils_impl_linux-64 2.44 + - binutils_impl_linux-64 2.45 license: GPL-3.0-only license_family: GPL - size: 747158 - timestamp: 1758810907507 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.1-hecca717_0.conda - sha256: da2080da8f0288b95dd86765c801c6e166c4619b910b11f9a8446fb852438dc2 - md5: 4211416ecba1866fab0c6470986c22d6 + size: 725545 + timestamp: 1764007826689 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlicommon-1.2.0-hb03c661_1.conda + 
sha256: 318f36bd49ca8ad85e6478bd8506c88d82454cc008c1ac1c6bf00a3c42fa610e + md5: 72c8fd1af66bd67bf580645b426513ed depends: - __glibc >=2.17,<3.0.a0 - libgcc >=14 + license: MIT + license_family: MIT + size: 79965 + timestamp: 1764017188531 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlicommon-1.2.0-hc919400_1.conda + sha256: a7cb9e660531cf6fbd4148cff608c85738d0b76f0975c5fc3e7d5e92840b7229 + md5: 006e7ddd8a110771134fcc4e1e3a6ffa + depends: + - __osx >=11.0 + license: MIT + license_family: MIT + size: 79443 + timestamp: 1764017945924 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlidec-1.2.0-hb03c661_1.conda + sha256: 12fff21d38f98bc446d82baa890e01fd82e3b750378fedc720ff93522ffb752b + md5: 366b40a69f0ad6072561c1d09301c886 + depends: + - __glibc >=2.17,<3.0.a0 + - libbrotlicommon 1.2.0 hb03c661_1 + - libgcc >=14 + license: MIT + license_family: MIT + size: 34632 + timestamp: 1764017199083 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlidec-1.2.0-hc919400_1.conda + sha256: 2eae444039826db0454b19b52a3390f63bfe24f6b3e63089778dd5a5bf48b6bf + md5: 079e88933963f3f149054eec2c487bc2 + depends: + - __osx >=11.0 + - libbrotlicommon 1.2.0 hc919400_1 + license: MIT + license_family: MIT + size: 29452 + timestamp: 1764017979099 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libbrotlienc-1.2.0-hb03c661_1.conda + sha256: a0c15c79997820bbd3fbc8ecf146f4fe0eca36cc60b62b63ac6cf78857f1dd0d + md5: 4ffbb341c8b616aa2494b6afb26a0c5f + depends: + - __glibc >=2.17,<3.0.a0 + - libbrotlicommon 1.2.0 hb03c661_1 + - libgcc >=14 + license: MIT + license_family: MIT + size: 298378 + timestamp: 1764017210931 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libbrotlienc-1.2.0-hc919400_1.conda + sha256: 01436c32bb41f9cb4bcf07dda647ce4e5deb8307abfc3abdc8da5317db8189d1 + md5: b2b7c8288ca1a2d71ff97a8e6a1e8883 + depends: + - __osx >=11.0 + - libbrotlicommon 1.2.0 hc919400_1 + license: MIT + license_family: MIT + size: 290754 + 
timestamp: 1764018009077 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libcxx-21.1.7-hf598326_0.conda + sha256: 4bdbef0241b52e7a8552e8af7425f0b56d5621dd69df46c816546fefa17d77ab + md5: 0de94f39727c31c0447e408c5a210a56 + depends: + - __osx >=11.0 + license: Apache-2.0 WITH LLVM-exception + license_family: Apache + size: 568715 + timestamp: 1764676451068 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libev-4.33-hd590300_2.conda + sha256: 1cd6048169fa0395af74ed5d8f1716e22c19a81a8a36f934c110ca3ad4dd27b4 + md5: 172bf1cd1ff8629f2b1179945ed45055 + depends: + - libgcc-ng >=12 + license: BSD-2-Clause + license_family: BSD + size: 112766 + timestamp: 1702146165126 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libev-4.33-h93a5062_2.conda + sha256: 95cecb3902fbe0399c3a7e67a5bed1db813e5ab0e22f4023a5e0f722f2cc214f + md5: 36d33e440c31857372a72137f78bacf5 + license: BSD-2-Clause + license_family: BSD + size: 107458 + timestamp: 1702146414478 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libexpat-2.7.3-hecca717_0.conda + sha256: 1e1b08f6211629cbc2efe7a5bca5953f8f6b3cae0eeb04ca4dacee1bd4e2db2f + md5: 8b09ae86839581147ef2e5c5e229d164 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + constrains: + - expat 2.7.3.* + license: MIT + license_family: MIT + size: 76643 + timestamp: 1763549731408 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libexpat-2.7.3-haf25636_0.conda + sha256: fce22610ecc95e6d149e42a42fbc3cc9d9179bd4eb6232639a60f06e080eec98 + md5: b79875dbb5b1db9a4a22a4520f918e1a + depends: + - __osx >=11.0 constrains: - - expat 2.7.1.* + - expat 2.7.3.* license: MIT license_family: MIT - size: 74811 - timestamp: 1752719572741 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libffi-3.4.6-h2dba641_1.conda - sha256: 764432d32db45466e87f10621db5b74363a9f847d2b8b1f9743746cd160f06ab - md5: ede4673863426c0883c0063d853bbd85 + size: 67800 + timestamp: 1763549994166 +- conda: 
https://conda.anaconda.org/conda-forge/linux-64/libffi-3.5.2-h9ec8514_0.conda + sha256: 25cbdfa65580cfab1b8d15ee90b4c9f1e0d72128f1661449c9a999d341377d54 + md5: 35f29eec58405aaf55e01cb470d8c26a depends: - __glibc >=2.17,<3.0.a0 - - libgcc >=13 + - libgcc >=14 license: MIT license_family: MIT - size: 57433 - timestamp: 1743434498161 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-h767d61c_7.conda - sha256: 08f9b87578ab981c7713e4e6a7d935e40766e10691732bba376d4964562bcb45 - md5: c0374badb3a5d4b1372db28d19462c53 + size: 57821 + timestamp: 1760295480630 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libffi-3.5.2-he5f378a_0.conda + sha256: 9b8acdf42df61b7bfe8bdc545c016c29e61985e79748c64ad66df47dbc2e295f + md5: 411ff7cd5d1472bba0f55c0faf04453b + depends: + - __osx >=11.0 + license: MIT + license_family: MIT + size: 40251 + timestamp: 1760295839166 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-15.2.0-he0feb66_16.conda + sha256: 6eed58051c2e12b804d53ceff5994a350c61baf117ec83f5f10c953a3f311451 + md5: 6d0363467e6ed84f11435eb309f2ff06 depends: - __glibc >=2.17,<3.0.a0 - _openmp_mutex >=4.5 constrains: - - libgomp 15.2.0 h767d61c_7 - - libgcc-ng ==15.2.0=*_7 + - libgcc-ng ==15.2.0=*_16 + - libgomp 15.2.0 he0feb66_16 license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - size: 822552 - timestamp: 1759968052178 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_7.conda - sha256: 2045066dd8e6e58aaf5ae2b722fb6dfdbb57c862b5f34ac7bfb58c40ef39b6ad - md5: 280ea6eee9e2ddefde25ff799c4f0363 + size: 1042798 + timestamp: 1765256792743 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-15.2.0-h69a702a_16.conda + sha256: 5f07f9317f596a201cc6e095e5fc92621afca64829785e483738d935f8cab361 + md5: 5a68259fac2da8f2ee6f7bfe49c9eb8b depends: - - libgcc 15.2.0 h767d61c_7 + - libgcc 15.2.0 he0feb66_16 license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - size: 29313 - 
timestamp: 1759968065504 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-h767d61c_7.conda - sha256: e9fb1c258c8e66ee278397b5822692527c5f5786d372fe7a869b900853f3f5ca - md5: f7b4d76975aac7e5d9e6ad13845f92fe + size: 27256 + timestamp: 1765256804124 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libgomp-15.2.0-he0feb66_16.conda + sha256: 5b3e5e4e9270ecfcd48f47e3a68f037f5ab0f529ccb223e8e5d5ac75a58fc687 + md5: 26c46f90d0e727e95c6c9498a33a09f3 depends: - __glibc >=2.17,<3.0.a0 license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - size: 447919 - timestamp: 1759967942498 + size: 603284 + timestamp: 1765256703881 - conda: https://conda.anaconda.org/conda-forge/linux-64/liblzma-5.8.1-hb9d3cd8_2.conda sha256: f2591c0069447bbe28d4d696b7fcb0c5bd0b4ac582769b89addbcf26fb3430d8 md5: 1a580f7796c7bf6393fddb8bbbde58dc @@ -158,6 +333,16 @@ packages: license: 0BSD size: 112894 timestamp: 1749230047870 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/liblzma-5.8.1-h39f12f2_2.conda + sha256: 0cb92a9e026e7bd4842f410a5c5c665c89b2eb97794ffddba519a626b8ce7285 + md5: d6df911d4564d77c4374b02552cb17d1 + depends: + - __osx >=11.0 + constrains: + - xz 5.8.1.* + license: 0BSD + size: 92286 + timestamp: 1749230283517 - conda: https://conda.anaconda.org/conda-forge/linux-64/libmpdec-4.0.0-hb9d3cd8_0.conda sha256: 3aa92d4074d4063f2a162cd8ecb45dccac93e543e565c01a787e16a43501f7ee md5: c7e925f37e3b40d893459e625f6a53f1 @@ -168,47 +353,94 @@ packages: license_family: BSD size: 91183 timestamp: 1748393666725 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.50.4-h0c1763c_0.conda - sha256: 6d9c32fc369af5a84875725f7ddfbfc2ace795c28f246dc70055a79f9b2003da - md5: 0b367fad34931cb79e0d6b7e5c06bb1c +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libmpdec-4.0.0-h5505292_0.conda + sha256: 0a1875fc1642324ebd6c4ac864604f3f18f57fbcf558a8264f6ced028a3c75b2 + md5: 85ccccb47823dd9f7a99d2c7f530342f + depends: + - __osx >=11.0 + 
license: BSD-2-Clause + license_family: BSD + size: 71829 + timestamp: 1748393749336 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libnghttp2-1.67.0-had1ee68_0.conda + sha256: a4a7dab8db4dc81c736e9a9b42bdfd97b087816e029e221380511960ac46c690 + md5: b499ce4b026493a13774bcf0f4c33849 depends: - __glibc >=2.17,<3.0.a0 + - c-ares >=1.34.5,<2.0a0 + - libev >=4.33,<4.34.0a0 + - libev >=4.33,<5.0a0 - libgcc >=14 + - libstdcxx >=14 + - libzlib >=1.3.1,<2.0a0 + - openssl >=3.5.2,<4.0a0 + license: MIT + license_family: MIT + size: 666600 + timestamp: 1756834976695 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libnghttp2-1.67.0-hc438710_0.conda + sha256: a07cb53b5ffa2d5a18afc6fd5a526a5a53dd9523fbc022148bd2f9395697c46d + md5: a4b4dd73c67df470d091312ab87bf6ae + depends: + - __osx >=11.0 + - c-ares >=1.34.5,<2.0a0 + - libcxx >=19 + - libev >=4.33,<4.34.0a0 + - libev >=4.33,<5.0a0 + - libzlib >=1.3.1,<2.0a0 + - openssl >=3.5.2,<4.0a0 + license: MIT + license_family: MIT + size: 575454 + timestamp: 1756835746393 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libsqlite-3.51.1-h0c1763c_0.conda + sha256: 6f0e8a812e8e33a4d8b7a0e595efe28373080d27b78ee4828aa4f6649a088454 + md5: 2e1b84d273b01835256e53fd938de355 + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libzlib >=1.3.1,<2.0a0 + license: blessing + size: 938979 + timestamp: 1764359444435 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libsqlite-3.51.1-h9a5124b_0.conda + sha256: a46b167447e2a9e38586320c30b29e3b68b6f7e6b873c18d6b1aa2efd2626917 + md5: 67e50e5bd4e5e2310d66b88c4da50096 + depends: + - __osx >=11.0 - libzlib >=1.3.1,<2.0a0 license: blessing - size: 932581 - timestamp: 1753948484112 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h8f9b012_7.conda - sha256: 1b981647d9775e1cdeb2fab0a4dd9cd75a6b0de2963f6c3953dbd712f78334b3 - md5: 5b767048b1b3ee9a954b06f4084f93dc + size: 906292 + timestamp: 1764359907797 +- conda: 
https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-15.2.0-h934c35e_16.conda + sha256: 813427918316a00c904723f1dfc3da1bbc1974c5cfe1ed1e704c6f4e0798cbc6 + md5: 68f68355000ec3f1d6f26ea13e8f525f depends: - __glibc >=2.17,<3.0.a0 - - libgcc 15.2.0 h767d61c_7 + - libgcc 15.2.0 he0feb66_16 constrains: - - libstdcxx-ng ==15.2.0=*_7 + - libstdcxx-ng ==15.2.0=*_16 license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - size: 3898269 - timestamp: 1759968103436 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-h4852527_7.conda - sha256: 024fd46ac3ea8032a5ec3ea7b91c4c235701a8bf0e6520fe5e6539992a6bd05f - md5: f627678cf829bd70bccf141a19c3ad3e + size: 5856456 + timestamp: 1765256838573 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libstdcxx-ng-15.2.0-hdf11a46_16.conda + sha256: 81f2f246c7533b41c5e0c274172d607829019621c4a0823b5c0b4a8c7028ee84 + md5: 1b3152694d236cf233b76b8c56bf0eae depends: - - libstdcxx 15.2.0 h8f9b012_7 + - libstdcxx 15.2.0 h934c35e_16 license: GPL-3.0-only WITH GCC-exception-3.1 - license_family: GPL - size: 29343 - timestamp: 1759968157195 -- conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-he9a06e4_0.conda - sha256: e5ec6d2ad7eef538ddcb9ea62ad4346fde70a4736342c4ad87bd713641eb9808 - md5: 80c07c68d2f6870250959dcc95b209d1 + size: 27300 + timestamp: 1765256885128 +- conda: https://conda.anaconda.org/conda-forge/linux-64/libuuid-2.41.2-h5347b49_1.conda + sha256: 030447cf827c471abd37092ab9714fde82b8222106f22fde94bc7a64e2704c40 + md5: 41f5c09a211985c3ce642d60721e7c3e depends: - __glibc >=2.17,<3.0.a0 - libgcc >=14 license: BSD-3-Clause license_family: BSD - size: 37135 - timestamp: 1758626800002 + size: 40235 + timestamp: 1764790744114 - conda: https://conda.anaconda.org/conda-forge/linux-64/libuv-1.51.0-hb03c661_1.conda sha256: c180f4124a889ac343fc59d15558e93667d894a966ec6fdb61da1604481be26b md5: 0f03292cc56bf91a077a134ea8747118 @@ -219,6 +451,15 @@ packages: license_family: MIT size: 
895108 timestamp: 1753948278280 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libuv-1.51.0-h6caf38d_1.conda + sha256: 042c7488ad97a5629ec0a991a8b2a3345599401ecc75ad6a5af73b60e6db9689 + md5: c0d87c3c8e075daf1daf6c31b53e8083 + depends: + - __osx >=11.0 + license: MIT + license_family: MIT + size: 421195 + timestamp: 1753948426421 - conda: https://conda.anaconda.org/conda-forge/linux-64/libzlib-1.3.1-hb9d3cd8_2.conda sha256: d4bfe88d7cb447768e31650f06257995601f89076080e76df55e3112d4e47dc4 md5: edb0dca6bc32e4f4789199455a1dbeb8 @@ -231,6 +472,17 @@ packages: license_family: Other size: 60963 timestamp: 1727963148474 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/libzlib-1.3.1-h8359307_2.conda + sha256: ce34669eadaba351cd54910743e6a2261b67009624dbc7daeeafdef93616711b + md5: 369964e85dc26bfe78f41399b366c435 + depends: + - __osx >=11.0 + constrains: + - zlib 1.3.1 *_2 + license: Zlib + license_family: Other + size: 46438 + timestamp: 1727963202283 - conda: https://conda.anaconda.org/conda-forge/noarch/mystmd-1.7.0-pyhcf101f3_0.conda sha256: 11c8fdb494493e636024696395cbf5271f4890a1d69009daa4df17128a8bf792 md5: 8c0f1ed376697206261d1b99bd4858b6 @@ -239,6 +491,7 @@ packages: - nodejs >=18 - python license: MIT + license_family: MIT size: 2163398 timestamp: 1764855843630 - conda: https://conda.anaconda.org/conda-forge/linux-64/ncurses-6.5-h2d0b736_3.conda @@ -250,46 +503,91 @@ packages: license: X11 AND BSD-3-Clause size: 891641 timestamp: 1738195959188 -- conda: https://conda.anaconda.org/conda-forge/linux-64/nodejs-24.9.0-heeeca48_0.conda - sha256: 6abb823fd4d28e6474f40dfcf38e772e5869ee755be855cf5d2c0d49f888c75e - md5: 8a2a73951c1ea275e76fb1b92d97ff3e +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/ncurses-6.5-h5e97a16_3.conda + sha256: 2827ada40e8d9ca69a153a45f7fd14f32b2ead7045d3bbb5d10964898fe65733 + md5: 068d497125e4bf8a66bf707254fff5ae + depends: + - __osx >=11.0 + license: X11 AND BSD-3-Clause + size: 797030 + timestamp: 1738196177597 
+- conda: https://conda.anaconda.org/conda-forge/linux-64/nodejs-24.10.0-h36edbcc_2.conda + sha256: 3c1d59afd09c52dac76eed71b1e3b8d4b8502db151745216c036ad57808b82bb + md5: e2484efbb090278c0070dee87d9cdb21 depends: - __glibc >=2.28,<3.0.a0 - libstdcxx >=14 - libgcc >=14 + - libzlib >=1.3.1,<2.0a0 + - icu >=75.1,<76.0a0 + - c-ares >=1.34.5,<2.0a0 - libuv >=1.51.0,<2.0a0 - - openssl >=3.5.3,<4.0a0 + - openssl >=3.5.4,<4.0a0 + - libbrotlicommon >=1.2.0,<1.3.0a0 + - libbrotlienc >=1.2.0,<1.3.0a0 + - libbrotlidec >=1.2.0,<1.3.0a0 + - libsqlite >=3.51.1,<4.0a0 + - zstd >=1.5.7,<1.6.0a0 + - libnghttp2 >=1.67.0,<2.0a0 + license: MIT + size: 23504771 + timestamp: 1765210770135 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/nodejs-24.10.0-h64c5147_2.conda + sha256: d85fd71ed029ed2d8df9eeb05061c22a4e595a9d2634ee7abc5efa6bcde8f3c6 + md5: f3573bab908a11e6ed14265832779de6 + depends: + - __osx >=11.0 + - libcxx >=19 + - libbrotlicommon >=1.2.0,<1.3.0a0 + - libbrotlienc >=1.2.0,<1.3.0a0 + - libbrotlidec >=1.2.0,<1.3.0a0 + - libsqlite >=3.51.1,<4.0a0 + - openssl >=3.5.4,<4.0a0 + - c-ares >=1.34.5,<2.0a0 + - zstd >=1.5.7,<1.6.0a0 - icu >=75.1,<76.0a0 - libzlib >=1.3.1,<2.0a0 + - libnghttp2 >=1.67.0,<2.0a0 + - libuv >=1.51.0,<2.0a0 license: MIT license_family: MIT - size: 25557455 - timestamp: 1759064044872 -- conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.5.4-h26f9b46_0.conda - sha256: e807f3bad09bdf4075dbb4168619e14b0c0360bacb2e12ef18641a834c8c5549 - md5: 14edad12b59ccbfa3910d42c72adc2a0 + size: 16182130 + timestamp: 1765047982974 +- conda: https://conda.anaconda.org/conda-forge/linux-64/openssl-3.6.0-h26f9b46_0.conda + sha256: a47271202f4518a484956968335b2521409c8173e123ab381e775c358c67fe6d + md5: 9ee58d5c534af06558933af3c845a780 depends: - __glibc >=2.17,<3.0.a0 - ca-certificates - libgcc >=14 license: Apache-2.0 license_family: Apache - size: 3119624 - timestamp: 1759324353651 -- conda: 
https://conda.anaconda.org/conda-forge/linux-64/python-3.14.0-h5989046_101_cp314.conda - build_number: 101 - sha256: 61ae2c29b1097c12161a09a4061be8f909bc1387d8388e875d8ed5e357ef0824 - md5: b2ad21488149ec2c4d83640619de2430 + size: 3165399 + timestamp: 1762839186699 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/openssl-3.6.0-h5503f6c_0.conda + sha256: ebe93dafcc09e099782fe3907485d4e1671296bc14f8c383cb6f3dfebb773988 + md5: b34dc4172653c13dcf453862f251af2b + depends: + - __osx >=11.0 + - ca-certificates + license: Apache-2.0 + license_family: Apache + size: 3108371 + timestamp: 1762839712322 +- conda: https://conda.anaconda.org/conda-forge/linux-64/python-3.14.2-h32b2ec7_100_cp314.conda + build_number: 100 + sha256: a120fb2da4e4d51dd32918c149b04a08815fd2bd52099dad1334647984bb07f1 + md5: 1cef1236a05c3a98f68c33ae9425f656 depends: - __glibc >=2.17,<3.0.a0 - bzip2 >=1.0.8,<2.0a0 - ld_impl_linux-64 >=2.36.1 - - libexpat >=2.7.1,<3.0a0 - - libffi >=3.4.6,<3.5.0a0 + - libexpat >=2.7.3,<3.0a0 + - libffi >=3.5.2,<3.6.0a0 - libgcc >=14 - liblzma >=5.8.1,<6.0a0 - libmpdec >=4.0.0,<5.0a0 - - libsqlite >=3.50.4,<4.0a0 + - libsqlite >=3.51.1,<4.0a0 - libuuid >=2.41.2,<3.0a0 - libzlib >=1.3.1,<2.0a0 - ncurses >=6.5,<7.0a0 @@ -300,8 +598,32 @@ packages: - tzdata - zstd >=1.5.7,<1.6.0a0 license: Python-2.0 - size: 36692257 - timestamp: 1760299587505 + size: 36790521 + timestamp: 1765021515427 + python_site_packages_path: lib/python3.14/site-packages +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/python-3.14.2-h40d2674_100_cp314.conda + build_number: 100 + sha256: 1a93782e90b53e04c2b1a50a0f8bf0887936649d19dba6a05b05c4b44dae96b7 + md5: 14f15ab0d31a2ee5635aa56e77132594 + depends: + - __osx >=11.0 + - bzip2 >=1.0.8,<2.0a0 + - libexpat >=2.7.3,<3.0a0 + - libffi >=3.5.2,<3.6.0a0 + - liblzma >=5.8.1,<6.0a0 + - libmpdec >=4.0.0,<5.0a0 + - libsqlite >=3.51.1,<4.0a0 + - libzlib >=1.3.1,<2.0a0 + - ncurses >=6.5,<7.0a0 + - openssl >=3.5.4,<4.0a0 + - python_abi 3.14.* 
*_cp314 + - readline >=8.2,<9.0a0 + - tk >=8.6.13,<8.7.0a0 + - tzdata + - zstd >=1.5.7,<1.6.0a0 + license: Python-2.0 + size: 13575758 + timestamp: 1765021280625 python_site_packages_path: lib/python3.14/site-packages - conda: https://conda.anaconda.org/conda-forge/noarch/python_abi-3.14-8_cp314.conda build_number: 8 @@ -323,32 +645,61 @@ packages: license_family: GPL size: 282480 timestamp: 1740379431762 -- conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_hd72426e_102.conda - sha256: a84ff687119e6d8752346d1d408d5cf360dee0badd487a472aa8ddedfdc219e1 - md5: a0116df4f4ed05c303811a837d5b39d8 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/readline-8.2-h1d1bf99_2.conda + sha256: 7db04684d3904f6151eff8673270922d31da1eea7fa73254d01c437f49702e34 + md5: 63ef3f6e6d6d5c589e64f11263dc5676 + depends: + - ncurses >=6.5,<7.0a0 + license: GPL-3.0-only + license_family: GPL + size: 252359 + timestamp: 1740379663071 +- conda: https://conda.anaconda.org/conda-forge/linux-64/tk-8.6.13-noxft_ha0e22de_103.conda + sha256: 1544760538a40bcd8ace2b1d8ebe3eb5807ac268641f8acdc18c69c5ebfeaf64 + md5: 86bc20552bf46075e3d92b67f089172d depends: - __glibc >=2.17,<3.0.a0 - libgcc >=13 - libzlib >=1.3.1,<2.0a0 + constrains: + - xorg-libx11 >=1.8.12,<2.0a0 license: TCL license_family: BSD - size: 3285204 - timestamp: 1748387766691 + size: 3284905 + timestamp: 1763054914403 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/tk-8.6.13-h892fb3f_3.conda + sha256: ad0c67cb03c163a109820dc9ecf77faf6ec7150e942d1e8bb13e5d39dc058ab7 + md5: a73d54a5abba6543cb2f0af1bfbd6851 + depends: + - __osx >=11.0 + - libzlib >=1.3.1,<2.0a0 + license: TCL + license_family: BSD + size: 3125484 + timestamp: 1763055028377 - conda: https://conda.anaconda.org/conda-forge/noarch/tzdata-2025b-h78e105d_0.conda sha256: 5aaa366385d716557e365f0a4e9c3fca43ba196872abbbe3d56bb610d131e192 md5: 4222072737ccff51314b5ece9c7d6f5a license: LicenseRef-Public-Domain size: 122968 timestamp: 1742727099393 -- 
conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb8e6e7a_2.conda - sha256: a4166e3d8ff4e35932510aaff7aa90772f84b4d07e9f6f83c614cba7ceefe0eb - md5: 6432cb5d4ac0046c3ac0a8a0f95842f9 +- conda: https://conda.anaconda.org/conda-forge/linux-64/zstd-1.5.7-hb78ec9c_6.conda + sha256: 68f0206ca6e98fea941e5717cec780ed2873ffabc0e1ed34428c061e2c6268c7 + md5: 4a13eeac0b5c8e5b8ab496e6c4ddd829 depends: - __glibc >=2.17,<3.0.a0 - - libgcc >=13 - - libstdcxx >=13 - libzlib >=1.3.1,<2.0a0 license: BSD-3-Clause license_family: BSD - size: 567578 - timestamp: 1742433379869 + size: 601375 + timestamp: 1764777111296 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/zstd-1.5.7-hbf9d68e_6.conda + sha256: 9485ba49e8f47d2b597dd399e88f4802e100851b27c21d7525625b0b4025a5d9 + md5: ab136e4c34e97f34fb621d2592a393d8 + depends: + - __osx >=11.0 + - libzlib >=1.3.1,<2.0a0 + license: BSD-3-Clause + license_family: BSD + size: 433413 + timestamp: 1764777166076 diff --git a/pixi.toml b/pixi.toml index 6ba12fc..07f2afd 100644 --- a/pixi.toml +++ b/pixi.toml @@ -2,7 +2,7 @@ authors = [] channels = ["conda-forge"] name = "workshop-open-source-geospatial" -platforms = ["linux-64"] +platforms = ["linux-64", "osx-arm64"] version = "0.1.0" [tasks]