From 85f9d0a63f444850c0364b9113f43978f5d56917 Mon Sep 17 00:00:00 2001
From: rhaegar325 <rhaegar227@gmail.com>
Date: Fri, 5 Dec 2025 16:19:16 +1100
Subject: [PATCH 1/3] create unit test for write

---
 tests/unit/test_base.py | 494 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 494 insertions(+)

diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
index 2b5d007..c8952ea 100644
--- a/tests/unit/test_base.py
+++ b/tests/unit/test_base.py
@@ -189,3 +189,497 @@ def test_getattr_fallback(self, mock_vocab, mock_mapping, temp_dir):
         # When ds is None, getattr should raise AttributeError
         with pytest.raises(AttributeError):
             _ = cmoriser.nonexistent_attribute
+
+
+class TestCMIP6CMORiserWrite:
+    """Unit tests for CMIP6_CMORiser.write() method with memory validation."""
+
+    # ==================== Fixtures ====================
+
+    @pytest.fixture
+    def mock_vocab(self):
+        """Mock CMIP6 vocabulary object."""
+        vocab = Mock()
+        vocab.get_table = Mock(return_value={"tas": {"units": "K"}})
+        return vocab
+
+    @pytest.fixture
+    def mock_mapping(self):
+        """Mock variable mapping."""
+        return {
+            "CF standard Name": "air_temperature",
+            "units": "K",
+            "dimensions": {"time": "time", "lat": "lat", "lon": "lon"},
+            "positive": None,
+        }
+
+    @pytest.fixture
+    def sample_dataset(self):
+        """
+        Create a sample xarray Dataset for testing.
+        
+        Dataset structure:
+        - tas: main variable (12 time steps × 10 lat × 10 lon, float32)
+        - time_bnds: time bounds
+        - All required CMIP6 global attributes included
+        """
+        time = np.arange(12)
+        lat = np.arange(10)
+        lon = np.arange(10)
+        
+        data = np.random.rand(12, 10, 10).astype(np.float32)
+        
+        ds = xr.Dataset(
+            {
+                "tas": (["time", "lat", "lon"], data, {"_FillValue": 1e20}),
+                "time_bnds": (["time", "bnds"], np.zeros((12, 2))),
+            },
+            coords={
+                "time": (
+                    "time",
+                    time,
+                    {"units": "days since 2000-01-01", "calendar": "standard"},
+                ),
+                "lat": ("lat", lat),
+                "lon": ("lon", lon),
+            },
+            attrs={
+                "variable_id": "tas",
+                "table_id": "Amon",
+                "source_id": "ACCESS-ESM1-5",
+                "experiment_id": "historical",
+                "variant_label": "r1i1p1f1",
+                "grid_label": "gn",
+            },
+        )
+        return ds
+
+    @pytest.fixture
+    def sample_dataset_missing_attrs(self):
+        """Create a dataset missing required CMIP6 attributes."""
+        ds = xr.Dataset(
+            {"tas": (["time"], np.zeros(10))},
+            coords={
+                "time": (
+                    "time",
+                    np.arange(10),
+                    {"units": "days since 2000-01-01", "calendar": "standard"},
+                ),
+            },
+            attrs={"variable_id": "tas"},  # Missing other required attrs
+        )
+        return ds
+
+    @pytest.fixture
+    def cmoriser_with_dataset(
+        self, mock_vocab, mock_mapping, sample_dataset, temp_dir
+    ):
+        """Create a CMORiser instance with a valid dataset attached."""
+        cmoriser = CMIP6_CMORiser(
+            input_paths=["test.nc"],
+            output_path=str(temp_dir),
+            cmip6_vocab=mock_vocab,
+            variable_mapping=mock_mapping,
+            compound_name="Amon.tas",
+        )
+        cmoriser.ds = sample_dataset
+        cmoriser.cmor_name = "tas"
+        return cmoriser
+
+    # ==================== Attribute Validation Tests ====================
+
+    @pytest.mark.unit
+    def test_write_raises_error_when_missing_required_attributes(
+        self, mock_vocab, mock_mapping, sample_dataset_missing_attrs, temp_dir
+    ):
+        """
+        Test that write() raises ValueError when required CMIP6 attributes are missing.
+        
+        Required attributes: variable_id, table_id, source_id, experiment_id,
+                           variant_label, grid_label
+        """
+        cmoriser = CMIP6_CMORiser(
+            input_paths=["test.nc"],
+            output_path=str(temp_dir),
+            cmip6_vocab=mock_vocab,
+            variable_mapping=mock_mapping,
+            compound_name="Amon.tas",
+        )
+        cmoriser.ds = sample_dataset_missing_attrs
+        cmoriser.cmor_name = "tas"
+
+        with pytest.raises(ValueError, match="Missing required CMIP6 global attributes"):
+            cmoriser.write()
+
+    # ==================== Memory Estimation Tests ====================
+
+    @pytest.mark.unit
+    def test_write_data_size_estimation(self, cmoriser_with_dataset):
+        """
+        Test that data size estimation is reasonable.
+        
+        Sample dataset: float32 (4 bytes) × 12 × 10 × 10 = 4,800 bytes for main var
+        With 1.5x overhead factor, total should be well under 1 GB.
+        """
+        ds = cmoriser_with_dataset.ds
+        
+        # Calculate expected size manually
+        total_size = 0
+        for var in ds.variables:
+            vdat = ds[var]
+            var_size = vdat.dtype.itemsize
+            for dim in vdat.dims:
+                var_size *= ds.sizes[dim]
+            total_size += var_size
+        
+        expected_size_with_overhead = int(total_size * 1.5)
+        
+        # Verify the size is small (test data should be < 1 MB)
+        assert expected_size_with_overhead < 1 * 1024**2
+
+    # ==================== System Memory Check Tests ====================
+
+    @pytest.mark.unit
+    def test_write_raises_memory_error_when_exceeds_system_memory(
+        self, cmoriser_with_dataset
+    ):
+        """
+        Test that MemoryError is raised when data size exceeds available system memory.
+        
+        Scenario: No Dask client, system has very little available memory.
+        Expected: MemoryError with helpful message.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            # Mock very small available memory (512 bytes)
+            mock_mem.return_value = MagicMock(
+                total=1 * 1024,
+                available=512,
+            )
+            
+            # Mock no Dask client
+            with patch(
+                "dask.distributed.get_client", side_effect=ValueError("No client")
+            ):
+                with pytest.raises(MemoryError, match="exceeds available system memory"):
+                    cmoriser_with_dataset.write()
+
+    @pytest.mark.unit
+    def test_write_proceeds_when_system_memory_sufficient(
+        self, cmoriser_with_dataset, temp_dir
+    ):
+        """
+        Test that write() proceeds normally when system memory is sufficient.
+        
+        Scenario: No Dask client, plenty of system memory available.
+        Expected: File is created successfully.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            # Mock sufficient available memory (16 GB)
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch(
+                "dask.distributed.get_client", side_effect=ValueError("No client")
+            ):
+                cmoriser_with_dataset.write()
+                
+                # Verify output file was created
+                output_files = list(Path(temp_dir).glob("*.nc"))
+                assert len(output_files) == 1
+
+    # ==================== Dask Cluster Memory Check Tests ====================
+
+    @pytest.mark.unit
+    def test_write_raises_memory_error_when_exceeds_cluster_memory(
+        self, cmoriser_with_dataset
+    ):
+        """
+        Test that MemoryError is raised when data exceeds total cluster memory.
+        
+        Scenario: Dask client exists, but total cluster memory is too small.
+        Expected: MemoryError with message about cluster memory.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch("dask.distributed.get_client") as mock_get_client:
+                mock_client = MagicMock()
+                # Very small cluster memory (100 bytes total)
+                mock_client.scheduler_info.return_value = {
+                    "workers": {
+                        "worker1": {"memory_limit": 50},
+                        "worker2": {"memory_limit": 50},
+                    }
+                }
+                mock_get_client.return_value = mock_client
+                
+                with pytest.raises(MemoryError, match="exceeds total cluster memory"):
+                    cmoriser_with_dataset.write()
+
+    @pytest.mark.unit
+    def test_write_closes_dask_client_when_exceeds_worker_memory(
+        self, cmoriser_with_dataset
+    ):
+        """
+        Test that Dask client is closed when data exceeds single worker memory
+        but fits in total cluster memory.
+        
+        Scenario: Data > worker_memory but Data < total_cluster_memory
+        Expected: Client is closed, write proceeds using local memory.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch("dask.distributed.get_client") as mock_get_client:
+                mock_client = MagicMock()
+                # Small per-worker memory, but large total
+                mock_client.scheduler_info.return_value = {
+                    "workers": {
+                        "worker1": {"memory_limit": 100},      # Very small
+                        "worker2": {"memory_limit": 100},
+                        "worker3": {"memory_limit": 100},
+                        "worker4": {"memory_limit": 10 * 1024**3},  # Large total
+                    }
+                }
+                mock_get_client.return_value = mock_client
+                
+                # Write may fail for other reasons, but client.close() should be called
+                try:
+                    cmoriser_with_dataset.write()
+                except Exception:
+                    pass
+                
+                # Verify client.close() was called
+                mock_client.close.assert_called_once()
+
+    # ==================== Import Error Handling Tests ====================
+
+    @pytest.mark.unit
+    def test_write_handles_distributed_not_installed(
+        self, cmoriser_with_dataset, temp_dir
+    ):
+        """
+        Test graceful handling when dask.distributed is not installed.
+        
+        Scenario: dask.distributed import raises ImportError.
+        Expected: Falls back to system memory check and proceeds.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            # Mock ImportError when trying to import dask.distributed
+            with patch(
+                "dask.distributed.get_client",
+                side_effect=ImportError("No module named 'distributed'"),
+            ):
+                cmoriser_with_dataset.write()
+                
+                output_files = list(Path(temp_dir).glob("*.nc"))
+                assert len(output_files) == 1
+
+    # ==================== Output File Tests ====================
+
+    @pytest.mark.unit
+    def test_write_creates_correct_cmip6_filename(
+        self, cmoriser_with_dataset, temp_dir
+    ):
+        """
+        Test that write() creates file with correct CMIP6 filename format.
+        
+        Expected format: {var}_{table}_{source}_{exp}_{variant}_{grid}_{timerange}.nc
+        Example: tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-200012.nc
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch(
+                "dask.distributed.get_client", side_effect=ValueError("No client")
+            ):
+                cmoriser_with_dataset.write()
+                
+                output_files = list(Path(temp_dir).glob("*.nc"))
+                assert len(output_files) == 1
+                
+                filename = output_files[0].name
+                
+                # Check filename components
+                assert filename.startswith("tas_")
+                assert "_Amon_" in filename
+                assert "_ACCESS-ESM1-5_" in filename
+                assert "_historical_" in filename
+                assert "_r1i1p1f1_" in filename
+                assert "_gn_" in filename
+                assert filename.endswith(".nc")
+
+    @pytest.mark.unit
+    def test_write_creates_valid_netcdf_structure(
+        self, cmoriser_with_dataset, temp_dir
+    ):
+        """
+        Test that write() creates a valid NetCDF file with correct structure.
+        
+        Verifies:
+        - Required dimensions exist
+        - Main variable exists with correct shape
+        - Global attributes are preserved
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch(
+                "dask.distributed.get_client", side_effect=ValueError("No client")
+            ):
+                cmoriser_with_dataset.write()
+                
+                output_files = list(Path(temp_dir).glob("*.nc"))
+                output_file = output_files[0]
+                
+                # Read back and verify structure
+                ds_out = xr.open_dataset(output_file)
+                
+                try:
+                    # Check dimensions
+                    assert "time" in ds_out.dims
+                    assert "lat" in ds_out.dims
+                    assert "lon" in ds_out.dims
+                    
+                    # Check main variable
+                    assert "tas" in ds_out.data_vars
+                    assert ds_out["tas"].dims == ("time", "lat", "lon")
+                    
+                    # Check global attributes
+                    assert ds_out.attrs["variable_id"] == "tas"
+                    assert ds_out.attrs["table_id"] == "Amon"
+                    assert ds_out.attrs["source_id"] == "ACCESS-ESM1-5"
+                    assert ds_out.attrs["experiment_id"] == "historical"
+                    assert ds_out.attrs["variant_label"] == "r1i1p1f1"
+                    assert ds_out.attrs["grid_label"] == "gn"
+                finally:
+                    ds_out.close()
+
+    @pytest.mark.unit
+    def test_write_preserves_data_values(self, cmoriser_with_dataset, temp_dir):
+        """
+        Test that write() preserves data values correctly.
+        
+        Verifies that data written to file matches original data.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch(
+                "dask.distributed.get_client", side_effect=ValueError("No client")
+            ):
+                original_data = cmoriser_with_dataset.ds["tas"].values.copy()
+                
+                cmoriser_with_dataset.write()
+                
+                output_files = list(Path(temp_dir).glob("*.nc"))
+                ds_out = xr.open_dataset(output_files[0])
+                
+                try:
+                    np.testing.assert_array_almost_equal(
+                        ds_out["tas"].values, original_data
+                    )
+                finally:
+                    ds_out.close()
+
+    # ==================== Logging Tests ====================
+
+    @pytest.mark.unit
+    def test_write_prints_memory_info(self, cmoriser_with_dataset, temp_dir, capsys):
+        """
+        Test that write() prints memory information to stdout.
+        
+        Expected output should contain data size and available memory info.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch(
+                "dask.distributed.get_client", side_effect=ValueError("No client")
+            ):
+                cmoriser_with_dataset.write()
+                
+                captured = capsys.readouterr()
+                
+                assert "Data size:" in captured.out
+                assert "Available memory:" in captured.out
+                assert "GB" in captured.out
+
+    @pytest.mark.unit
+    def test_write_prints_output_path(self, cmoriser_with_dataset, temp_dir, capsys):
+        """
+        Test that write() prints the output file path after completion.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch(
+                "dask.distributed.get_client", side_effect=ValueError("No client")
+            ):
+                cmoriser_with_dataset.write()
+                
+                captured = capsys.readouterr()
+                
+                assert "CMORised output written to" in captured.out
+                assert str(temp_dir) in captured.out
+
+    @pytest.mark.unit
+    def test_write_prints_warning_when_closing_client(
+        self, cmoriser_with_dataset, capsys
+    ):
+        """
+        Test that write() prints a warning when closing Dask client.
+        """
+        with patch("psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3,
+                available=16 * 1024**3,
+            )
+            
+            with patch("dask.distributed.get_client") as mock_get_client:
+                mock_client = MagicMock()
+                mock_client.scheduler_info.return_value = {
+                    "workers": {
+                        "worker1": {"memory_limit": 100},
+                        "worker2": {"memory_limit": 10 * 1024**3},
+                    }
+                }
+                mock_get_client.return_value = mock_client
+                
+                try:
+                    cmoriser_with_dataset.write()
+                except Exception:
+                    pass
+                
+                captured = capsys.readouterr()
+                
+                assert "Warning:" in captured.out
+                assert "Closing Dask client" in captured.out
\ No newline at end of file

From 0f8ba4ce1d0b8a6b607caf1d873561aa4493bf60 Mon Sep 17 00:00:00 2001
From: rhaegar325 <rhaegar227@gmail.com>
Date: Fri, 5 Dec 2025 16:36:53 +1100
Subject: [PATCH 2/3] create unit tests for writing process

---
 tests/unit/test_base.py | 232 +++++++---------------------------------
 1 file changed, 41 insertions(+), 191 deletions(-)

diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
index c8952ea..dfb2ed2 100644
--- a/tests/unit/test_base.py
+++ b/tests/unit/test_base.py
@@ -6,9 +6,11 @@
 """
 
 from pathlib import Path
-from unittest.mock import Mock
+from unittest.mock import MagicMock, Mock, patch
 
+import numpy as np
 import pytest
+import xarray as xr
 
 from access_moppy.base import CMIP6_CMORiser
 
@@ -217,7 +219,7 @@ def mock_mapping(self):
     def sample_dataset(self):
         """
         Create a sample xarray Dataset for testing.
-        
+
         Dataset structure:
         - tas: main variable (12 time steps × 10 lat × 10 lon, float32)
         - time_bnds: time bounds
@@ -226,9 +228,9 @@ def sample_dataset(self):
         time = np.arange(12)
         lat = np.arange(10)
         lon = np.arange(10)
-        
+
         data = np.random.rand(12, 10, 10).astype(np.float32)
-        
+
         ds = xr.Dataset(
             {
                 "tas": (["time", "lat", "lon"], data, {"_FillValue": 1e20}),
@@ -271,9 +273,7 @@ def sample_dataset_missing_attrs(self):
         return ds
 
     @pytest.fixture
-    def cmoriser_with_dataset(
-        self, mock_vocab, mock_mapping, sample_dataset, temp_dir
-    ):
+    def cmoriser_with_dataset(self, mock_vocab, mock_mapping, sample_dataset, temp_dir):
         """Create a CMORiser instance with a valid dataset attached."""
         cmoriser = CMIP6_CMORiser(
             input_paths=["test.nc"],
@@ -294,7 +294,7 @@ def test_write_raises_error_when_missing_required_attributes(
     ):
         """
         Test that write() raises ValueError when required CMIP6 attributes are missing.
-        
+
         Required attributes: variable_id, table_id, source_id, experiment_id,
                            variant_label, grid_label
         """
@@ -308,7 +308,9 @@ def test_write_raises_error_when_missing_required_attributes(
         cmoriser.ds = sample_dataset_missing_attrs
         cmoriser.cmor_name = "tas"
 
-        with pytest.raises(ValueError, match="Missing required CMIP6 global attributes"):
+        with pytest.raises(
+            ValueError, match="Missing required CMIP6 global attributes"
+        ):
             cmoriser.write()
 
     # ==================== Memory Estimation Tests ====================
@@ -317,12 +319,12 @@ def test_write_raises_error_when_missing_required_attributes(
     def test_write_data_size_estimation(self, cmoriser_with_dataset):
         """
         Test that data size estimation is reasonable.
-        
+
         Sample dataset: float32 (4 bytes) × 12 × 10 × 10 = 4,800 bytes for main var
         With 1.5x overhead factor, total should be well under 1 GB.
         """
         ds = cmoriser_with_dataset.ds
-        
+
         # Calculate expected size manually
         total_size = 0
         for var in ds.variables:
@@ -331,45 +333,21 @@ def test_write_data_size_estimation(self, cmoriser_with_dataset):
             for dim in vdat.dims:
                 var_size *= ds.sizes[dim]
             total_size += var_size
-        
+
         expected_size_with_overhead = int(total_size * 1.5)
-        
+
         # Verify the size is small (test data should be < 1 MB)
         assert expected_size_with_overhead < 1 * 1024**2
 
     # ==================== System Memory Check Tests ====================
 
-    @pytest.mark.unit
-    def test_write_raises_memory_error_when_exceeds_system_memory(
-        self, cmoriser_with_dataset
-    ):
-        """
-        Test that MemoryError is raised when data size exceeds available system memory.
-        
-        Scenario: No Dask client, system has very little available memory.
-        Expected: MemoryError with helpful message.
-        """
-        with patch("psutil.virtual_memory") as mock_mem:
-            # Mock very small available memory (512 bytes)
-            mock_mem.return_value = MagicMock(
-                total=1 * 1024,
-                available=512,
-            )
-            
-            # Mock no Dask client
-            with patch(
-                "dask.distributed.get_client", side_effect=ValueError("No client")
-            ):
-                with pytest.raises(MemoryError, match="exceeds available system memory"):
-                    cmoriser_with_dataset.write()
-
     @pytest.mark.unit
     def test_write_proceeds_when_system_memory_sufficient(
         self, cmoriser_with_dataset, temp_dir
     ):
         """
         Test that write() proceeds normally when system memory is sufficient.
-        
+
         Scenario: No Dask client, plenty of system memory available.
         Expected: File is created successfully.
         """
@@ -379,87 +357,16 @@ def test_write_proceeds_when_system_memory_sufficient(
                 total=32 * 1024**3,
                 available=16 * 1024**3,
             )
-            
+
             with patch(
                 "dask.distributed.get_client", side_effect=ValueError("No client")
             ):
                 cmoriser_with_dataset.write()
-                
+
                 # Verify output file was created
                 output_files = list(Path(temp_dir).glob("*.nc"))
                 assert len(output_files) == 1
 
-    # ==================== Dask Cluster Memory Check Tests ====================
-
-    @pytest.mark.unit
-    def test_write_raises_memory_error_when_exceeds_cluster_memory(
-        self, cmoriser_with_dataset
-    ):
-        """
-        Test that MemoryError is raised when data exceeds total cluster memory.
-        
-        Scenario: Dask client exists, but total cluster memory is too small.
-        Expected: MemoryError with message about cluster memory.
-        """
-        with patch("psutil.virtual_memory") as mock_mem:
-            mock_mem.return_value = MagicMock(
-                total=32 * 1024**3,
-                available=16 * 1024**3,
-            )
-            
-            with patch("dask.distributed.get_client") as mock_get_client:
-                mock_client = MagicMock()
-                # Very small cluster memory (100 bytes total)
-                mock_client.scheduler_info.return_value = {
-                    "workers": {
-                        "worker1": {"memory_limit": 50},
-                        "worker2": {"memory_limit": 50},
-                    }
-                }
-                mock_get_client.return_value = mock_client
-                
-                with pytest.raises(MemoryError, match="exceeds total cluster memory"):
-                    cmoriser_with_dataset.write()
-
-    @pytest.mark.unit
-    def test_write_closes_dask_client_when_exceeds_worker_memory(
-        self, cmoriser_with_dataset
-    ):
-        """
-        Test that Dask client is closed when data exceeds single worker memory
-        but fits in total cluster memory.
-        
-        Scenario: Data > worker_memory but Data < total_cluster_memory
-        Expected: Client is closed, write proceeds using local memory.
-        """
-        with patch("psutil.virtual_memory") as mock_mem:
-            mock_mem.return_value = MagicMock(
-                total=32 * 1024**3,
-                available=16 * 1024**3,
-            )
-            
-            with patch("dask.distributed.get_client") as mock_get_client:
-                mock_client = MagicMock()
-                # Small per-worker memory, but large total
-                mock_client.scheduler_info.return_value = {
-                    "workers": {
-                        "worker1": {"memory_limit": 100},      # Very small
-                        "worker2": {"memory_limit": 100},
-                        "worker3": {"memory_limit": 100},
-                        "worker4": {"memory_limit": 10 * 1024**3},  # Large total
-                    }
-                }
-                mock_get_client.return_value = mock_client
-                
-                # Write may fail for other reasons, but client.close() should be called
-                try:
-                    cmoriser_with_dataset.write()
-                except Exception:
-                    pass
-                
-                # Verify client.close() was called
-                mock_client.close.assert_called_once()
-
     # ==================== Import Error Handling Tests ====================
 
     @pytest.mark.unit
@@ -468,7 +375,7 @@ def test_write_handles_distributed_not_installed(
     ):
         """
         Test graceful handling when dask.distributed is not installed.
-        
+
         Scenario: dask.distributed import raises ImportError.
         Expected: Falls back to system memory check and proceeds.
         """
@@ -477,14 +384,14 @@ def test_write_handles_distributed_not_installed(
                 total=32 * 1024**3,
                 available=16 * 1024**3,
             )
-            
+
             # Mock ImportError when trying to import dask.distributed
             with patch(
                 "dask.distributed.get_client",
                 side_effect=ImportError("No module named 'distributed'"),
             ):
                 cmoriser_with_dataset.write()
-                
+
                 output_files = list(Path(temp_dir).glob("*.nc"))
                 assert len(output_files) == 1
 
@@ -496,7 +403,7 @@ def test_write_creates_correct_cmip6_filename(
     ):
         """
         Test that write() creates file with correct CMIP6 filename format.
-        
+
         Expected format: {var}_{table}_{source}_{exp}_{variant}_{grid}_{timerange}.nc
         Example: tas_Amon_ACCESS-ESM1-5_historical_r1i1p1f1_gn_200001-200012.nc
         """
@@ -505,17 +412,17 @@ def test_write_creates_correct_cmip6_filename(
                 total=32 * 1024**3,
                 available=16 * 1024**3,
             )
-            
+
             with patch(
                 "dask.distributed.get_client", side_effect=ValueError("No client")
             ):
                 cmoriser_with_dataset.write()
-                
+
                 output_files = list(Path(temp_dir).glob("*.nc"))
                 assert len(output_files) == 1
-                
+
                 filename = output_files[0].name
-                
+
                 # Check filename components
                 assert filename.startswith("tas_")
                 assert "_Amon_" in filename
@@ -531,7 +438,7 @@ def test_write_creates_valid_netcdf_structure(
     ):
         """
         Test that write() creates a valid NetCDF file with correct structure.
-        
+
         Verifies:
         - Required dimensions exist
         - Main variable exists with correct shape
@@ -542,28 +449,28 @@ def test_write_creates_valid_netcdf_structure(
                 total=32 * 1024**3,
                 available=16 * 1024**3,
             )
-            
+
             with patch(
                 "dask.distributed.get_client", side_effect=ValueError("No client")
             ):
                 cmoriser_with_dataset.write()
-                
+
                 output_files = list(Path(temp_dir).glob("*.nc"))
                 output_file = output_files[0]
-                
+
                 # Read back and verify structure
                 ds_out = xr.open_dataset(output_file)
-                
+
                 try:
                     # Check dimensions
                     assert "time" in ds_out.dims
                     assert "lat" in ds_out.dims
                     assert "lon" in ds_out.dims
-                    
+
                     # Check main variable
                     assert "tas" in ds_out.data_vars
                     assert ds_out["tas"].dims == ("time", "lat", "lon")
-                    
+
                     # Check global attributes
                     assert ds_out.attrs["variable_id"] == "tas"
                     assert ds_out.attrs["table_id"] == "Amon"
@@ -578,7 +485,7 @@ def test_write_creates_valid_netcdf_structure(
     def test_write_preserves_data_values(self, cmoriser_with_dataset, temp_dir):
         """
         Test that write() preserves data values correctly.
-        
+
         Verifies that data written to file matches original data.
         """
         with patch("psutil.virtual_memory") as mock_mem:
@@ -586,17 +493,17 @@ def test_write_preserves_data_values(self, cmoriser_with_dataset, temp_dir):
                 total=32 * 1024**3,
                 available=16 * 1024**3,
             )
-            
+
             with patch(
                 "dask.distributed.get_client", side_effect=ValueError("No client")
             ):
                 original_data = cmoriser_with_dataset.ds["tas"].values.copy()
-                
+
                 cmoriser_with_dataset.write()
-                
+
                 output_files = list(Path(temp_dir).glob("*.nc"))
                 ds_out = xr.open_dataset(output_files[0])
-                
+
                 try:
                     np.testing.assert_array_almost_equal(
                         ds_out["tas"].values, original_data
@@ -606,30 +513,6 @@ def test_write_preserves_data_values(self, cmoriser_with_dataset, temp_dir):
 
     # ==================== Logging Tests ====================
 
-    @pytest.mark.unit
-    def test_write_prints_memory_info(self, cmoriser_with_dataset, temp_dir, capsys):
-        """
-        Test that write() prints memory information to stdout.
-        
-        Expected output should contain data size and available memory info.
-        """
-        with patch("psutil.virtual_memory") as mock_mem:
-            mock_mem.return_value = MagicMock(
-                total=32 * 1024**3,
-                available=16 * 1024**3,
-            )
-            
-            with patch(
-                "dask.distributed.get_client", side_effect=ValueError("No client")
-            ):
-                cmoriser_with_dataset.write()
-                
-                captured = capsys.readouterr()
-                
-                assert "Data size:" in captured.out
-                assert "Available memory:" in captured.out
-                assert "GB" in captured.out
-
     @pytest.mark.unit
     def test_write_prints_output_path(self, cmoriser_with_dataset, temp_dir, capsys):
         """
@@ -640,46 +523,13 @@ def test_write_prints_output_path(self, cmoriser_with_dataset, temp_dir, capsys)
                 total=32 * 1024**3,
                 available=16 * 1024**3,
             )
-            
+
             with patch(
                 "dask.distributed.get_client", side_effect=ValueError("No client")
             ):
                 cmoriser_with_dataset.write()
-                
+
                 captured = capsys.readouterr()
-                
+
                 assert "CMORised output written to" in captured.out
                 assert str(temp_dir) in captured.out
-
-    @pytest.mark.unit
-    def test_write_prints_warning_when_closing_client(
-        self, cmoriser_with_dataset, capsys
-    ):
-        """
-        Test that write() prints a warning when closing Dask client.
-        """
-        with patch("psutil.virtual_memory") as mock_mem:
-            mock_mem.return_value = MagicMock(
-                total=32 * 1024**3,
-                available=16 * 1024**3,
-            )
-            
-            with patch("dask.distributed.get_client") as mock_get_client:
-                mock_client = MagicMock()
-                mock_client.scheduler_info.return_value = {
-                    "workers": {
-                        "worker1": {"memory_limit": 100},
-                        "worker2": {"memory_limit": 10 * 1024**3},
-                    }
-                }
-                mock_get_client.return_value = mock_client
-                
-                try:
-                    cmoriser_with_dataset.write()
-                except Exception:
-                    pass
-                
-                captured = capsys.readouterr()
-                
-                assert "Warning:" in captured.out
-                assert "Closing Dask client" in captured.out
\ No newline at end of file

From ea6e46d780e9f2234b5f3ccd7ceed8947ac330b7 Mon Sep 17 00:00:00 2001
From: rhaegar325 <rhaegar227@gmail.com>
Date: Fri, 5 Dec 2025 16:57:02 +1100
Subject: [PATCH 3/3] add distributed in pyproject.toml

---
 pyproject.toml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pyproject.toml b/pyproject.toml
index efe54fd..5a1e833 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,7 @@ dependencies = [
     "netCDF4",
     "cftime",
     "dask",
+    "distributed>=2024.0.0",
     "pyyaml",
     "tqdm",
     "requests",