kedro-org · SajidAlamQB · Jan 8, 2024 · Jan 3, 2024 · Jan 3, 2024 · Jan 3, 2024
@@ -7,6 +7,7 @@
 * Allowed modern versions of JupyterLab and Jupyter Notebooks.
 * Removed setuptools dependency
 * Added `source_dir` explicitly in `pyproject.toml` for non-src layout project.
+* `MemoryDataset` entries are now included in free outputs.
 
 ## Breaking changes to the API
 * Added logging about not using async mode in `SequentiallRunner` and `ParallelRunner`.

@@ -94,9 +94,16 @@ def run(
                 f"Pipeline input(s) {unsatisfied} not found in the DataCatalog"
             )
 
+        # Identify MemoryDataset in the catalog
+        memory_datasets = {
+            ds_name
+            for ds_name, ds in catalog._datasets.items()
+            if isinstance(ds, MemoryDataset)
+        }
+
         # Check if there's any output datasets that aren't in the catalog and don't match a pattern
-        # in the catalog.
-        free_outputs = pipeline.outputs() - set(registered_ds)
+        # in the catalog and include MemoryDataset.
+        free_outputs = pipeline.outputs() - (set(registered_ds) - memory_datasets)
 
         # Register the default dataset pattern with the catalog
         catalog = catalog.shallow_copy(

@@ -165,3 +165,13 @@ def two_branches_crossed_pipeline():
             node(identity, "ds3_B", "ds4_B", name="node4_B"),
         ]
     )
+
+
+@pytest.fixture
+def pipeline_with_memory_datasets():
+    return pipeline(
+        [
+            node(func=identity, inputs="Input1", outputs="MemOutput1", name="node1"),
+            node(func=identity, inputs="Input2", outputs="MemOutput2", name="node2"),
+        ]
+    )
@@ -7,7 +7,13 @@
 import pytest
 
 from kedro.framework.hooks import _create_hook_manager
-from kedro.io import AbstractDataset, DataCatalog, DatasetError, LambdaDataset
+from kedro.io import (
+    AbstractDataset,
+    DataCatalog,
+    DatasetError,
+    LambdaDataset,
+    MemoryDataset,
+)
 from kedro.pipeline import node
 from kedro.pipeline.modular_pipeline import pipeline as modular_pipeline
 from kedro.runner import SequentialRunner
@@ -279,3 +285,29 @@ def test_suggest_resume_scenario(
                 hook_manager=_create_hook_manager(),
             )
         assert re.search(expected_pattern, caplog.text)
+
+
+class TestMemoryDatasetBehaviour:
+    def test_run_includes_memory_datasets(self, pipeline_with_memory_datasets):
+        # Create a catalog with MemoryDataset entries and inputs for the pipeline
+        catalog = DataCatalog(
+            {
+                "Input1": LambdaDataset(load=lambda: "data1", save=lambda data: None),
+                "Input2": LambdaDataset(load=lambda: "data2", save=lambda data: None),
+                "MemOutput1": MemoryDataset(),
+                "MemOutput2": MemoryDataset(),
+            }
+        )
+
+        # Add a regular dataset to the catalog
+        catalog.add("RegularOutput", LambdaDataset(None, None, lambda: True))
+
+        # Run the pipeline
+        output = SequentialRunner().run(pipeline_with_memory_datasets, catalog)
+
+        # Check that MemoryDataset outputs are included in the run results
+        assert "MemOutput1" in output
+        assert "MemOutput2" in output
+        assert (
+            "RegularOutput" not in output
+        )  # This output is registered in DataCatalog and so should not be in free outputs