From 6c4b21c4d203aba1ca0c110514d2d54c9b342057 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Thu, 21 Jul 2022 17:39:20 +0200 Subject: [PATCH 01/12] Cherry pick compact_json_dump and extra_info Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 75 ++++++++++++++--- tests/unit/test_0Z_model_validation.py | 21 ++--- tests/unit/test_manual_testing.py | 109 ++++++++++++++++++++++++- tests/unit/utils.py | 5 +- 4 files changed, 185 insertions(+), 25 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 56aec1f49..c9197d5de 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -8,7 +8,7 @@ import json from pathlib import Path -from typing import Dict, List, Union +from typing import Any, Dict, List, IO, Optional, Union import numpy as np @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - list_data: List[Dict[str, np.ndarray]] + list_data: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert list of dataset to one single batch dataset @@ -71,7 +71,7 @@ def convert_list_to_batch_data( def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -89,6 +89,8 @@ def convert_python_to_numpy( arr: np.ndarray = initialize_array(data_type, component_name, len(component_list)) for i, component in enumerate(component_list): for property_name, value in component.items(): + if property_name == "extra": + continue if property_name not in arr[i].dtype.names: raise ValueError(f"Invalid property '{property_name}' for {component_name} {data_type} data.") try: @@ -107,7 +109,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -132,14 +134,16 @@ def convert_batch_to_list_data( single_dataset = {} for key, batch in batch_data.items(): if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i] : batch["indptr"][i + 1]] + single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] else: single_dataset[key] = batch[i, ...] list_data.append(single_dataset) return list_data -def convert_numpy_to_python(data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]) -> Union[Dict, List]: +def convert_numpy_to_python( + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] +) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data If an attribute is not available (NaN value), it will not be exported. 
@@ -181,18 +185,67 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar return convert_python_to_numpy(json_data, data_type) -def export_json_data(json_file: Path, data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], indent=2): +def export_json_data( + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, +): """ export json data Args: json_file: path to json file - data: A single or batch dataset for power-grid-model - indent: - indent of the file, default 2 + data: a single or batch dataset for power-grid-model + indent: indent of the file, default 2 + compact: write components on a single line + extra_info: extra information (in any json-serializable format), indexed on the object ids + e.g. a string representing the original id, or a dictionary storing even more information. Returns: Save to file """ json_data = convert_numpy_to_python(data) + + # Inject extra info + if extra_info is not None: + for component, objects in json_data.items(): + for obj in objects: + if obj["id"] in extra_info: + obj["extra"] = extra_info[obj["id"]] + with open(json_file, mode="w", encoding="utf-8") as file_pointer: - json.dump(json_data, file_pointer, indent=indent) + if compact and indent: + max_level = 4 if isinstance(json_data, list) else 3 + compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) + else: + json.dump(json_data, file_pointer, indent=indent) + + +def compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): + tab = " " * level * indent + if level >= max_level: + io_stream.write(tab) + json.dump(data, io_stream, indent=None) + elif isinstance(data, list): + io_stream.write(tab + "[\n") + n_obj = len(data) + for i, obj in enumerate(data, start=1): + compact_json_dump(obj, io_stream, indent, max_level, level + 1) + io_stream.write(",\n" if i < n_obj else "\n") + io_stream.write(tab + "]") + elif isinstance(data, dict): + io_stream.write(tab + "{\n") + n_obj = len(data) + for i, (key, obj) in enumerate(data.items(), start=1): + if level == max_level - 1 or not isinstance(obj, (list, dict)): + io_stream.write(tab + " " * indent + f'"{key}": ') + json.dump(obj, io_stream, indent=None) + else: + io_stream.write(tab + " " * indent + f'"{key}":\n') + compact_json_dump(obj, io_stream, indent, max_level, level + 2) + io_stream.write(",\n" if i < n_obj else "\n") + io_stream.write(tab + "}") + else: + io_stream.write(tab) + json.dump(data, io_stream, indent=None) diff --git a/tests/unit/test_0Z_model_validation.py b/tests/unit/test_0Z_model_validation.py index e6676dde8..f6cf5d549 100644 --- a/tests/unit/test_0Z_model_validation.py +++ b/tests/unit/test_0Z_model_validation.py @@ -23,7 +23,8 @@ pytest_cases(get_batch_cases=False), ) def test_single_validation( - case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, atol: float + case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, + atol: float ): # Initialization case_data = import_case_data(case_path, sym=sym) @@ -56,15 +57,15 @@ def test_single_validation( pytest_cases(get_batch_cases=True), ) def test_batch_validation( - case_id: str, - case_path: Path, - sym: bool, - calculation_type: str, - calculation_method: str, - rtol: float, - atol: float, - independent: bool, - cache_topology: bool, + case_id: str, + case_path: 
Path, + sym: bool, + calculation_type: str, + calculation_method: str, + rtol: float, + atol: float, + independent: bool, + cache_topology: bool, ): # Initialization case_data = import_case_data(case_path, sym=sym) diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index e9de242c0..82a24bd71 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 +import io from pathlib import Path from unittest.mock import patch, mock_open, MagicMock @@ -13,6 +14,7 @@ convert_python_to_numpy, export_json_data, is_nan, + compact_json_dump, ) @@ -75,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -132,3 +134,106 @@ def test_export_json_data(convert_mock: MagicMock, open_mock: MagicMock, json_du export_json_data(json_file=Path("output.json"), data={}, indent=2) convert_mock.assert_called_once() json_dump_mock.assert_called_once_with({"foo": [{"val": 123}]}, open_mock(), indent=2) + + +def test_compact_json_dump(): + data = { + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], + } + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=0) + assert string_stream.getvalue() == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=1) + assert string_stream.getvalue() == """{ + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] +}""" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=2) + assert string_stream.getvalue() == """{ + "node": + [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": + [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] +}""" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=3) + assert string_stream.getvalue() == """{ + "node": + [ + {"id": 1, "x": 2}, + {"id": 3, "x": 4} + ], + "line": + [ + {"id": 5, "x": 6}, + {"id": 7, "x": {"y": 8.1, "z": 8.2}} + ] +}""" + + +def test_compact_json_dump_string(): + data = "test" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=2) + assert string_stream.getvalue() == "\"test\"" + + +def test_compact_json_dump_deep(): + data = { + "foo": 1, + "bar": {"x": 2, "y": 3}, + } + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=10) + assert string_stream.getvalue() == """{ + "foo": 1, + "bar": + { + "x": 2, + "y": 3 + } +}""" + + +def test_compact_json_dump_batch(): + data = [ + { + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], + }, + { + "line": [{"id": 9, "x": 10}, {"id": 11, "x": 12}], + }, + ] + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, 
max_level=4) + assert string_stream.getvalue() == """[ + { + "node": + [ + {"id": 1, "x": 2}, + {"id": 3, "x": 4} + ], + "line": + [ + {"id": 5, "x": 6}, + {"id": 7, "x": {"y": 8.1, "z": 8.2}} + ] + }, + { + "line": + [ + {"id": 9, "x": 10}, + {"id": 11, "x": 12} + ] + } +]""" diff --git a/tests/unit/utils.py b/tests/unit/utils.py index b0d7aa2d4..a905d5186 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -16,7 +16,7 @@ DATA_PATH = BASE_PATH / "data" OUPUT_PATH = BASE_PATH / "output" EXPORT_OUTPUT = ("POWER_GRID_MODEL_VALIDATION_TEST_EXPORT" in os.environ) and ( - os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" + os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" ) @@ -108,7 +108,8 @@ def save_json_data(json_file: str, data: Union[dict, list]): def compare_result( - actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, atol: Union[float, Dict[str, float]] + actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, + atol: Union[float, Dict[str, float]] ): for key, expected_data in expected.items(): for col_name in expected_data.dtype.names: From 48efacab528485f143a83872afeb7dd5df122c76 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Thu, 21 Jul 2022 17:43:20 +0200 Subject: [PATCH 02/12] Black formatting Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 20 +++++++------- tests/unit/test_0Z_model_validation.py | 21 +++++++-------- tests/unit/test_manual_testing.py | 36 +++++++++++++++++++------- tests/unit/utils.py | 5 ++-- 4 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index c9197d5de..7375e0d0e 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - list_data: List[Dict[str, np.ndarray]] + list_data: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert list of dataset to one single batch dataset @@ -71,7 +71,7 @@ def convert_list_to_batch_data( def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -109,7 +109,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -134,7 +134,7 @@ def convert_batch_to_list_data( single_dataset = {} for key, batch in batch_data.items(): if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] + single_dataset[key] = batch["data"][batch["indptr"][i] : batch["indptr"][i + 1]] else: single_dataset[key] = batch[i, ...] 
list_data.append(single_dataset) @@ -142,7 +142,7 @@ def convert_batch_to_list_data( def convert_numpy_to_python( - data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data @@ -186,11 +186,11 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar def export_json_data( - json_file: Path, - data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], - indent: Optional[int] = 2, - compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, ): """ export json data diff --git a/tests/unit/test_0Z_model_validation.py b/tests/unit/test_0Z_model_validation.py index f6cf5d549..e6676dde8 100644 --- a/tests/unit/test_0Z_model_validation.py +++ b/tests/unit/test_0Z_model_validation.py @@ -23,8 +23,7 @@ pytest_cases(get_batch_cases=False), ) def test_single_validation( - case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, - atol: float + case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, atol: float ): # Initialization case_data = import_case_data(case_path, sym=sym) @@ -57,15 +56,15 @@ def test_single_validation( pytest_cases(get_batch_cases=True), ) def test_batch_validation( - case_id: str, - case_path: Path, - sym: bool, - calculation_type: str, - calculation_method: str, - rtol: float, - atol: float, - independent: bool, - cache_topology: bool, + case_id: str, + case_path: Path, + sym: bool, + calculation_type: str, + calculation_method: str, + rtol: float, + atol: float, + independent: bool, + cache_topology: bool, ): # Initialization case_data = import_case_data(case_path, sym=sym) diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 82a24bd71..8e4235dfb 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -77,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -144,27 +144,38 @@ def test_compact_json_dump(): string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=0) - assert string_stream.getvalue() == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + assert ( + string_stream.getvalue() + == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + ) string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=1) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": 
{"y": 8.1, "z": 8.2}}] }""" + ) string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=2) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] }""" + ) string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=3) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "node": [ {"id": 1, "x": 2}, @@ -176,6 +187,7 @@ def test_compact_json_dump(): {"id": 7, "x": {"y": 8.1, "z": 8.2}} ] }""" + ) def test_compact_json_dump_string(): @@ -183,7 +195,7 @@ def test_compact_json_dump_string(): string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=2) - assert string_stream.getvalue() == "\"test\"" + assert string_stream.getvalue() == '"test"' def test_compact_json_dump_deep(): @@ -194,7 +206,9 @@ def test_compact_json_dump_deep(): string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=10) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "foo": 1, "bar": { @@ -202,6 +216,7 @@ def test_compact_json_dump_deep(): "y": 3 } }""" + ) def test_compact_json_dump_batch(): @@ -216,7 +231,9 @@ def test_compact_json_dump_batch(): ] string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=4) - assert string_stream.getvalue() == """[ + assert ( + string_stream.getvalue() + == """[ { "node": [ @@ -237,3 +254,4 @@ def test_compact_json_dump_batch(): ] } ]""" + ) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index a905d5186..b0d7aa2d4 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -16,7 +16,7 @@ DATA_PATH = BASE_PATH / "data" OUPUT_PATH = BASE_PATH / "output" EXPORT_OUTPUT = ("POWER_GRID_MODEL_VALIDATION_TEST_EXPORT" in os.environ) and ( - os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" + os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" ) @@ -108,8 +108,7 @@ def save_json_data(json_file: str, data: Union[dict, list]): def compare_result( - actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, - atol: Union[float, Dict[str, float]] + actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, atol: Union[float, Dict[str, float]] ): for key, expected_data in expected.items(): for col_name in expected_data.dtype.names: From 5409cd50410cad714886e2ebacd2d5528865118e Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:11:09 +0200 Subject: [PATCH 03/12] Comments and restructure convert_list_to_batch_data Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 61 ++++++++++++++++---------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 7375e0d0e..0854bcd66 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,12 +34,12 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - list_data: List[Dict[str, np.ndarray]] + datasets: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ - Convert list of dataset to one single batch dataset + Convert a list of datasets to one single batch dataset Args: - list_data: list of dataset + datasets: list of dataset Returns: batch dataset @@ -48,30 +48,43 @@ def convert_list_to_batch_data( 
""" # List all *unique* types - all_types = list({x for single_batch in list_data for x in single_batch.keys()}) + components = {x for dataset in datasets for x in dataset.keys()} batch_data = {} - for comp_type in all_types: - # use 2D array if the type exists in all single dataset and the size is the same - if np.all([comp_type in x for x in list_data]) and np.unique([x[comp_type].size for x in list_data]).size == 1: - batch_data[comp_type] = np.stack([x[comp_type] for x in list_data], axis=0) + for component in components: + + # Create a 2D array if the component exists in all datasets and number of objects is the same in each dataset + comp_exists_in_all_datasets = all(component in x for x in datasets) + all_sizes_are_the_same = lambda: all(x[component].size == datasets[0][component].size for x in datasets) + if comp_exists_in_all_datasets and all_sizes_are_the_same(): + batch_data[component] = np.stack([x[component] for x in datasets], axis=0) continue + # otherwise use indptr/data dict indptr = [0] data = [] - for single_batch in list_data: - if comp_type not in single_batch: - indptr.append(indptr[-1]) + for dataset in datasets: + + # If the current dataset contains the component, increase the indptr for this batch and append the data + if component in dataset: + objects = dataset[component] + indptr.append(indptr[-1] + len(objects)) + data.append(objects) + + # If the current dataset does not contain the component, add the last indptr again. else: - single_data = single_batch[comp_type] - indptr.append(indptr[-1] + single_data.shape[0]) - data.append(single_data) - batch_data[comp_type] = {"indptr": np.array(indptr, dtype=np.int32), "data": np.concatenate(data, axis=0)} + indptr.append(indptr[-1]) + + # Convert the index pointers to a numpy array and combine the list of object numpy arrays into a singe + # numpy array. All objects of all batches are now stores in one large array, the index pointers define + # which elemets of the array (rows) belong to which batch. + batch_data[component] = {"indptr": np.array(indptr, dtype=np.int32), "data": np.concatenate(data, axis=0)} + return batch_data def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -109,7 +122,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -134,7 +147,7 @@ def convert_batch_to_list_data( single_dataset = {} for key, batch in batch_data.items(): if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i] : batch["indptr"][i + 1]] + single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] else: single_dataset[key] = batch[i, ...] 
list_data.append(single_dataset) @@ -142,7 +155,7 @@ def convert_batch_to_list_data( def convert_numpy_to_python( - data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data @@ -186,11 +199,11 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar def export_json_data( - json_file: Path, - data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], - indent: Optional[int] = 2, - compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, ): """ export json data From 65b086064f1e145d6093c7a21bd0c5296303f2d4 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:11:27 +0200 Subject: [PATCH 04/12] Comments and restructure convert_batch_to_list_data Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 63 ++++++++++++++++++-------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 0854bcd66..21b2d5442 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -38,6 +38,11 @@ def convert_list_to_batch_data( ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert a list of datasets to one single batch dataset + + Example data formats: + input: [{"node": <1d-array>, "line": <1d-array>}, {"node": <1d-array>, "line": <1d-array>}] + output: {"node": <2d-array>, "line": <2d-array>} + -or-: {"indptr": <1d-array>, "data": <1d-array>} Args: datasets: list of dataset @@ -65,14 +70,14 @@ def convert_list_to_batch_data( data = [] for dataset in datasets: - # If the current dataset contains the component, increase the indptr for this batch and append the data if component in dataset: + # If the current dataset contains the component, increase the indptr for this batch and append the data objects = dataset[component] indptr.append(indptr[-1] + len(objects)) data.append(objects) - # If the current dataset does not contain the component, add the last indptr again. else: + # If the current dataset does not contain the component, add the last indptr again. indptr.append(indptr[-1]) # Convert the index pointers to a numpy array and combine the list of object numpy arrays into a singe @@ -132,25 +137,45 @@ def convert_batch_to_list_data( Returns: list of single dataset """ - list_data = [] - # return empty list + + # If the batch data is empty, return an empty list if not batch_data: - return list_data - # get n_batch - one_data = next(iter(batch_data.values())) - if isinstance(one_data, dict): - n_batch = one_data["indptr"].size - 1 + return [] + + # Get the data for an arbitrary component; assuming that the number of batches of each component is the same. 
+ # The structure may differ per component + example_batch_data = next(iter(batch_data.values())) + + if isinstance(example_batch_data, np.ndarray): + # We expect the batch data to be a 2d numpy array of n_batches x n_objects + if len(example_batch_data.shape) != 2: + raise ValueError("Invalid batch data format") + n_batches = example_batch_data.shape[0] + elif isinstance(example_batch_data, dict): + # If the batch data is a dictionary, we assume that it is an indptr/data structure (otherwise it is an + # invalid dictionary). There is always one indptr more than there are batches. + if "indptr" not in example_batch_data: + raise ValueError("Invalid batch data format") + n_batches = example_batch_data["indptr"].size - 1 else: - n_batch = one_data.shape[0] - # convert - for i in range(n_batch): - single_dataset = {} - for key, batch in batch_data.items(): - if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] - else: - single_dataset[key] = batch[i, ...] - list_data.append(single_dataset) + # If the batch data is not a numpy array and not a dictionary, it is invalid + raise ValueError("Invalid batch data format") + + # Initialize an empty list with dictionaries + # Note that [{}] * n_batches would result in n copies of the same dict. + list_data = [{} for _ in range(n_batches)] + + # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data) + # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly. + for component, data in batch_data.items(): + if isinstance(data, np.ndarray): + # For 2d numpy arrays, copy each batch into an element of the list + for i, batch in enumerate(data): + list_data[i][component] = batch + else: + # For indptr/data structures, + for i, (idx0, idx1) in enumerate(zip(data["indptr"][:-1], data["indptr"][1:])): + list_data[i][component] = data["data"][idx0:idx1] return list_data From 5c501f375e3e7a6ad788fdfb6cf42fcfd956bd7b Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:32:08 +0200 Subject: [PATCH 05/12] Comments and restructure convert_python_to_numpy Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 53 +++++++++++++++++--------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 21b2d5442..72dae6b93 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -101,29 +101,46 @@ def convert_python_to_numpy( A single or batch dataset for power-grid-model """ - if isinstance(data, dict): - return_dict = {} - for component_name, component_list in data.items(): - arr: np.ndarray = initialize_array(data_type, component_name, len(component_list)) - for i, component in enumerate(component_list): - for property_name, value in component.items(): - if property_name == "extra": - continue - if property_name not in arr[i].dtype.names: - raise ValueError(f"Invalid property '{property_name}' for {component_name} {data_type} data.") - try: - arr[i][property_name] = value - except ValueError as ex: - raise ValueError(f"Invalid '{property_name}' value for {component_name} {data_type} data: {ex}") - - return_dict[component_name] = arr - return return_dict + # If the inpute data is a list, we are dealing with batch data. Each element in the list is a batch. 
We'll
+    # first convert each batch separately, by recursively calling this function for each batch. Then the numpy
+    # data for all batches is converted into a proper and compact numpy structure.
     if isinstance(data, list):
         list_data = [convert_python_to_numpy(json_dict, data_type=data_type) for json_dict in data]
         return convert_list_to_batch_data(list_data)
 
-    raise TypeError("Only list or dict is allowed in JSON data!")
+    # This should be a normal (non-batch) structure, with a list of objects (dictionaries) per component.
+    if not isinstance(data, dict):
+        raise TypeError("Only list or dict is allowed in JSON data!")
+
+    dataset: Dict[str, np.ndarray] = {}
+    for component, objects in data.items():
+
+        # We'll initialize a 1d-array with NaN values for all the objects of this component type
+        dataset[component] = initialize_array(data_type, component, len(objects))
+
+        for i, obj in enumerate(objects):
+            # As each object is a separate dictionary, and the properties may differ per object, we need to check
+            # all properties. Non-existing properties will raise a ValueError below.
+            for property, value in obj.items():
+                if property == "extra":
+                    # The "extra" property is a special one. It can store any type of information associated with
+                    # an object, but it will not be used in the calculations. Therefore it is not included in the
+                    # numpy array, so we can skip this property
+                    continue
+
+                if property not in dataset[component].dtype.names:
+                    # If a property doesn't exist, the user made a mistake. Let's be merciless in that case,
+                    # for their own good.
+                    raise ValueError(f"Invalid property '{property}' for {component} {data_type} data.")
+
+                # Now just assign the value and raise an error if the value cannot be stored in the specific
+                # numpy array data format for this property.
+                try:
+                    dataset[component][i][property] = value
+                except ValueError as ex:
+                    raise ValueError(f"Invalid '{property}' value for {component} {data_type} data: {ex}")
+    return dataset
 
 
 def convert_batch_to_list_data(

From 89c458d66eb78e322fb91337bad2d043336bb55c Mon Sep 17 00:00:00 2001
From: Bram Stoeller
Date: Tue, 26 Jul 2022 11:45:06 +0200
Subject: [PATCH 06/12] Comments and restructure convert_numpy_to_python

Signed-off-by: Bram Stoeller
---
 src/power_grid_model/manual_testing.py | 37 ++++++++++++++++++--------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py
index 72dae6b93..210840fcb 100644
--- a/src/power_grid_model/manual_testing.py
+++ b/src/power_grid_model/manual_testing.py
@@ -209,18 +209,33 @@ def convert_numpy_to_python(
         A json list for batch dataset
     """
-    # check the dataset is single or batch
-    if data:
-        one_data = next(iter(data.values()))
-        # it is batch dataset if it is 2D array of a dict of indptr/data
-        if isinstance(one_data, dict) or one_data.ndim == 2:
-            list_data = convert_batch_to_list_data(data)
-            return [convert_numpy_to_python(x) for x in list_data]
-    # otherwise it is single dataset
-    single_dataset: Dict[str, np.ndarray] = data
+    # Check if the dataset is a single dataset or batch dataset
+    # It is a batch dataset if it is a 2D array or an indptr/data structure
+    example_data = next(iter(data.values()))
+    is_dense_batch = isinstance(example_data, np.ndarray) and example_data.ndim == 2
+    is_sparse_batch = isinstance(example_data, dict) and "indptr" in example_data and "data" in example_data
+
+    # If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
+ if is_dense_batch or is_sparse_batch: + list_data = convert_batch_to_list_data(data) + return [convert_numpy_to_python(x) for x in list_data] + + # Otherwise it should be a single data set + if not isinstance(example_data, np.ndarray) or example_data.ndim != 1: + raise ValueError("Invalid data format") + + # Convert each numpy array to a list of objects, which contains only the non-NaN properties: + # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]} return { - name: [{k: item[k].tolist() for k in array.dtype.names if not is_nan(item[k])} for item in array] - for name, array in single_dataset.items() + component: [ + { + property: obj[property].tolist() + for property in objects.dtype.names + if not is_nan(obj[property]) + } + for obj in objects + ] + for component, objects in data.items() } From 049105ec3aa7a50438feb10ea7998ed7f21b6139 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:53:31 +0200 Subject: [PATCH 07/12] Black formatting Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 210840fcb..96786c649 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - datasets: List[Dict[str, np.ndarray]] + datasets: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert a list of datasets to one single batch dataset @@ -89,7 +89,7 @@ def convert_list_to_batch_data( def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -144,7 +144,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -197,7 +197,7 @@ def convert_batch_to_list_data( def convert_numpy_to_python( - data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data @@ -228,11 +228,7 @@ def convert_numpy_to_python( # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]} return { component: [ - { - property: obj[property].tolist() - for property in objects.dtype.names - if not is_nan(obj[property]) - } + {property: obj[property].tolist() for property in objects.dtype.names if not is_nan(obj[property])} for obj in objects ] for component, objects in data.items() @@ -256,11 +252,11 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar def export_json_data( - json_file: Path, - data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], - indent: Optional[int] = 2, - compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, ): """ export json data From 
e72e5fd3d51f72184fa62341a191d45aaf0ff7d5 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 12:23:47 +0200 Subject: [PATCH 08/12] Comments and restructure _compact_json_dump Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 80 +++++++++++++++++++------- tests/unit/test_manual_testing.py | 44 +++++++------- 2 files changed, 80 insertions(+), 44 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 96786c649..008c693d3 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -282,36 +282,72 @@ def export_json_data( with open(json_file, mode="w", encoding="utf-8") as file_pointer: if compact and indent: - max_level = 4 if isinstance(json_data, list) else 3 - compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) + is_batch_data = isinstance(json_data, list) + max_level = 4 if is_batch_data else 3 + _compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) else: json.dump(json_data, file_pointer, indent=indent) -def compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): +def _compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): + """Custom compact JSON writer that is intended to put data belonging to a single object on a single line. + + For example: + { + "node": [ + {"id": 0, "u_rated": 10500.0, "extra": {"original_id": 123}}, + {"id": 1, "u_rated": 10500.0, "extra": {"original_id": 456}}, + ], + "line": [ + {"id": 0, "node_from": 0, "node_to": 1, ...} + ] + } + + The function is being called recursively, starting at level 0 and recursing until max_level is reached. It is + basically a full json writer, but for efficiency reasons, on the last levels the native json.dump method is used. + """ + + # Let's define a 'tab' indent, depending on the level tab = " " * level * indent - if level >= max_level: + + # If we are at the max_level, or the data simply doesn't contain any more levels, write the indent and serialize + # the data on a single line. + if level >= max_level or not isinstance(data, (list, dict)): io_stream.write(tab) json.dump(data, io_stream, indent=None) - elif isinstance(data, list): + return + + # We'll need the number of objects later on + n_obj = len(data) + + # If the data is a list: + # 1. start with an opening bracket + # 2. dump each element in the list + # 3. add a comma and a new line after each element, except for the last element, there we don't need a comma. + # 4. 
finish with a closing bracket + if isinstance(data, list): io_stream.write(tab + "[\n") - n_obj = len(data) for i, obj in enumerate(data, start=1): - compact_json_dump(obj, io_stream, indent, max_level, level + 1) + _compact_json_dump(obj, io_stream, indent, max_level, level + 1) io_stream.write(",\n" if i < n_obj else "\n") io_stream.write(tab + "]") - elif isinstance(data, dict): - io_stream.write(tab + "{\n") - n_obj = len(data) - for i, (key, obj) in enumerate(data.items(), start=1): - if level == max_level - 1 or not isinstance(obj, (list, dict)): - io_stream.write(tab + " " * indent + f'"{key}": ') - json.dump(obj, io_stream, indent=None) - else: - io_stream.write(tab + " " * indent + f'"{key}":\n') - compact_json_dump(obj, io_stream, indent, max_level, level + 2) - io_stream.write(",\n" if i < n_obj else "\n") - io_stream.write(tab + "}") - else: - io_stream.write(tab) - json.dump(data, io_stream, indent=None) + return + + # If the data is a dictionary: + # 1. start with an opening curly bracket + # 2. for each element: write it's key, plus a colon ':' + # 3. if the next level would be the max_level, add a space and dump the element on a single, + # else add a new line before dumping the element recursively. + # 4. add a comma and a new line after each element, except for the last element, there we don't need a comma. + # 5. finish with a closing curly bracket + io_stream.write(tab + "{\n") + for i, (key, obj) in enumerate(data.items(), start=1): + io_stream.write(tab + " " * indent + f'"{key}":') + if level == max_level - 1 or not isinstance(obj, (list, dict)): + io_stream.write(" ") + json.dump(obj, io_stream, indent=None) + else: + io_stream.write("\n") + _compact_json_dump(obj, io_stream, indent, max_level, level + 2) + io_stream.write(",\n" if i < n_obj else "\n") + io_stream.write(tab + "}") diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 8e4235dfb..3ad531718 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -14,7 +14,7 @@ convert_python_to_numpy, export_json_data, is_nan, - compact_json_dump, + _compact_json_dump, ) @@ -77,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -143,27 +143,27 @@ def test_compact_json_dump(): } string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=0) + _compact_json_dump(data, string_stream, indent=2, max_level=0) assert ( - string_stream.getvalue() - == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + string_stream.getvalue() + == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" ) string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=1) + _compact_json_dump(data, string_stream, indent=2, max_level=1) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 
7, "x": {"y": 8.1, "z": 8.2}}] }""" ) string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=2) + _compact_json_dump(data, string_stream, indent=2, max_level=2) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": @@ -172,10 +172,10 @@ def test_compact_json_dump(): ) string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=3) + _compact_json_dump(data, string_stream, indent=2, max_level=3) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [ {"id": 1, "x": 2}, @@ -194,7 +194,7 @@ def test_compact_json_dump_string(): data = "test" string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=2) + _compact_json_dump(data, string_stream, indent=2, max_level=2) assert string_stream.getvalue() == '"test"' @@ -205,10 +205,10 @@ def test_compact_json_dump_deep(): } string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=10) + _compact_json_dump(data, string_stream, indent=2, max_level=10) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "foo": 1, "bar": { @@ -230,10 +230,10 @@ def test_compact_json_dump_batch(): }, ] string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=4) + _compact_json_dump(data, string_stream, indent=2, max_level=4) assert ( - string_stream.getvalue() - == """[ + string_stream.getvalue() + == """[ { "node": [ From 651119b7ce4ce1a338900e9d9076781f7fbe4f65 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 12:27:21 +0200 Subject: [PATCH 09/12] Undo accidental rename of function argument Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 008c693d3..d51790843 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - datasets: List[Dict[str, np.ndarray]] + list_data: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert a list of datasets to one single batch dataset @@ -44,7 +44,7 @@ def convert_list_to_batch_data( output: {"node": <2d-array>, "line": <2d-array>} -or-: {"indptr": <1d-array>, "data": <1d-array>} Args: - datasets: list of dataset + list_data: list of dataset Returns: batch dataset @@ -53,22 +53,22 @@ def convert_list_to_batch_data( """ # List all *unique* types - components = {x for dataset in datasets for x in dataset.keys()} + components = {x for dataset in list_data for x in dataset.keys()} batch_data = {} for component in components: # Create a 2D array if the component exists in all datasets and number of objects is the same in each dataset - comp_exists_in_all_datasets = all(component in x for x in datasets) - all_sizes_are_the_same = lambda: all(x[component].size == datasets[0][component].size for x in datasets) + comp_exists_in_all_datasets = all(component in x for x in list_data) + all_sizes_are_the_same = lambda: all(x[component].size == list_data[0][component].size for x in list_data) if comp_exists_in_all_datasets and all_sizes_are_the_same(): - batch_data[component] = np.stack([x[component] for x in datasets], axis=0) + batch_data[component] = np.stack([x[component] for 
x in list_data], axis=0) continue # otherwise use indptr/data dict indptr = [0] data = [] - for dataset in datasets: + for dataset in list_data: if component in dataset: # If the current dataset contains the component, increase the indptr for this batch and append the data From e4230d5f74c149b27706cc088d25cc4b57ea9bb5 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 13:07:52 +0200 Subject: [PATCH 10/12] Black formatting Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 2 +- tests/unit/test_manual_testing.py | 28 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index d51790843..705540c64 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -299,7 +299,7 @@ def _compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: in {"id": 1, "u_rated": 10500.0, "extra": {"original_id": 456}}, ], "line": [ - {"id": 0, "node_from": 0, "node_to": 1, ...} + {"id": 2, "node_from": 0, "node_to": 1, ...} ] } diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 3ad531718..3814c7deb 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -77,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -145,15 +145,15 @@ def test_compact_json_dump(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=0) assert ( - string_stream.getvalue() - == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + string_stream.getvalue() + == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" ) string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=1) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] }""" @@ -162,8 +162,8 @@ def test_compact_json_dump(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=2) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": @@ -174,8 +174,8 @@ def test_compact_json_dump(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=3) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [ {"id": 1, "x": 2}, @@ -207,8 +207,8 @@ def test_compact_json_dump_deep(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=10) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "foo": 1, "bar": { @@ -232,8 +232,8 @@ def test_compact_json_dump_batch(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, 
indent=2, max_level=4) assert ( - string_stream.getvalue() - == """[ + string_stream.getvalue() + == """[ { "node": [ From 47a953f8d2936c57bef835e467502220da681409 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 15:56:04 +0200 Subject: [PATCH 11/12] Create separate function for inject_extra_info and handle batch data Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 43 ++++++++++++--- tests/unit/test_manual_testing.py | 73 +++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 10 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 705540c64..1c489de3d 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -8,7 +8,7 @@ import json from pathlib import Path -from typing import Any, Dict, List, IO, Optional, Union +from typing import IO, Any, Dict, List, Optional, Union import numpy as np @@ -256,7 +256,7 @@ def export_json_data( data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], indent: Optional[int] = 2, compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + extra_info: Optional[Union[Dict[int, Any], List[Dict[int, Any]]]] = None, ): """ export json data @@ -272,13 +272,8 @@ def export_json_data( Save to file """ json_data = convert_numpy_to_python(data) - - # Inject extra info if extra_info is not None: - for component, objects in json_data.items(): - for obj in objects: - if obj["id"] in extra_info: - obj["extra"] = extra_info[obj["id"]] + _inject_extra_info(data=json_data, extra_info=extra_info) with open(json_file, mode="w", encoding="utf-8") as file_pointer: if compact and indent: @@ -289,6 +284,38 @@ def export_json_data( json.dump(json_data, file_pointer, indent=indent) +def _inject_extra_info( + data: Union[Dict[str, List[Dict[str, Union[float, int]]]], List[Dict[str, List[Dict[str, Union[float, int]]]]]], + extra_info: Union[Dict[int, Any], List[Dict[int, Any]]], +): + """ + Injects extra info to the objects by ID + + Args: + data: Power Grid Model Python data, as written to pgm json files. + extra_info: A dictionary indexed by object id. The value may be anything. + + """ + if isinstance(data, list): + if isinstance(extra_info, list): + # If both data and extra_info are lists, expect one extra info set per batch + for batch, info in zip(data, extra_info): + _inject_extra_info(batch, info) + else: + # If only data is a list, copy extra_info for each batch + for batch in data: + _inject_extra_info(batch, extra_info) + elif isinstance(data, dict): + if not isinstance(extra_info, dict): + raise TypeError("Invalid extra info data type") + for component, objects in data.items(): + for obj in objects: + if obj["id"] in extra_info: + obj["extra"] = extra_info[obj["id"]] + else: + raise TypeError("Invalid data type") + + def _compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): """Custom compact JSON writer that is intended to put data belonging to a single object on a single line. 
diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 3814c7deb..488cdcaad 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -4,17 +4,19 @@ import io from pathlib import Path -from unittest.mock import patch, mock_open, MagicMock +from unittest.mock import MagicMock, mock_open, patch import numpy as np import pytest + from power_grid_model.manual_testing import ( + _compact_json_dump, + _inject_extra_info, convert_batch_to_list_data, convert_numpy_to_python, convert_python_to_numpy, export_json_data, is_nan, - _compact_json_dump, ) @@ -136,6 +138,73 @@ def test_export_json_data(convert_mock: MagicMock, open_mock: MagicMock, json_du json_dump_mock.assert_called_once_with({"foo": [{"val": 123}]}, open_mock(), indent=2) +@patch("json.dump") +@patch("builtins.open", new_callable=mock_open) +@patch("power_grid_model.manual_testing.convert_numpy_to_python") +@patch("power_grid_model.manual_testing._inject_extra_info") +def test_export_json_data_extra_info( + extra_info_mock: MagicMock, convert_mock: MagicMock, _open_mock: MagicMock, _json_dump_mock: MagicMock +): + convert_mock.return_value = {"foo": [{"id": 123}]} + export_json_data(json_file=Path(), data={}, extra_info={123: "Extra information"}) + extra_info_mock.assert_called_once_with(data={"foo": [{"id": 123}]}, extra_info={123: "Extra information"}) + + +def test_inject_extra_info_single(): + data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} + extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} + _inject_extra_info(data=data, extra_info=extra_info) + assert data == { + "node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 789, "extra": 42}], + } + + +def test_inject_extra_info_batch(): + data = [ + {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, + {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, + ] + extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] + _inject_extra_info(data=data, extra_info=extra_info) + assert data == [ + { + "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 333, "extra": 42}], + }, + { + "node": [{"id": 0, "foo": 444, "extra": None}, {"id": 1, "bar": 555}], + "line": [{"id": 2, "baz": 666, "extra": 43}], + }, + ] + + +def test_inject_extra_info_batch_copy_info(): + data = [ + {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, + {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, + ] + extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} + _inject_extra_info(data=data, extra_info=extra_info) + assert data == [ + { + "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 333, "extra": 42}], + }, + { + "node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 666, "extra": 42}], + }, + ] + + +def test_inject_extra_info_single_dataset_with_batch_info(): + data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} + extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] + with pytest.raises(TypeError): + 
_inject_extra_info(data=data, extra_info=extra_info) + + def test_compact_json_dump(): data = { "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], From d9dde069389034f2c88b5a819a6818208e85bf96 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 16:36:46 +0200 Subject: [PATCH 12/12] Skip 'extra info' in C++ validation tests Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 5 +++-- tests/cpp_unit_tests/test_validation.cpp | 4 ++++ tests/data/power_flow/dummy-test/input.json | 9 ++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 1c489de3d..8480b7356 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -190,8 +190,9 @@ def convert_batch_to_list_data( for i, batch in enumerate(data): list_data[i][component] = batch else: - # For indptr/data structures, - for i, (idx0, idx1) in enumerate(zip(data["indptr"][:-1], data["indptr"][1:])): + # For indptr/data structures, use the indptr to select the items for each batch. + indptr = data["indptr"] + for i, (idx0, idx1) in enumerate(zip(indptr[:-1], indptr[1:])): list_data[i][component] = data["data"][idx0:idx1] return list_data diff --git a/tests/cpp_unit_tests/test_validation.cpp b/tests/cpp_unit_tests/test_validation.cpp index f8fb9effb..4b86d3caa 100644 --- a/tests/cpp_unit_tests/test_validation.cpp +++ b/tests/cpp_unit_tests/test_validation.cpp @@ -50,6 +50,10 @@ struct Buffer { void parse_single_object(void* ptr, json const& j, MetaData const& meta, Idx position) { meta.set_nan(ptr, position); for (auto const& it : j.items()) { + // skip extra info + if (it.key() == "extra") { + continue; + } DataAttribute const& attr = meta.find_attr(it.key()); if (attr.numpy_type == "i1") { int8_t const value = it.value().get(); diff --git a/tests/data/power_flow/dummy-test/input.json b/tests/data/power_flow/dummy-test/input.json index 295168267..28bbfb4eb 100644 --- a/tests/data/power_flow/dummy-test/input.json +++ b/tests/data/power_flow/dummy-test/input.json @@ -2,15 +2,18 @@ "node": [ { "id": 1, - "u_rated": 10e3 + "u_rated": 10e3, + "extra": "First Node" }, { "id": 2, - "u_rated": 10e3 + "u_rated": 10e3, + "extra": "Second Node" }, { "id": 3, - "u_rated": 10e3 + "u_rated": 10e3, + "extra": "Third Node" } ], "line": [