From 6c4b21c4d203aba1ca0c110514d2d54c9b342057 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Thu, 21 Jul 2022 17:39:20 +0200 Subject: [PATCH 01/12] Cherry pick compact_json_dump and extra_info Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 75 ++++++++++++++--- tests/unit/test_0Z_model_validation.py | 21 ++--- tests/unit/test_manual_testing.py | 109 ++++++++++++++++++++++++- tests/unit/utils.py | 5 +- 4 files changed, 185 insertions(+), 25 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 56aec1f49..c9197d5de 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -8,7 +8,7 @@ import json from pathlib import Path -from typing import Dict, List, Union +from typing import Any, Dict, List, IO, Optional, Union import numpy as np @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - list_data: List[Dict[str, np.ndarray]] + list_data: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert list of dataset to one single batch dataset @@ -71,7 +71,7 @@ def convert_list_to_batch_data( def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -89,6 +89,8 @@ def convert_python_to_numpy( arr: np.ndarray = initialize_array(data_type, component_name, len(component_list)) for i, component in enumerate(component_list): for property_name, value in component.items(): + if property_name == "extra": + continue if property_name not in arr[i].dtype.names: raise ValueError(f"Invalid property '{property_name}' for {component_name} {data_type} data.") try: @@ -107,7 +109,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -132,14 +134,16 @@ def convert_batch_to_list_data( single_dataset = {} for key, batch in batch_data.items(): if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i] : batch["indptr"][i + 1]] + single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] else: single_dataset[key] = batch[i, ...] list_data.append(single_dataset) return list_data -def convert_numpy_to_python(data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]) -> Union[Dict, List]: +def convert_numpy_to_python( + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] +) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data If an attribute is not available (NaN value), it will not be exported. 
@@ -181,18 +185,67 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar return convert_python_to_numpy(json_data, data_type) -def export_json_data(json_file: Path, data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], indent=2): +def export_json_data( + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, +): """ export json data Args: json_file: path to json file - data: A single or batch dataset for power-grid-model - indent: - indent of the file, default 2 + data: a single or batch dataset for power-grid-model + indent: indent of the file, default 2 + compact: write components on a single line + extra_info: extra information (in any json-serializable format), indexed on the object ids + e.g. a string representing the original id, or a dictionary storing even more information. Returns: Save to file """ json_data = convert_numpy_to_python(data) + + # Inject extra info + if extra_info is not None: + for component, objects in json_data.items(): + for obj in objects: + if obj["id"] in extra_info: + obj["extra"] = extra_info[obj["id"]] + with open(json_file, mode="w", encoding="utf-8") as file_pointer: - json.dump(json_data, file_pointer, indent=indent) + if compact and indent: + max_level = 4 if isinstance(json_data, list) else 3 + compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) + else: + json.dump(json_data, file_pointer, indent=indent) + + +def compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): + tab = " " * level * indent + if level >= max_level: + io_stream.write(tab) + json.dump(data, io_stream, indent=None) + elif isinstance(data, list): + io_stream.write(tab + "[\n") + n_obj = len(data) + for i, obj in enumerate(data, start=1): + compact_json_dump(obj, io_stream, indent, max_level, level + 1) + io_stream.write(",\n" if i < n_obj else "\n") + io_stream.write(tab + "]") + elif isinstance(data, dict): + io_stream.write(tab + "{\n") + n_obj = len(data) + for i, (key, obj) in enumerate(data.items(), start=1): + if level == max_level - 1 or not isinstance(obj, (list, dict)): + io_stream.write(tab + " " * indent + f'"{key}": ') + json.dump(obj, io_stream, indent=None) + else: + io_stream.write(tab + " " * indent + f'"{key}":\n') + compact_json_dump(obj, io_stream, indent, max_level, level + 2) + io_stream.write(",\n" if i < n_obj else "\n") + io_stream.write(tab + "}") + else: + io_stream.write(tab) + json.dump(data, io_stream, indent=None) diff --git a/tests/unit/test_0Z_model_validation.py b/tests/unit/test_0Z_model_validation.py index e6676dde8..f6cf5d549 100644 --- a/tests/unit/test_0Z_model_validation.py +++ b/tests/unit/test_0Z_model_validation.py @@ -23,7 +23,8 @@ pytest_cases(get_batch_cases=False), ) def test_single_validation( - case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, atol: float + case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, + atol: float ): # Initialization case_data = import_case_data(case_path, sym=sym) @@ -56,15 +57,15 @@ def test_single_validation( pytest_cases(get_batch_cases=True), ) def test_batch_validation( - case_id: str, - case_path: Path, - sym: bool, - calculation_type: str, - calculation_method: str, - rtol: float, - atol: float, - independent: bool, - cache_topology: bool, + case_id: str, + case_path: 
Path, + sym: bool, + calculation_type: str, + calculation_method: str, + rtol: float, + atol: float, + independent: bool, + cache_topology: bool, ): # Initialization case_data = import_case_data(case_path, sym=sym) diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index e9de242c0..82a24bd71 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: MPL-2.0 +import io from pathlib import Path from unittest.mock import patch, mock_open, MagicMock @@ -13,6 +14,7 @@ convert_python_to_numpy, export_json_data, is_nan, + compact_json_dump, ) @@ -75,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -132,3 +134,106 @@ def test_export_json_data(convert_mock: MagicMock, open_mock: MagicMock, json_du export_json_data(json_file=Path("output.json"), data={}, indent=2) convert_mock.assert_called_once() json_dump_mock.assert_called_once_with({"foo": [{"val": 123}]}, open_mock(), indent=2) + + +def test_compact_json_dump(): + data = { + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], + } + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=0) + assert string_stream.getvalue() == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=1) + assert string_stream.getvalue() == """{ + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] +}""" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=2) + assert string_stream.getvalue() == """{ + "node": + [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": + [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] +}""" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=3) + assert string_stream.getvalue() == """{ + "node": + [ + {"id": 1, "x": 2}, + {"id": 3, "x": 4} + ], + "line": + [ + {"id": 5, "x": 6}, + {"id": 7, "x": {"y": 8.1, "z": 8.2}} + ] +}""" + + +def test_compact_json_dump_string(): + data = "test" + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=2) + assert string_stream.getvalue() == "\"test\"" + + +def test_compact_json_dump_deep(): + data = { + "foo": 1, + "bar": {"x": 2, "y": 3}, + } + + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, max_level=10) + assert string_stream.getvalue() == """{ + "foo": 1, + "bar": + { + "x": 2, + "y": 3 + } +}""" + + +def test_compact_json_dump_batch(): + data = [ + { + "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], + "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}], + }, + { + "line": [{"id": 9, "x": 10}, {"id": 11, "x": 12}], + }, + ] + string_stream = io.StringIO() + compact_json_dump(data, string_stream, indent=2, 
max_level=4) + assert string_stream.getvalue() == """[ + { + "node": + [ + {"id": 1, "x": 2}, + {"id": 3, "x": 4} + ], + "line": + [ + {"id": 5, "x": 6}, + {"id": 7, "x": {"y": 8.1, "z": 8.2}} + ] + }, + { + "line": + [ + {"id": 9, "x": 10}, + {"id": 11, "x": 12} + ] + } +]""" diff --git a/tests/unit/utils.py b/tests/unit/utils.py index b0d7aa2d4..a905d5186 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -16,7 +16,7 @@ DATA_PATH = BASE_PATH / "data" OUPUT_PATH = BASE_PATH / "output" EXPORT_OUTPUT = ("POWER_GRID_MODEL_VALIDATION_TEST_EXPORT" in os.environ) and ( - os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" + os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" ) @@ -108,7 +108,8 @@ def save_json_data(json_file: str, data: Union[dict, list]): def compare_result( - actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, atol: Union[float, Dict[str, float]] + actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, + atol: Union[float, Dict[str, float]] ): for key, expected_data in expected.items(): for col_name in expected_data.dtype.names: From 48efacab528485f143a83872afeb7dd5df122c76 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Thu, 21 Jul 2022 17:43:20 +0200 Subject: [PATCH 02/12] Black formatting Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 20 +++++++------- tests/unit/test_0Z_model_validation.py | 21 +++++++-------- tests/unit/test_manual_testing.py | 36 +++++++++++++++++++------- tests/unit/utils.py | 5 ++-- 4 files changed, 49 insertions(+), 33 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index c9197d5de..7375e0d0e 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - list_data: List[Dict[str, np.ndarray]] + list_data: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert list of dataset to one single batch dataset @@ -71,7 +71,7 @@ def convert_list_to_batch_data( def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -109,7 +109,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -134,7 +134,7 @@ def convert_batch_to_list_data( single_dataset = {} for key, batch in batch_data.items(): if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] + single_dataset[key] = batch["data"][batch["indptr"][i] : batch["indptr"][i + 1]] else: single_dataset[key] = batch[i, ...] 
list_data.append(single_dataset) @@ -142,7 +142,7 @@ def convert_batch_to_list_data( def convert_numpy_to_python( - data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data @@ -186,11 +186,11 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar def export_json_data( - json_file: Path, - data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], - indent: Optional[int] = 2, - compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, ): """ export json data diff --git a/tests/unit/test_0Z_model_validation.py b/tests/unit/test_0Z_model_validation.py index f6cf5d549..e6676dde8 100644 --- a/tests/unit/test_0Z_model_validation.py +++ b/tests/unit/test_0Z_model_validation.py @@ -23,8 +23,7 @@ pytest_cases(get_batch_cases=False), ) def test_single_validation( - case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, - atol: float + case_id: str, case_path: Path, sym: bool, calculation_type: str, calculation_method: str, rtol: float, atol: float ): # Initialization case_data = import_case_data(case_path, sym=sym) @@ -57,15 +56,15 @@ def test_single_validation( pytest_cases(get_batch_cases=True), ) def test_batch_validation( - case_id: str, - case_path: Path, - sym: bool, - calculation_type: str, - calculation_method: str, - rtol: float, - atol: float, - independent: bool, - cache_topology: bool, + case_id: str, + case_path: Path, + sym: bool, + calculation_type: str, + calculation_method: str, + rtol: float, + atol: float, + independent: bool, + cache_topology: bool, ): # Initialization case_data = import_case_data(case_path, sym=sym) diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 82a24bd71..8e4235dfb 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -77,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -144,27 +144,38 @@ def test_compact_json_dump(): string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=0) - assert string_stream.getvalue() == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + assert ( + string_stream.getvalue() + == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + ) string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=1) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": 
{"y": 8.1, "z": 8.2}}] }""" + ) string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=2) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] }""" + ) string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=3) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "node": [ {"id": 1, "x": 2}, @@ -176,6 +187,7 @@ def test_compact_json_dump(): {"id": 7, "x": {"y": 8.1, "z": 8.2}} ] }""" + ) def test_compact_json_dump_string(): @@ -183,7 +195,7 @@ def test_compact_json_dump_string(): string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=2) - assert string_stream.getvalue() == "\"test\"" + assert string_stream.getvalue() == '"test"' def test_compact_json_dump_deep(): @@ -194,7 +206,9 @@ def test_compact_json_dump_deep(): string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=10) - assert string_stream.getvalue() == """{ + assert ( + string_stream.getvalue() + == """{ "foo": 1, "bar": { @@ -202,6 +216,7 @@ def test_compact_json_dump_deep(): "y": 3 } }""" + ) def test_compact_json_dump_batch(): @@ -216,7 +231,9 @@ def test_compact_json_dump_batch(): ] string_stream = io.StringIO() compact_json_dump(data, string_stream, indent=2, max_level=4) - assert string_stream.getvalue() == """[ + assert ( + string_stream.getvalue() + == """[ { "node": [ @@ -237,3 +254,4 @@ def test_compact_json_dump_batch(): ] } ]""" + ) diff --git a/tests/unit/utils.py b/tests/unit/utils.py index a905d5186..b0d7aa2d4 100644 --- a/tests/unit/utils.py +++ b/tests/unit/utils.py @@ -16,7 +16,7 @@ DATA_PATH = BASE_PATH / "data" OUPUT_PATH = BASE_PATH / "output" EXPORT_OUTPUT = ("POWER_GRID_MODEL_VALIDATION_TEST_EXPORT" in os.environ) and ( - os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" + os.environ["POWER_GRID_MODEL_VALIDATION_TEST_EXPORT"] == "ON" ) @@ -108,8 +108,7 @@ def save_json_data(json_file: str, data: Union[dict, list]): def compare_result( - actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, - atol: Union[float, Dict[str, float]] + actual: Dict[str, np.ndarray], expected: Dict[str, np.ndarray], rtol: float, atol: Union[float, Dict[str, float]] ): for key, expected_data in expected.items(): for col_name in expected_data.dtype.names: From 5409cd50410cad714886e2ebacd2d5528865118e Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:11:09 +0200 Subject: [PATCH 03/12] Comments and restructure convert_list_to_batch_data Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 61 ++++++++++++++++---------- 1 file changed, 37 insertions(+), 24 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 7375e0d0e..0854bcd66 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,12 +34,12 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - list_data: List[Dict[str, np.ndarray]] + datasets: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ - Convert list of dataset to one single batch dataset + Convert a list of datasets to one single batch dataset Args: - list_data: list of dataset + datasets: list of dataset Returns: batch dataset @@ -48,30 +48,43 @@ def convert_list_to_batch_data( 
""" # List all *unique* types - all_types = list({x for single_batch in list_data for x in single_batch.keys()}) + components = {x for dataset in datasets for x in dataset.keys()} batch_data = {} - for comp_type in all_types: - # use 2D array if the type exists in all single dataset and the size is the same - if np.all([comp_type in x for x in list_data]) and np.unique([x[comp_type].size for x in list_data]).size == 1: - batch_data[comp_type] = np.stack([x[comp_type] for x in list_data], axis=0) + for component in components: + + # Create a 2D array if the component exists in all datasets and number of objects is the same in each dataset + comp_exists_in_all_datasets = all(component in x for x in datasets) + all_sizes_are_the_same = lambda: all(x[component].size == datasets[0][component].size for x in datasets) + if comp_exists_in_all_datasets and all_sizes_are_the_same(): + batch_data[component] = np.stack([x[component] for x in datasets], axis=0) continue + # otherwise use indptr/data dict indptr = [0] data = [] - for single_batch in list_data: - if comp_type not in single_batch: - indptr.append(indptr[-1]) + for dataset in datasets: + + # If the current dataset contains the component, increase the indptr for this batch and append the data + if component in dataset: + objects = dataset[component] + indptr.append(indptr[-1] + len(objects)) + data.append(objects) + + # If the current dataset does not contain the component, add the last indptr again. else: - single_data = single_batch[comp_type] - indptr.append(indptr[-1] + single_data.shape[0]) - data.append(single_data) - batch_data[comp_type] = {"indptr": np.array(indptr, dtype=np.int32), "data": np.concatenate(data, axis=0)} + indptr.append(indptr[-1]) + + # Convert the index pointers to a numpy array and combine the list of object numpy arrays into a singe + # numpy array. All objects of all batches are now stores in one large array, the index pointers define + # which elemets of the array (rows) belong to which batch. + batch_data[component] = {"indptr": np.array(indptr, dtype=np.int32), "data": np.concatenate(data, axis=0)} + return batch_data def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -109,7 +122,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -134,7 +147,7 @@ def convert_batch_to_list_data( single_dataset = {} for key, batch in batch_data.items(): if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i] : batch["indptr"][i + 1]] + single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] else: single_dataset[key] = batch[i, ...] 
list_data.append(single_dataset) @@ -142,7 +155,7 @@ def convert_batch_to_list_data( def convert_numpy_to_python( - data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data @@ -186,11 +199,11 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar def export_json_data( - json_file: Path, - data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], - indent: Optional[int] = 2, - compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, ): """ export json data From 65b086064f1e145d6093c7a21bd0c5296303f2d4 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:11:27 +0200 Subject: [PATCH 04/12] Comments and restructure convert_batch_to_list_data Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 63 ++++++++++++++++++-------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 0854bcd66..21b2d5442 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -38,6 +38,11 @@ def convert_list_to_batch_data( ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert a list of datasets to one single batch dataset + + Example data formats: + input: [{"node": <1d-array>, "line": <1d-array>}, {"node": <1d-array>, "line": <1d-array>}] + output: {"node": <2d-array>, "line": <2d-array>} + -or-: {"indptr": <1d-array>, "data": <1d-array>} Args: datasets: list of dataset @@ -65,14 +70,14 @@ def convert_list_to_batch_data( data = [] for dataset in datasets: - # If the current dataset contains the component, increase the indptr for this batch and append the data if component in dataset: + # If the current dataset contains the component, increase the indptr for this batch and append the data objects = dataset[component] indptr.append(indptr[-1] + len(objects)) data.append(objects) - # If the current dataset does not contain the component, add the last indptr again. else: + # If the current dataset does not contain the component, add the last indptr again. indptr.append(indptr[-1]) # Convert the index pointers to a numpy array and combine the list of object numpy arrays into a singe @@ -132,25 +137,45 @@ def convert_batch_to_list_data( Returns: list of single dataset """ - list_data = [] - # return empty list + + # If the batch data is empty, return an empty list if not batch_data: - return list_data - # get n_batch - one_data = next(iter(batch_data.values())) - if isinstance(one_data, dict): - n_batch = one_data["indptr"].size - 1 + return [] + + # Get the data for an arbitrary component; assuming that the number of batches of each component is the same. 
+ # The structure may differ per component + example_batch_data = next(iter(batch_data.values())) + + if isinstance(example_batch_data, np.ndarray): + # We expect the batch data to be a 2d numpy array of n_batches x n_objects + if len(example_batch_data.shape) != 2: + raise ValueError("Invalid batch data format") + n_batches = example_batch_data.shape[0] + elif isinstance(example_batch_data, dict): + # If the batch data is a dictionary, we assume that it is an indptr/data structure (otherwise it is an + # invalid dictionary). There is always one indptr more than there are batches. + if "indptr" not in example_batch_data: + raise ValueError("Invalid batch data format") + n_batches = example_batch_data["indptr"].size - 1 else: - n_batch = one_data.shape[0] - # convert - for i in range(n_batch): - single_dataset = {} - for key, batch in batch_data.items(): - if isinstance(batch, dict): - single_dataset[key] = batch["data"][batch["indptr"][i]: batch["indptr"][i + 1]] - else: - single_dataset[key] = batch[i, ...] - list_data.append(single_dataset) + # If the batch data is not a numpy array and not a dictionary, it is invalid + raise ValueError("Invalid batch data format") + + # Initialize an empty list with dictionaries + # Note that [{}] * n_batches would result in n copies of the same dict. + list_data = [{} for _ in range(n_batches)] + + # While the number of batches must be the same for each component, the structure (2d numpy array or indptr/data) + # doesn't have to be. Therefore, we'll check the structure for each component and copy the data accordingly. + for component, data in batch_data.items(): + if isinstance(data, np.ndarray): + # For 2d numpy arrays, copy each batch into an element of the list + for i, batch in enumerate(data): + list_data[i][component] = batch + else: + # For indptr/data structures, + for i, (idx0, idx1) in enumerate(zip(data["indptr"][:-1], data["indptr"][1:])): + list_data[i][component] = data["data"][idx0:idx1] return list_data From 5c501f375e3e7a6ad788fdfb6cf42fcfd956bd7b Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:32:08 +0200 Subject: [PATCH 05/12] Comments and restructure convert_python_to_numpy Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 53 +++++++++++++++++--------- 1 file changed, 35 insertions(+), 18 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 21b2d5442..72dae6b93 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -101,29 +101,46 @@ def convert_python_to_numpy( A single or batch dataset for power-grid-model """ - if isinstance(data, dict): - return_dict = {} - for component_name, component_list in data.items(): - arr: np.ndarray = initialize_array(data_type, component_name, len(component_list)) - for i, component in enumerate(component_list): - for property_name, value in component.items(): - if property_name == "extra": - continue - if property_name not in arr[i].dtype.names: - raise ValueError(f"Invalid property '{property_name}' for {component_name} {data_type} data.") - try: - arr[i][property_name] = value - except ValueError as ex: - raise ValueError(f"Invalid '{property_name}' value for {component_name} {data_type} data: {ex}") - - return_dict[component_name] = arr - return return_dict + # If the inpute data is a list, we are dealing with batch data. Each element in the list is a batch. 
We'll
+    # first convert each batch separately, by recursively calling this function for each batch. Then the numpy
+    # data for all batches is converted into a proper and compact numpy structure.
     if isinstance(data, list):
         list_data = [convert_python_to_numpy(json_dict, data_type=data_type) for json_dict in data]
         return convert_list_to_batch_data(list_data)
 
-    raise TypeError("Only list or dict is allowed in JSON data!")
+    # This should be a normal (non-batch) structure, with a list of objects (dictionaries) per component.
+    if not isinstance(data, dict):
+        raise TypeError("Only list or dict is allowed in JSON data!")
+
+    dataset: Dict[str, np.ndarray] = {}
+    for component, objects in data.items():
+
+        # We'll initialize a 1d-array with NaN values for all the objects of this component type
+        dataset[component] = initialize_array(data_type, component, len(objects))
+
+        for i, obj in enumerate(objects):
+            # As each object is a separate dictionary, and the properties may differ per object, we need to check
+            # all properties. Non-existing properties will raise a ValueError below.
+            for property, value in obj.items():
+                if property == "extra":
+                    # The "extra" property is a special one. It can store any type of information associated with
+                    # an object, but it will not be used in the calculations. Therefore it is not included in the
+                    # numpy array, so we can skip this property
+                    continue
+
+                if property not in dataset[component].dtype.names:
+                    # If a property doesn't exist, the user made a mistake. Let's be merciless in that case,
+                    # for their own good.
+                    raise ValueError(f"Invalid property '{property}' for {component} {data_type} data.")
+
+                # Now just assign the value and raise an error if the value cannot be stored in the specific
+                # numpy array data format for this property.
+                try:
+                    dataset[component][i][property] = value
+                except ValueError as ex:
+                    raise ValueError(f"Invalid '{property}' value for {component} {data_type} data: {ex}")
+    return dataset
 
 
 def convert_batch_to_list_data(

From 89c458d66eb78e322fb91337bad2d043336bb55c Mon Sep 17 00:00:00 2001
From: Bram Stoeller
Date: Tue, 26 Jul 2022 11:45:06 +0200
Subject: [PATCH 06/12] Comments and restructure convert_numpy_to_python

Signed-off-by: Bram Stoeller
---
 src/power_grid_model/manual_testing.py | 37 ++++++++++++++++++--------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py
index 72dae6b93..210840fcb 100644
--- a/src/power_grid_model/manual_testing.py
+++ b/src/power_grid_model/manual_testing.py
@@ -209,18 +209,33 @@ def convert_numpy_to_python(
         A json list for batch dataset
     """
-    # check the dataset is single or batch
-    if data:
-        one_data = next(iter(data.values()))
-        # it is batch dataset if it is 2D array of a dict of indptr/data
-        if isinstance(one_data, dict) or one_data.ndim == 2:
-            list_data = convert_batch_to_list_data(data)
-            return [convert_numpy_to_python(x) for x in list_data]
-    # otherwise it is single dataset
-    single_dataset: Dict[str, np.ndarray] = data
+    # Check if the dataset is a single dataset or batch dataset
+    # It is a batch dataset if it is a 2D array or an indptr/data structure
+    example_data = next(iter(data.values()))
+    is_dense_batch = isinstance(example_data, np.ndarray) and example_data.ndim == 2
+    is_sparse_batch = isinstance(example_data, dict) and "indptr" in example_data and "data" in example_data
+
+    # If it is a batch, convert the batch data to a list of batches, then convert each batch individually.
+ if is_dense_batch or is_sparse_batch: + list_data = convert_batch_to_list_data(data) + return [convert_numpy_to_python(x) for x in list_data] + + # Otherwise it should be a single data set + if not isinstance(example_data, np.ndarray) or example_data.ndim != 1: + raise ValueError("Invalid data format") + + # Convert each numpy array to a list of objects, which contains only the non-NaN properties: + # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]} return { - name: [{k: item[k].tolist() for k in array.dtype.names if not is_nan(item[k])} for item in array] - for name, array in single_dataset.items() + component: [ + { + property: obj[property].tolist() + for property in objects.dtype.names + if not is_nan(obj[property]) + } + for obj in objects + ] + for component, objects in data.items() } From 049105ec3aa7a50438feb10ea7998ed7f21b6139 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 11:53:31 +0200 Subject: [PATCH 07/12] Black formatting Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 210840fcb..96786c649 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - datasets: List[Dict[str, np.ndarray]] + datasets: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert a list of datasets to one single batch dataset @@ -89,7 +89,7 @@ def convert_list_to_batch_data( def convert_python_to_numpy( - data: Union[Dict, List], data_type: str + data: Union[Dict, List], data_type: str ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert native python data to internal numpy @@ -144,7 +144,7 @@ def convert_python_to_numpy( def convert_batch_to_list_data( - batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + batch_data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> List[Dict[str, np.ndarray]]: """ Convert list of dataset to one single batch dataset @@ -197,7 +197,7 @@ def convert_batch_to_list_data( def convert_numpy_to_python( - data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] + data: Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]] ) -> Union[Dict[str, List[Dict[str, Union[int, float]]]], List[Dict[str, List[Dict[str, Union[int, float]]]]]]: """ Convert internal numpy arrays to native python data @@ -228,11 +228,7 @@ def convert_numpy_to_python( # For example: {"node": [{"id": 0, ...}, {"id": 1, ...}], "line": [{"id": 2, ...}]} return { component: [ - { - property: obj[property].tolist() - for property in objects.dtype.names - if not is_nan(obj[property]) - } + {property: obj[property].tolist() for property in objects.dtype.names if not is_nan(obj[property])} for obj in objects ] for component, objects in data.items() @@ -256,11 +252,11 @@ def import_json_data(json_file: Path, data_type: str) -> Union[Dict[str, np.ndar def export_json_data( - json_file: Path, - data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], - indent: Optional[int] = 2, - compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + json_file: Path, + data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], + indent: Optional[int] = 2, + compact: bool = False, + extra_info: Optional[Dict[int, Any]] = None, ): """ export json data From 
e72e5fd3d51f72184fa62341a191d45aaf0ff7d5 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 12:23:47 +0200 Subject: [PATCH 08/12] Comments and restructure _compact_json_dump Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 80 +++++++++++++++++++------- tests/unit/test_manual_testing.py | 44 +++++++------- 2 files changed, 80 insertions(+), 44 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 96786c649..008c693d3 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -282,36 +282,72 @@ def export_json_data( with open(json_file, mode="w", encoding="utf-8") as file_pointer: if compact and indent: - max_level = 4 if isinstance(json_data, list) else 3 - compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) + is_batch_data = isinstance(json_data, list) + max_level = 4 if is_batch_data else 3 + _compact_json_dump(json_data, file_pointer, indent=indent, max_level=max_level) else: json.dump(json_data, file_pointer, indent=indent) -def compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): +def _compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): + """Custom compact JSON writer that is intended to put data belonging to a single object on a single line. + + For example: + { + "node": [ + {"id": 0, "u_rated": 10500.0, "extra": {"original_id": 123}}, + {"id": 1, "u_rated": 10500.0, "extra": {"original_id": 456}}, + ], + "line": [ + {"id": 0, "node_from": 0, "node_to": 1, ...} + ] + } + + The function is being called recursively, starting at level 0 and recursing until max_level is reached. It is + basically a full json writer, but for efficiency reasons, on the last levels the native json.dump method is used. + """ + + # Let's define a 'tab' indent, depending on the level tab = " " * level * indent - if level >= max_level: + + # If we are at the max_level, or the data simply doesn't contain any more levels, write the indent and serialize + # the data on a single line. + if level >= max_level or not isinstance(data, (list, dict)): io_stream.write(tab) json.dump(data, io_stream, indent=None) - elif isinstance(data, list): + return + + # We'll need the number of objects later on + n_obj = len(data) + + # If the data is a list: + # 1. start with an opening bracket + # 2. dump each element in the list + # 3. add a comma and a new line after each element, except for the last element, there we don't need a comma. + # 4. 
finish with a closing bracket + if isinstance(data, list): io_stream.write(tab + "[\n") - n_obj = len(data) for i, obj in enumerate(data, start=1): - compact_json_dump(obj, io_stream, indent, max_level, level + 1) + _compact_json_dump(obj, io_stream, indent, max_level, level + 1) io_stream.write(",\n" if i < n_obj else "\n") io_stream.write(tab + "]") - elif isinstance(data, dict): - io_stream.write(tab + "{\n") - n_obj = len(data) - for i, (key, obj) in enumerate(data.items(), start=1): - if level == max_level - 1 or not isinstance(obj, (list, dict)): - io_stream.write(tab + " " * indent + f'"{key}": ') - json.dump(obj, io_stream, indent=None) - else: - io_stream.write(tab + " " * indent + f'"{key}":\n') - compact_json_dump(obj, io_stream, indent, max_level, level + 2) - io_stream.write(",\n" if i < n_obj else "\n") - io_stream.write(tab + "}") - else: - io_stream.write(tab) - json.dump(data, io_stream, indent=None) + return + + # If the data is a dictionary: + # 1. start with an opening curly bracket + # 2. for each element: write it's key, plus a colon ':' + # 3. if the next level would be the max_level, add a space and dump the element on a single, + # else add a new line before dumping the element recursively. + # 4. add a comma and a new line after each element, except for the last element, there we don't need a comma. + # 5. finish with a closing curly bracket + io_stream.write(tab + "{\n") + for i, (key, obj) in enumerate(data.items(), start=1): + io_stream.write(tab + " " * indent + f'"{key}":') + if level == max_level - 1 or not isinstance(obj, (list, dict)): + io_stream.write(" ") + json.dump(obj, io_stream, indent=None) + else: + io_stream.write("\n") + _compact_json_dump(obj, io_stream, indent, max_level, level + 2) + io_stream.write(",\n" if i < n_obj else "\n") + io_stream.write(tab + "}") diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 8e4235dfb..3ad531718 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -14,7 +14,7 @@ convert_python_to_numpy, export_json_data, is_nan, - compact_json_dump, + _compact_json_dump, ) @@ -77,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -143,27 +143,27 @@ def test_compact_json_dump(): } string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=0) + _compact_json_dump(data, string_stream, indent=2, max_level=0) assert ( - string_stream.getvalue() - == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + string_stream.getvalue() + == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" ) string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=1) + _compact_json_dump(data, string_stream, indent=2, max_level=1) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 
7, "x": {"y": 8.1, "z": 8.2}}] }""" ) string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=2) + _compact_json_dump(data, string_stream, indent=2, max_level=2) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": @@ -172,10 +172,10 @@ def test_compact_json_dump(): ) string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=3) + _compact_json_dump(data, string_stream, indent=2, max_level=3) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [ {"id": 1, "x": 2}, @@ -194,7 +194,7 @@ def test_compact_json_dump_string(): data = "test" string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=2) + _compact_json_dump(data, string_stream, indent=2, max_level=2) assert string_stream.getvalue() == '"test"' @@ -205,10 +205,10 @@ def test_compact_json_dump_deep(): } string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=10) + _compact_json_dump(data, string_stream, indent=2, max_level=10) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "foo": 1, "bar": { @@ -230,10 +230,10 @@ def test_compact_json_dump_batch(): }, ] string_stream = io.StringIO() - compact_json_dump(data, string_stream, indent=2, max_level=4) + _compact_json_dump(data, string_stream, indent=2, max_level=4) assert ( - string_stream.getvalue() - == """[ + string_stream.getvalue() + == """[ { "node": [ From 651119b7ce4ce1a338900e9d9076781f7fbe4f65 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 12:27:21 +0200 Subject: [PATCH 09/12] Undo accidental rename of function argument Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 008c693d3..d51790843 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -34,7 +34,7 @@ def is_nan(data) -> bool: def convert_list_to_batch_data( - datasets: List[Dict[str, np.ndarray]] + list_data: List[Dict[str, np.ndarray]] ) -> Dict[str, Union[np.ndarray, Dict[str, np.ndarray]]]: """ Convert a list of datasets to one single batch dataset @@ -44,7 +44,7 @@ def convert_list_to_batch_data( output: {"node": <2d-array>, "line": <2d-array>} -or-: {"indptr": <1d-array>, "data": <1d-array>} Args: - datasets: list of dataset + list_data: list of dataset Returns: batch dataset @@ -53,22 +53,22 @@ def convert_list_to_batch_data( """ # List all *unique* types - components = {x for dataset in datasets for x in dataset.keys()} + components = {x for dataset in list_data for x in dataset.keys()} batch_data = {} for component in components: # Create a 2D array if the component exists in all datasets and number of objects is the same in each dataset - comp_exists_in_all_datasets = all(component in x for x in datasets) - all_sizes_are_the_same = lambda: all(x[component].size == datasets[0][component].size for x in datasets) + comp_exists_in_all_datasets = all(component in x for x in list_data) + all_sizes_are_the_same = lambda: all(x[component].size == list_data[0][component].size for x in list_data) if comp_exists_in_all_datasets and all_sizes_are_the_same(): - batch_data[component] = np.stack([x[component] for x in datasets], axis=0) + batch_data[component] = np.stack([x[component] for 
x in list_data], axis=0) continue # otherwise use indptr/data dict indptr = [0] data = [] - for dataset in datasets: + for dataset in list_data: if component in dataset: # If the current dataset contains the component, increase the indptr for this batch and append the data From e4230d5f74c149b27706cc088d25cc4b57ea9bb5 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 13:07:52 +0200 Subject: [PATCH 10/12] Black formatting Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 2 +- tests/unit/test_manual_testing.py | 28 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index d51790843..705540c64 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -299,7 +299,7 @@ def _compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: in {"id": 1, "u_rated": 10500.0, "extra": {"original_id": 456}}, ], "line": [ - {"id": 0, "node_from": 0, "node_to": 1, ...} + {"id": 2, "node_from": 0, "node_to": 1, ...} ] } diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 3ad531718..3814c7deb 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -77,9 +77,9 @@ def test_is_nan(): assert is_nan(single_value) array_f8 = np.array([0.1, 0.2, np.nan], dtype=np.dtype("f8")) assert not is_nan(array_f8) - array_i4 = np.array([10, 2, -(2 ** 31), 40], dtype=np.dtype("i4")) + array_i4 = np.array([10, 2, -(2**31), 40], dtype=np.dtype("i4")) assert not is_nan(array_i4) - array_i1 = np.array([1, 0, -(2 ** 7), 1], dtype=np.dtype("i1")) + array_i1 = np.array([1, 0, -(2**7), 1], dtype=np.dtype("i1")) assert not is_nan(array_i1) nan_array = np.array([np.nan, np.nan, np.nan]) assert is_nan(nan_array) @@ -145,15 +145,15 @@ def test_compact_json_dump(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=0) assert ( - string_stream.getvalue() - == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" + string_stream.getvalue() + == """{"node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}]}""" ) string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=1) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": [{"id": 5, "x": 6}, {"id": 7, "x": {"y": 8.1, "z": 8.2}}] }""" @@ -162,8 +162,8 @@ def test_compact_json_dump(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=2) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], "line": @@ -174,8 +174,8 @@ def test_compact_json_dump(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=3) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "node": [ {"id": 1, "x": 2}, @@ -207,8 +207,8 @@ def test_compact_json_dump_deep(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, indent=2, max_level=10) assert ( - string_stream.getvalue() - == """{ + string_stream.getvalue() + == """{ "foo": 1, "bar": { @@ -232,8 +232,8 @@ def test_compact_json_dump_batch(): string_stream = io.StringIO() _compact_json_dump(data, string_stream, 
indent=2, max_level=4) assert ( - string_stream.getvalue() - == """[ + string_stream.getvalue() + == """[ { "node": [ From 47a953f8d2936c57bef835e467502220da681409 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 15:56:04 +0200 Subject: [PATCH 11/12] Create separate function for inject_extra_info and handle batch data Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 43 ++++++++++++--- tests/unit/test_manual_testing.py | 73 +++++++++++++++++++++++++- 2 files changed, 106 insertions(+), 10 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 705540c64..1c489de3d 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -8,7 +8,7 @@ import json from pathlib import Path -from typing import Any, Dict, List, IO, Optional, Union +from typing import IO, Any, Dict, List, Optional, Union import numpy as np @@ -256,7 +256,7 @@ def export_json_data( data: Union[Dict[str, np.ndarray], List[Dict[str, np.ndarray]]], indent: Optional[int] = 2, compact: bool = False, - extra_info: Optional[Dict[int, Any]] = None, + extra_info: Optional[Union[Dict[int, Any], List[Dict[int, Any]]]] = None, ): """ export json data @@ -272,13 +272,8 @@ def export_json_data( Save to file """ json_data = convert_numpy_to_python(data) - - # Inject extra info if extra_info is not None: - for component, objects in json_data.items(): - for obj in objects: - if obj["id"] in extra_info: - obj["extra"] = extra_info[obj["id"]] + _inject_extra_info(data=json_data, extra_info=extra_info) with open(json_file, mode="w", encoding="utf-8") as file_pointer: if compact and indent: @@ -289,6 +284,38 @@ def export_json_data( json.dump(json_data, file_pointer, indent=indent) +def _inject_extra_info( + data: Union[Dict[str, List[Dict[str, Union[float, int]]]], List[Dict[str, List[Dict[str, Union[float, int]]]]]], + extra_info: Union[Dict[int, Any], List[Dict[int, Any]]], +): + """ + Injects extra info to the objects by ID + + Args: + data: Power Grid Model Python data, as written to pgm json files. + extra_info: A dictionary indexed by object id. The value may be anything. + + """ + if isinstance(data, list): + if isinstance(extra_info, list): + # If both data and extra_info are lists, expect one extra info set per batch + for batch, info in zip(data, extra_info): + _inject_extra_info(batch, info) + else: + # If only data is a list, copy extra_info for each batch + for batch in data: + _inject_extra_info(batch, extra_info) + elif isinstance(data, dict): + if not isinstance(extra_info, dict): + raise TypeError("Invalid extra info data type") + for component, objects in data.items(): + for obj in objects: + if obj["id"] in extra_info: + obj["extra"] = extra_info[obj["id"]] + else: + raise TypeError("Invalid data type") + + def _compact_json_dump(data: Any, io_stream: IO[str], indent: int, max_level: int, level: int = 0): """Custom compact JSON writer that is intended to put data belonging to a single object on a single line. 
diff --git a/tests/unit/test_manual_testing.py b/tests/unit/test_manual_testing.py index 3814c7deb..488cdcaad 100644 --- a/tests/unit/test_manual_testing.py +++ b/tests/unit/test_manual_testing.py @@ -4,17 +4,19 @@ import io from pathlib import Path -from unittest.mock import patch, mock_open, MagicMock +from unittest.mock import MagicMock, mock_open, patch import numpy as np import pytest + from power_grid_model.manual_testing import ( + _compact_json_dump, + _inject_extra_info, convert_batch_to_list_data, convert_numpy_to_python, convert_python_to_numpy, export_json_data, is_nan, - _compact_json_dump, ) @@ -136,6 +138,73 @@ def test_export_json_data(convert_mock: MagicMock, open_mock: MagicMock, json_du json_dump_mock.assert_called_once_with({"foo": [{"val": 123}]}, open_mock(), indent=2) +@patch("json.dump") +@patch("builtins.open", new_callable=mock_open) +@patch("power_grid_model.manual_testing.convert_numpy_to_python") +@patch("power_grid_model.manual_testing._inject_extra_info") +def test_export_json_data_extra_info( + extra_info_mock: MagicMock, convert_mock: MagicMock, _open_mock: MagicMock, _json_dump_mock: MagicMock +): + convert_mock.return_value = {"foo": [{"id": 123}]} + export_json_data(json_file=Path(), data={}, extra_info={123: "Extra information"}) + extra_info_mock.assert_called_once_with(data={"foo": [{"id": 123}]}, extra_info={123: "Extra information"}) + + +def test_inject_extra_info_single(): + data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} + extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} + _inject_extra_info(data=data, extra_info=extra_info) + assert data == { + "node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 789, "extra": 42}], + } + + +def test_inject_extra_info_batch(): + data = [ + {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, + {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, + ] + extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] + _inject_extra_info(data=data, extra_info=extra_info) + assert data == [ + { + "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 333, "extra": 42}], + }, + { + "node": [{"id": 0, "foo": 444, "extra": None}, {"id": 1, "bar": 555}], + "line": [{"id": 2, "baz": 666, "extra": 43}], + }, + ] + + +def test_inject_extra_info_batch_copy_info(): + data = [ + {"node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222}], "line": [{"id": 2, "baz": 333}]}, + {"node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555}], "line": [{"id": 2, "baz": 666}]}, + ] + extra_info = {2: 42, 1: {"sheet": "Nodes", "Number": "00123"}} + _inject_extra_info(data=data, extra_info=extra_info) + assert data == [ + { + "node": [{"id": 0, "foo": 111}, {"id": 1, "bar": 222, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 333, "extra": 42}], + }, + { + "node": [{"id": 0, "foo": 444}, {"id": 1, "bar": 555, "extra": {"sheet": "Nodes", "Number": "00123"}}], + "line": [{"id": 2, "baz": 666, "extra": 42}], + }, + ] + + +def test_inject_extra_info_single_dataset_with_batch_info(): + data = {"node": [{"id": 0, "foo": 123}, {"id": 1, "bar": 456}], "line": [{"id": 2, "baz": 789}]} + extra_info = [{2: 42, 1: {"sheet": "Nodes", "Number": "00123"}}, {2: 43, 0: None}] + with pytest.raises(TypeError): + 
_inject_extra_info(data=data, extra_info=extra_info) + + def test_compact_json_dump(): data = { "node": [{"id": 1, "x": 2}, {"id": 3, "x": 4}], From d9dde069389034f2c88b5a819a6818208e85bf96 Mon Sep 17 00:00:00 2001 From: Bram Stoeller Date: Tue, 26 Jul 2022 16:36:46 +0200 Subject: [PATCH 12/12] Skip 'extra info' in C++ validation tests Signed-off-by: Bram Stoeller --- src/power_grid_model/manual_testing.py | 5 +++-- tests/cpp_unit_tests/test_validation.cpp | 4 ++++ tests/data/power_flow/dummy-test/input.json | 9 ++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/src/power_grid_model/manual_testing.py b/src/power_grid_model/manual_testing.py index 1c489de3d..8480b7356 100644 --- a/src/power_grid_model/manual_testing.py +++ b/src/power_grid_model/manual_testing.py @@ -190,8 +190,9 @@ def convert_batch_to_list_data( for i, batch in enumerate(data): list_data[i][component] = batch else: - # For indptr/data structures, - for i, (idx0, idx1) in enumerate(zip(data["indptr"][:-1], data["indptr"][1:])): + # For indptr/data structures, use the indptr to select the items for each batch. + indptr = data["indptr"] + for i, (idx0, idx1) in enumerate(zip(indptr[:-1], indptr[1:])): list_data[i][component] = data["data"][idx0:idx1] return list_data diff --git a/tests/cpp_unit_tests/test_validation.cpp b/tests/cpp_unit_tests/test_validation.cpp index f8fb9effb..4b86d3caa 100644 --- a/tests/cpp_unit_tests/test_validation.cpp +++ b/tests/cpp_unit_tests/test_validation.cpp @@ -50,6 +50,10 @@ struct Buffer { void parse_single_object(void* ptr, json const& j, MetaData const& meta, Idx position) { meta.set_nan(ptr, position); for (auto const& it : j.items()) { + // skip extra info + if (it.key() == "extra") { + continue; + } DataAttribute const& attr = meta.find_attr(it.key()); if (attr.numpy_type == "i1") { int8_t const value = it.value().get(); diff --git a/tests/data/power_flow/dummy-test/input.json b/tests/data/power_flow/dummy-test/input.json index 295168267..28bbfb4eb 100644 --- a/tests/data/power_flow/dummy-test/input.json +++ b/tests/data/power_flow/dummy-test/input.json @@ -2,15 +2,18 @@ "node": [ { "id": 1, - "u_rated": 10e3 + "u_rated": 10e3, + "extra": "First Node" }, { "id": 2, - "u_rated": 10e3 + "u_rated": 10e3, + "extra": "Second Node" }, { "id": 3, - "u_rated": 10e3 + "u_rated": 10e3, + "extra": "Third Node" } ], "line": [