From 42808934d746956928d27d029dfcdfe246329533 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Herje?= <82032112+jorgenherje@users.noreply.github.com> Date: Thu, 6 Feb 2025 09:48:04 +0100 Subject: [PATCH] 780 per day and per intvl summary vectors (#857) --- .../primary/routers/timeseries/converters.py | 116 ++++- .../primary/routers/timeseries/router.py | 431 +++++++++++----- .../primary/routers/timeseries/schemas.py | 77 ++- .../primary/services/summary_delta_vectors.py | 41 +- .../services/summary_derived_vectors.py | 309 ++++++++++++ .../primary/services/utils/arrow_helpers.py | 29 ++ .../services/test_summary_delta_vectors.py | 72 +-- .../services/test_summary_derived_vectors.py | 476 ++++++++++++++++++ .../unit/services/utils/test_arrow_helpers.py | 78 ++- frontend/src/api/autogen/sdk.gen.ts | 2 + frontend/src/api/autogen/types.gen.ts | 51 +- .../SimulationTimeSeries/dataGenerators.ts | 2 +- .../settings/atoms/derivedAtoms.ts | 55 +- .../settings/atoms/queryAtoms.ts | 2 + .../settings/settings.tsx | 3 + .../utils/ensemblesVectorListHelper.ts | 28 +- .../utils/vectorDescriptionUtils.ts | 21 + .../view/atoms/baseAtoms.ts | 2 +- .../view/atoms/derivedAtoms.ts | 2 +- .../view/hooks/usePlotBuilder.ts | 3 + .../view/utils/PlotBuilder.ts | 96 +++- .../createVectorTracesUtils.ts | 142 +++--- .../vectorSpecificationsAndQueriesUtils.ts | 12 +- .../dataGenerators.ts | 2 +- .../settings/atoms/derivedAtoms.ts | 2 +- .../view/atoms/derivedAtoms.ts | 3 +- ...seTimeSeriesChartTracesDataArrayBuilder.ts | 4 +- .../view/utils/createTracesUtils.ts | 12 +- .../_shared/reservoirSimulationStringUtils.ts | 89 ++-- .../reservoirSimulationStringUtils.test.ts | 26 +- 30 files changed, 1714 insertions(+), 474 deletions(-) create mode 100644 backend_py/primary/primary/services/summary_derived_vectors.py create mode 100644 backend_py/primary/tests/unit/services/test_summary_derived_vectors.py create mode 100644 frontend/src/modules/SimulationTimeSeries/utils/vectorDescriptionUtils.ts 
diff --git a/backend_py/primary/primary/routers/timeseries/converters.py b/backend_py/primary/primary/routers/timeseries/converters.py index 478fa5102..5df47efe9 100644 --- a/backend_py/primary/primary/routers/timeseries/converters.py +++ b/backend_py/primary/primary/routers/timeseries/converters.py @@ -1,14 +1,92 @@ -from typing import List, Optional, Sequence +from typing import Sequence from primary.services.summary_vector_statistics import VectorStatistics -from primary.services.sumo_access.summary_access import VectorMetadata +from primary.services.sumo_access.summary_access import RealizationVector from primary.services.utils.statistic_function import StatisticFunction +from primary.services.summary_delta_vectors import RealizationDeltaVector +from primary.services.summary_derived_vectors import DerivedVectorType, DerivedRealizationVector from . import schemas +def to_api_derived_vector_type(derived_type: DerivedVectorType) -> schemas.DerivedVectorType: + """ + Create API DerivedVectorType from service layer DerivedVectorType + """ + return schemas.DerivedVectorType(derived_type.value) + + +def to_api_derived_vector_info(derived_type: DerivedVectorType, source_vector: str) -> schemas.DerivedVectorInfo: + """ + Create API DerivedVectorInfo from service layer DerivedVectorInfo + """ + return schemas.DerivedVectorInfo( + type=to_api_derived_vector_type(derived_type), + sourceVector=source_vector, + ) + + +def realization_vector_list_to_api_vector_realization_data_list( + realization_vector_list: list[RealizationVector], +) -> list[schemas.VectorRealizationData]: + """ + Create API VectorRealizationData list from service layer RealizationVector list + """ + return [ + schemas.VectorRealizationData( + realization=real_vec.realization, + timestampsUtcMs=real_vec.timestamps_utc_ms, + values=real_vec.values, + unit=real_vec.metadata.unit, + isRate=real_vec.metadata.is_rate, + ) + for real_vec in realization_vector_list + ] + + +def 
derived_vector_realizations_to_api_vector_realization_data_list( + derived_realization_vector_list: list[DerivedRealizationVector], derived_vector_info: schemas.DerivedVectorInfo +) -> list[schemas.VectorRealizationData]: + """ + Create API VectorRealizationData list from service layer DerivedRealizationVector list and derived vector info + """ + return [ + schemas.VectorRealizationData( + realization=real_vec.realization, + timestampsUtcMs=real_vec.timestamps_utc_ms, + values=real_vec.values, + unit=real_vec.unit, + isRate=real_vec.is_rate, + derivedVectorInfo=derived_vector_info, + ) + for real_vec in derived_realization_vector_list + ] + + +def realization_delta_vector_list_to_api_vector_realization_data_list( + realization_delta_vector_list: list[RealizationDeltaVector], + derived_vector_info: schemas.DerivedVectorInfo | None = None, +) -> list[schemas.VectorRealizationData]: + """ + Create API VectorRealizationData list from service layer RealizationVector list + + Optional derived_vector_info is included in the API VectorRealizationData if provided + """ + return [ + schemas.VectorRealizationData( + realization=real_vec.realization, + timestampsUtcMs=real_vec.timestamps_utc_ms, + values=real_vec.values, + unit=real_vec.unit, + isRate=real_vec.is_rate, + derivedVectorInfo=derived_vector_info, + ) + for real_vec in realization_delta_vector_list + ] + + def to_service_statistic_functions( - api_stat_funcs: Optional[Sequence[schemas.StatisticFunction]], -) -> Optional[List[StatisticFunction]]: + api_stat_funcs: Sequence[schemas.StatisticFunction] | None = None, +) -> list[StatisticFunction] | None: """ Convert incoming list of API statistic function enum values to service layer StatisticFunction enums, also accounting for the case where the list is None @@ -16,7 +94,7 @@ def to_service_statistic_functions( if api_stat_funcs is None: return None - service_stat_funcs: List[StatisticFunction] = [] + service_stat_funcs: list[StatisticFunction] = [] for api_func_enum 
in api_stat_funcs: service_func_enum = StatisticFunction.from_string_value(api_func_enum.value) if service_func_enum: @@ -26,7 +104,10 @@ def to_service_statistic_functions( def to_api_vector_statistic_data( - vector_statistics: VectorStatistics, vector_metadata: VectorMetadata + vector_statistics: VectorStatistics, + is_rate: bool, + unit: str, + derived_vector_info: schemas.DerivedVectorInfo | None = None, ) -> schemas.VectorStatisticData: """ Create API VectorStatisticData from service layer VectorStatistics @@ -34,17 +115,21 @@ def to_api_vector_statistic_data( value_objects = _create_statistic_value_object_list(vector_statistics) ret_data = schemas.VectorStatisticData( realizations=vector_statistics.realizations, - timestamps_utc_ms=vector_statistics.timestamps_utc_ms, - value_objects=value_objects, - unit=vector_metadata.unit, - is_rate=vector_metadata.is_rate, + timestampsUtcMs=vector_statistics.timestamps_utc_ms, + valueObjects=value_objects, + unit=unit, + isRate=is_rate, + derivedVectorInfo=derived_vector_info, ) return ret_data def to_api_delta_ensemble_vector_statistic_data( - vector_statistics: VectorStatistics, is_rate: bool, unit: str + vector_statistics: VectorStatistics, + is_rate: bool, + unit: str, + derived_vector_info: schemas.DerivedVectorInfo | None = None, ) -> schemas.VectorStatisticData: """ Create API VectorStatisticData from service layer VectorStatistics @@ -52,10 +137,11 @@ def to_api_delta_ensemble_vector_statistic_data( value_objects = _create_statistic_value_object_list(vector_statistics) ret_data = schemas.VectorStatisticData( realizations=vector_statistics.realizations, - timestamps_utc_ms=vector_statistics.timestamps_utc_ms, - value_objects=value_objects, + timestampsUtcMs=vector_statistics.timestamps_utc_ms, + valueObjects=value_objects, unit=unit, - is_rate=is_rate, + isRate=is_rate, + derivedVectorInfo=derived_vector_info, ) return ret_data @@ -71,6 +157,6 @@ def _create_statistic_value_object_list(vector_statistics: 
VectorStatistics) -> if service_func_enum is not None: value_arr = vector_statistics.values_dict.get(service_func_enum) if value_arr is not None: - value_objects.append(schemas.StatisticValueObject(statistic_function=api_func_enum, values=value_arr)) + value_objects.append(schemas.StatisticValueObject(statisticFunction=api_func_enum, values=value_arr)) return value_objects diff --git a/backend_py/primary/primary/routers/timeseries/router.py b/backend_py/primary/primary/routers/timeseries/router.py index 2eb1fe86c..9bef3fed3 100644 --- a/backend_py/primary/primary/routers/timeseries/router.py +++ b/backend_py/primary/primary/routers/timeseries/router.py @@ -7,12 +7,28 @@ from primary.auth.auth_helper import AuthHelper from primary.utils.response_perf_metrics import ResponsePerfMetrics -from primary.services.summary_vector_statistics import compute_vector_statistics +from primary.services.summary_vector_statistics import compute_vector_statistics, VectorStatistics from primary.services.sumo_access.generic_types import EnsembleScalarResponse from primary.services.sumo_access.parameter_access import ParameterAccess from primary.services.sumo_access.summary_access import Frequency, SummaryAccess from primary.services.utils.authenticated_user import AuthenticatedUser -from primary.services.summary_delta_vectors import create_delta_vector_table, create_realization_delta_vector_list +from primary.services.summary_delta_vectors import ( + DeltaVectorMetadata, + RealizationDeltaVector, + create_delta_vector_table, + create_realization_delta_vector_list, +) +from primary.services.summary_derived_vectors import ( + create_derived_vector_table_for_type, + create_per_day_vector_name, + create_per_interval_vector_name, + create_derived_vector_unit, + create_derived_realization_vector_list, + get_derived_vector_type, + get_total_vector_name, + is_derived_vector, + is_total_vector, +) from primary.utils.query_string_utils import decode_uint_list_str from . 
import converters, schemas @@ -29,8 +45,12 @@ async def get_vector_list( authenticated_user: Annotated[AuthenticatedUser, Depends(AuthHelper.get_authenticated_user)], case_uuid: Annotated[str, Query(description="Sumo case uuid")], ensemble_name: Annotated[str, Query(description="Ensemble name")], + include_derived_vectors: Annotated[bool | None, Query(description="Include derived vectors")] = None, ) -> list[schemas.VectorDescription]: - """Get list of all vectors in a given Sumo ensemble, excluding any historical vectors""" + """Get list of all vectors in a given Sumo ensemble, excluding any historical vectors + + Optionally include derived vectors. + """ perf_metrics = ResponsePerfMetrics(response) @@ -40,14 +60,22 @@ async def get_vector_list( vector_info_arr = await access.get_available_vectors_async() perf_metrics.record_lap("get-available-vectors") - ret_arr: list[schemas.VectorDescription] = [ - schemas.VectorDescription(name=vi.name, descriptive_name=vi.name, has_historical=vi.has_historical) - for vi in vector_info_arr - ] + ret_arr: list[schemas.VectorDescription] = [] + vector_names: list[str] = [] + for vi in vector_info_arr: + vector_names.append(vi.name) + ret_arr.append( + schemas.VectorDescription(name=vi.name, descriptiveName=vi.name, hasHistorical=vi.has_historical) + ) + perf_metrics.record_lap("convert-data") - LOGGER.info(f"Got vector list in: {perf_metrics.to_string()}") + # Create derived vectors if requested + if include_derived_vectors: + total_vectors = {vector for vector in vector_names if is_total_vector(vector)} + ret_arr.extend(_create_vector_descriptions_for_derived_vectors(total_vectors)) + LOGGER.info(f"Got vector list in: {perf_metrics.to_string()}") return ret_arr @@ -59,6 +87,7 @@ async def get_delta_ensemble_vector_list( comparison_ensemble_name: Annotated[str, Query(description="Comparison ensemble name")], reference_case_uuid: Annotated[str, Query(description="Sumo case uuid for reference ensemble")], reference_ensemble_name: 
Annotated[str, Query(description="Reference ensemble name")], + include_derived_vectors: Annotated[bool | None, Query(description="Include derived vectors")] = None, ) -> list[schemas.VectorDescription]: """Get list of all vectors for a delta ensemble based on all vectors in a given Sumo ensemble, excluding any historical vectors @@ -91,13 +120,17 @@ async def get_delta_ensemble_vector_list( # Create vector descriptions, no historical vectors! ret_arr: list[schemas.VectorDescription] = [ - schemas.VectorDescription(name=vi, descriptive_name=vi, has_historical=False) for vi in vector_names + schemas.VectorDescription(name=vi, descriptiveName=vi, hasHistorical=False) for vi in vector_names ] perf_metrics.record_lap("convert-data-to-schema") - LOGGER.info(f"Got delta ensemble vector list in: {perf_metrics.to_string()}") + # Create derived vectors if requested + if include_derived_vectors: + total_vectors = {vector for vector in vector_names if is_total_vector(vector)} + ret_arr.extend(_create_vector_descriptions_for_derived_vectors(total_vectors)) + LOGGER.info(f"Got delta ensemble vector list in: {perf_metrics.to_string()}") return ret_arr @@ -122,29 +155,43 @@ async def get_realizations_vector_data( realizations = decode_uint_list_str(realizations_encoded_as_uint_list_str) access = SummaryAccess.from_case_uuid(authenticated_user.get_sumo_access_token(), case_uuid, ensemble_name) - sumo_freq = Frequency.from_string_value(resampling_frequency.value if resampling_frequency else "dummy") - sumo_vec_arr = await access.get_vector_async( - vector_name=vector_name, - resampling_frequency=sumo_freq, - realizations=realizations, - ) - perf_metrics.record_lap("get-vector") + + is_vector_derived = is_derived_vector(vector_name) + vector_name_to_fetch = vector_name if not is_vector_derived else get_total_vector_name(vector_name) ret_arr: list[schemas.VectorRealizationData] = [] - for vec in sumo_vec_arr: - ret_arr.append( - schemas.VectorRealizationData( - 
realization=vec.realization, - timestamps_utc_ms=vec.timestamps_utc_ms, - values=vec.values, - unit=vec.metadata.unit, - is_rate=vec.metadata.is_rate, - ) + if not is_vector_derived: + sumo_vec_arr = await access.get_vector_async( + vector_name=vector_name_to_fetch, + resampling_frequency=sumo_freq, + realizations=realizations, + ) + perf_metrics.record_lap("get-vector") + + ret_arr = converters.realization_vector_list_to_api_vector_realization_data_list(sumo_vec_arr) + else: + # Handle derived vectors + vector_table_pa, vector_metadata = await access.get_vector_table_async( + vector_name=vector_name_to_fetch, + resampling_frequency=sumo_freq, + realizations=realizations, ) - LOGGER.info(f"Loaded realization summary data in: {perf_metrics.to_string()}") + derived_vector_type = get_derived_vector_type(vector_name) + derived_vector_unit = create_derived_vector_unit(vector_metadata.unit, derived_vector_type) + derived_vector_info = converters.to_api_derived_vector_info(derived_vector_type, vector_name_to_fetch) + + derived_vector_table_pa = create_derived_vector_table_for_type(vector_table_pa, derived_vector_type) + derived_realization_vector_list = create_derived_realization_vector_list( + derived_vector_table_pa, vector_name, vector_metadata.is_rate, derived_vector_unit + ) + + ret_arr = converters.derived_vector_realizations_to_api_vector_realization_data_list( + derived_realization_vector_list, derived_vector_info + ) + LOGGER.info(f"Loaded realization summary data in: {perf_metrics.to_string()}") return ret_arr @@ -182,69 +229,56 @@ async def get_delta_ensemble_realizations_vector_data( status_code=400, detail="Resampling frequency must be specified to create delta ensemble vector" ) - comparison_ensemble_access = SummaryAccess.from_case_uuid( - authenticated_user.get_sumo_access_token(), comparison_case_uuid, comparison_ensemble_name - ) - reference_ensemble_access = SummaryAccess.from_case_uuid( - authenticated_user.get_sumo_access_token(), reference_case_uuid, 
reference_ensemble_name + is_vector_derived = is_derived_vector(vector_name) + vector_name_to_fetch = vector_name if not is_vector_derived else get_total_vector_name(vector_name) + + # Create delta ensemble table and metadata: + ( + delta_vector_table_pa, + delta_vector_metadata, + ) = await _get_vector_tables_and_create_delta_vector_table_and_metadata_async( + authenticated_user, + comparison_case_uuid, + comparison_ensemble_name, + reference_case_uuid, + reference_ensemble_name, + vector_name_to_fetch, + realizations, + service_freq, + perf_metrics, ) - # Get tables parallel - # - Resampled data is assumed to be such that dates/timestamps are comparable between ensembles and cases, i.e. timestamps - # for a resampling of a daily vector in both ensembles should be the same - (comparison_vector_table_pa, comparison_metadata), ( - reference_vector_table_pa, - reference_metadata, - ) = await asyncio.gather( - comparison_ensemble_access.get_vector_table_async( - vector_name=vector_name, - resampling_frequency=service_freq, - realizations=realizations, - ), - reference_ensemble_access.get_vector_table_async( - vector_name=vector_name, - resampling_frequency=service_freq, - realizations=realizations, - ), - ) - - perf_metrics.record_lap("get-vector-tables-for-delta") + # Create realization delta vectors + ret_arr: list[schemas.VectorRealizationData] = [] + if not is_vector_derived: + realization_delta_vector_list = create_realization_delta_vector_list( + delta_vector_table_pa, + vector_name_to_fetch, + delta_vector_metadata.is_rate, + delta_vector_metadata.unit, + ) + perf_metrics.record_lap("create-realization-delta-vector-list") - # Check for mismatching metadata - if comparison_metadata.is_rate != reference_metadata.is_rate: - raise HTTPException( - status_code=400, detail="Rate mismatch between ensembles for delta ensemble statistical vector data" + ret_arr = converters.realization_delta_vector_list_to_api_vector_realization_data_list( + 
realization_delta_vector_list ) - if comparison_metadata.unit != reference_metadata.unit: - raise HTTPException( - status_code=400, detail="Unit mismatch between ensembles for delta ensemble statistical vector data" + else: + derived_vector_type = get_derived_vector_type(vector_name) + derived_vector_unit = create_derived_vector_unit(delta_vector_metadata.unit, derived_vector_type) + derived_vector_info = converters.to_api_derived_vector_info(derived_vector_type, vector_name_to_fetch) + + # Create derived vectors if requested + delta_derived_vector_table_pa = create_derived_vector_table_for_type(delta_vector_table_pa, derived_vector_type) + realization_delta_vector_list = create_realization_delta_vector_list( + delta_derived_vector_table_pa, vector_name, delta_vector_metadata.is_rate, derived_vector_unit ) + perf_metrics.record_lap("create-realization-delta-derived-vector-list") - # Get metadata from reference ensemble - is_rate = reference_metadata.is_rate - unit = reference_metadata.unit - - # Create delta ensemble data - delta_vector_table = create_delta_vector_table(comparison_vector_table_pa, reference_vector_table_pa, vector_name) - perf_metrics.record_lap("create-delta-vector-table") - - realization_delta_vector_list = create_realization_delta_vector_list(delta_vector_table, vector_name, is_rate, unit) - perf_metrics.record_lap("create-realization-delta-vector-list") - - ret_arr: list[schemas.VectorRealizationData] = [] - for vec in realization_delta_vector_list: - ret_arr.append( - schemas.VectorRealizationData( - realization=vec.realization, - timestamps_utc_ms=vec.timestamps_utc_ms, - values=vec.values, - unit=vec.unit, - is_rate=vec.is_rate, - ) + ret_arr = converters.realization_delta_vector_list_to_api_vector_realization_data_list( + realization_delta_vector_list, derived_vector_info ) LOGGER.info(f"Loaded realization delta ensemble summary data in: {perf_metrics.to_string()}") - return ret_arr @@ -287,10 +321,10 @@ async def 
get_historical_vector_data( raise HTTPException(status_code=404, detail="Could not get historical vector") return schemas.VectorHistoricalData( - timestamps_utc_ms=sumo_hist_vec.timestamps_utc_ms, + timestampsUtcMs=sumo_hist_vec.timestamps_utc_ms, values=sumo_hist_vec.values, unit=sumo_hist_vec.metadata.unit, - is_rate=sumo_hist_vec.metadata.is_rate, + isRate=sumo_hist_vec.metadata.is_rate, ) @@ -320,20 +354,41 @@ async def get_statistical_vector_data( service_freq = Frequency.from_string_value(resampling_frequency.value) service_stat_funcs_to_compute = converters.to_service_statistic_functions(statistic_functions) + is_vector_derived = is_derived_vector(vector_name) + vector_name_to_fetch = vector_name if not is_vector_derived else get_total_vector_name(vector_name) + + # Get vector table vector_table, vector_metadata = await access.get_vector_table_async( - vector_name=vector_name, + vector_name=vector_name_to_fetch, resampling_frequency=service_freq, realizations=realizations, ) perf_metrics.record_lap("get-table") - statistics = compute_vector_statistics(vector_table, vector_name, service_stat_funcs_to_compute) - if not statistics: - raise HTTPException(status_code=404, detail="Could not compute statistics") - perf_metrics.record_lap("calc-stat") + # Calculate statistics + ret_data: schemas.VectorStatisticData | None = None + if not is_vector_derived: + statistics = compute_vector_statistics(vector_table, vector_name, service_stat_funcs_to_compute) + if not statistics: + raise HTTPException(status_code=404, detail="Could not compute statistics") + + ret_data = converters.to_api_vector_statistic_data(statistics, vector_metadata.is_rate, vector_metadata.unit) + else: + derived_vector_type = get_derived_vector_type(vector_name) + derived_vector_unit = create_derived_vector_unit(vector_metadata.unit, derived_vector_type) + derived_vector_info = converters.to_api_derived_vector_info(derived_vector_type, vector_name_to_fetch) + + derived_vector_table_pa = 
create_derived_vector_table_for_type(vector_table, derived_vector_type) + statistics = compute_vector_statistics(derived_vector_table_pa, vector_name, service_stat_funcs_to_compute) - ret_data: schemas.VectorStatisticData = converters.to_api_vector_statistic_data(statistics, vector_metadata) + if not statistics: + raise HTTPException(status_code=404, detail="Could not compute statistics") + ret_data = converters.to_api_vector_statistic_data( + statistics, vector_metadata.is_rate, derived_vector_unit, derived_vector_info + ) + + perf_metrics.record_lap("calc-stat") LOGGER.info(f"Loaded and computed statistical summary data in: {perf_metrics.to_string()}") return ret_data @@ -376,59 +431,54 @@ async def get_delta_ensemble_statistical_vector_data( status_code=400, detail="Resampling frequency must be specified to create delta ensemble vector" ) - comparison_ensemble_access = SummaryAccess.from_case_uuid( - authenticated_user.get_sumo_access_token(), comparison_case_uuid, comparison_ensemble_name - ) - reference_ensemble_access = SummaryAccess.from_case_uuid( - authenticated_user.get_sumo_access_token(), reference_case_uuid, reference_ensemble_name + is_vector_derived = is_derived_vector(vector_name) + vector_name_to_fetch = vector_name if not is_vector_derived else get_total_vector_name(vector_name) + + # Create delta ensemble table and metadata: + ( + delta_vector_table_pa, + delta_vector_metadata, + ) = await _get_vector_tables_and_create_delta_vector_table_and_metadata_async( + authenticated_user, + comparison_case_uuid, + comparison_ensemble_name, + reference_case_uuid, + reference_ensemble_name, + vector_name_to_fetch, + realizations, + service_freq, + perf_metrics, ) - # Get tables parallel - # - Resampled data is assumed to be such that dates/timestamps are comparable between ensembles and cases, i.e. 
timestamps - # for a resampling of a daily vector in both ensembles should be the same - (comparison_vector_table_pa, comparison_metadata), ( - reference_vector_table_pa, - reference_metadata, - ) = await asyncio.gather( - comparison_ensemble_access.get_vector_table_async( - vector_name=vector_name, - resampling_frequency=service_freq, - realizations=realizations, - ), - reference_ensemble_access.get_vector_table_async( - vector_name=vector_name, - resampling_frequency=service_freq, - realizations=realizations, - ), - ) + # Calculate statistics + ret_data: schemas.VectorStatisticData | None = None + if not is_vector_derived: + statistics = compute_vector_statistics(delta_vector_table_pa, vector_name, service_stat_funcs_to_compute) - perf_metrics.record_lap("get-vector-tables-for-delta") + if not statistics: + raise HTTPException(status_code=404, detail="Could not compute statistics") - # Check for mismatching metadata - if comparison_metadata.is_rate != reference_metadata.is_rate: - raise HTTPException( - status_code=400, detail="Rate mismatch between ensembles for delta ensemble statistical vector data" + ret_data = converters.to_api_delta_ensemble_vector_statistic_data( + statistics, delta_vector_metadata.is_rate, delta_vector_metadata.unit ) - if comparison_metadata.unit != reference_metadata.unit: - raise HTTPException( - status_code=400, detail="Unit mismatch between ensembles for delta ensemble statistical vector data" + else: + derived_vector_type = get_derived_vector_type(vector_name) + derived_vector_unit = create_derived_vector_unit(delta_vector_metadata.unit, derived_vector_type) + derived_vector_info = converters.to_api_derived_vector_info(derived_vector_type, vector_name_to_fetch) + + delta_derived_vector_table_pa = create_derived_vector_table_for_type(delta_vector_table_pa, derived_vector_type) + statistics = compute_vector_statistics( + delta_derived_vector_table_pa, vector_name, service_stat_funcs_to_compute ) - # Get metadata from reference 
ensemble - is_rate = reference_metadata.is_rate - unit = reference_metadata.unit + if not statistics: + raise HTTPException(status_code=404, detail="Could not compute statistics") - # Create delta ensemble data and compute statistics - delta_vector_table = create_delta_vector_table(comparison_vector_table_pa, reference_vector_table_pa, vector_name) - statistics = compute_vector_statistics(delta_vector_table, vector_name, service_stat_funcs_to_compute) - if not statistics: - raise HTTPException(status_code=404, detail="Could not compute statistics") - perf_metrics.record_lap("calc-delta-vector-stat") - - ret_data: schemas.VectorStatisticData = converters.to_api_delta_ensemble_vector_statistic_data( - statistics, is_rate, unit - ) + ret_data = converters.to_api_delta_ensemble_vector_statistic_data( + statistics, delta_vector_metadata.is_rate, derived_vector_unit, derived_vector_info + ) + perf_metrics.record_lap("calc-delta-vector-stat") LOGGER.info(f"Loaded and computed statistical delta ensemble summary data in: {perf_metrics.to_string()}") return ret_data @@ -488,16 +538,16 @@ async def get_statistical_vector_data_per_sensitivity( raise HTTPException(status_code=404, detail="Could not compute statistics") statistic_data: schemas.VectorStatisticData = converters.to_api_vector_statistic_data( - statistics, vector_metadata + statistics, vector_metadata.is_rate, vector_metadata.unit, None ) sensitivity_statistic_data = schemas.VectorStatisticSensitivityData( - sensitivity_name=sensitivity.name, - sensitivity_case=case.name, + sensitivityName=sensitivity.name, + sensitivityCase=case.name, realizations=statistic_data.realizations, - timestamps_utc_ms=statistic_data.timestamps_utc_ms, - value_objects=statistic_data.value_objects, + timestampsUtcMs=statistic_data.timestampsUtcMs, + valueObjects=statistic_data.valueObjects, unit=statistic_data.unit, - is_rate=statistic_data.is_rate, + isRate=statistic_data.isRate, ) ret_data.append(sensitivity_statistic_data) return 
ret_data @@ -518,3 +568,106 @@ async def get_realization_vector_at_timestamp( vector_name=vector_name, timestamp_utc_ms=timestamp_utc_ms, realizations=None ) return ensemble_response + + +def _create_vector_descriptions_for_derived_vectors( + vector_names: list[str] | set[str], +) -> list[schemas.VectorDescription]: + """ + Create vector descriptions for derived vectors from list of vector names + """ + ret_arr: list[schemas.VectorDescription] = [] + for vector_name in vector_names: + if not is_total_vector(vector_name): + continue + + per_day_vector_name = create_per_day_vector_name(vector_name) + per_interval_vector_name = create_per_interval_vector_name(vector_name) + ret_arr.extend( + [ + schemas.VectorDescription( + name=per_day_vector_name, + descriptiveName=per_day_vector_name, + hasHistorical=False, + derivedVectorInfo=schemas.DerivedVectorInfo( + type=schemas.DerivedVectorType.PER_DAY, sourceVector=vector_name + ), + ), + schemas.VectorDescription( + name=per_interval_vector_name, + descriptiveName=per_interval_vector_name, + hasHistorical=False, + derivedVectorInfo=schemas.DerivedVectorInfo( + type=schemas.DerivedVectorType.PER_INTVL, sourceVector=vector_name + ), + ), + ] + ) + return ret_arr + + +async def _get_vector_tables_and_create_delta_vector_table_and_metadata_async( + authenticated_user: AuthenticatedUser, + comparison_case_uuid: str, + comparison_ensemble_name: str, + reference_case_uuid: str, + reference_ensemble_name: str, + vector_name: str, + realizations: list[int] | None, + resampling_frequency: Frequency, + perf_metrics: ResponsePerfMetrics | None, +) -> tuple[pa.Table, DeltaVectorMetadata]: + """ + Get vector tables for comparison and reference ensembles and create delta ensemble vector table and metadata + """ + # Separate summary access to comparison and reference ensemble + comparison_ensemble_access = SummaryAccess.from_case_uuid( + authenticated_user.get_sumo_access_token(), comparison_case_uuid, comparison_ensemble_name + ) + 
reference_ensemble_access = SummaryAccess.from_case_uuid( + authenticated_user.get_sumo_access_token(), reference_case_uuid, reference_ensemble_name + ) + + # Get tables parallel + # - Resampled data is assumed to be such that dates/timestamps are comparable between ensembles and cases, i.e. timestamps + # for a resampling of a daily vector in both ensembles should be the same + (comparison_vector_table_pa, comparison_metadata), ( + reference_vector_table_pa, + reference_metadata, + ) = await asyncio.gather( + comparison_ensemble_access.get_vector_table_async( + vector_name=vector_name, + resampling_frequency=resampling_frequency, + realizations=realizations, + ), + reference_ensemble_access.get_vector_table_async( + vector_name=vector_name, + resampling_frequency=resampling_frequency, + realizations=realizations, + ), + ) + + if perf_metrics: + perf_metrics.record_lap("get-vector-tables-to-create-delta-vector-table") + + # Check for mismatching metadata + if comparison_metadata.is_rate != reference_metadata.is_rate: + raise HTTPException( + status_code=400, detail="Rate mismatch between ensembles for delta ensemble statistical vector data" + ) + if comparison_metadata.unit != reference_metadata.unit: + raise HTTPException( + status_code=400, detail="Unit mismatch between ensembles for delta ensemble statistical vector data" + ) + + delta_vector_metadata = DeltaVectorMetadata(unit=reference_metadata.unit, is_rate=reference_metadata.is_rate) + + # Create delta ensemble table + delta_vector_table_pa = create_delta_vector_table( + comparison_vector_table_pa, reference_vector_table_pa, vector_name + ) + + if perf_metrics: + perf_metrics.record_lap("create-delta-vector-table") + + return delta_vector_table_pa, delta_vector_metadata diff --git a/backend_py/primary/primary/routers/timeseries/schemas.py b/backend_py/primary/primary/routers/timeseries/schemas.py index 500a7a75c..73fa89bf6 100644 --- a/backend_py/primary/primary/routers/timeseries/schemas.py +++ 
b/backend_py/primary/primary/routers/timeseries/schemas.py @@ -1,11 +1,9 @@ -import datetime -from enum import Enum -from typing import List +from enum import StrEnum from pydantic import BaseModel -class Frequency(str, Enum): +class Frequency(StrEnum): DAILY = "DAILY" WEEKLY = "WEEKLY" MONTHLY = "MONTHLY" @@ -13,7 +11,7 @@ class Frequency(str, Enum): YEARLY = "YEARLY" -class StatisticFunction(str, Enum): +class StatisticFunction(StrEnum): MEAN = "MEAN" MIN = "MIN" MAX = "MAX" @@ -22,61 +20,58 @@ class StatisticFunction(str, Enum): P50 = "P50" +class DerivedVectorType(StrEnum): + PER_DAY = "PER_DAY" + PER_INTVL = "PER_INTVL" + + +class DerivedVectorInfo(BaseModel): + type: DerivedVectorType + sourceVector: str + + class VectorDescription(BaseModel): name: str - descriptive_name: str - has_historical: bool + descriptiveName: str + hasHistorical: bool + derivedVectorInfo: DerivedVectorInfo | None = None class VectorHistoricalData(BaseModel): - timestamps_utc_ms: List[int] - values: List[float] + timestampsUtcMs: list[int] + values: list[float] unit: str - is_rate: bool + isRate: bool class VectorRealizationData(BaseModel): realization: int - timestamps_utc_ms: List[int] - values: List[float] + timestampsUtcMs: list[int] + values: list[float] unit: str - is_rate: bool + isRate: bool + derivedVectorInfo: DerivedVectorInfo | None = None class StatisticValueObject(BaseModel): - statistic_function: StatisticFunction - values: List[float] + statisticFunction: StatisticFunction + values: list[float] class VectorStatisticData(BaseModel): - realizations: List[int] - timestamps_utc_ms: List[int] - value_objects: List[StatisticValueObject] + realizations: list[int] + timestampsUtcMs: list[int] + valueObjects: list[StatisticValueObject] unit: str - is_rate: bool + isRate: bool + derivedVectorInfo: DerivedVectorInfo | None = None class VectorStatisticSensitivityData(BaseModel): - realizations: List[int] - timestamps_utc_ms: List[int] - value_objects: List[StatisticValueObject] + 
realizations: list[int] + timestampsUtcMs: list[int] + valueObjects: list[StatisticValueObject] unit: str - is_rate: bool - sensitivity_name: str - sensitivity_case: str - - -class VectorExpressionInfo(BaseModel): - """ - `Description`: - Dictionary with all required items for an expression - - `Required keys`: - expression: str, mathematical expression - variable_names: List[str], list of variable names - vector_names: List[str], list of vector names - """ - - expression: str - variable_names: str - vector_names: str + isRate: bool + sensitivityName: str + sensitivityCase: str diff --git a/backend_py/primary/primary/services/summary_delta_vectors.py b/backend_py/primary/primary/services/summary_delta_vectors.py index 8c75e3601..c474a74ec 100644 --- a/backend_py/primary/primary/services/summary_delta_vectors.py +++ b/backend_py/primary/primary/services/summary_delta_vectors.py @@ -4,7 +4,13 @@ import pyarrow.compute as pc import numpy as np -from primary.services.service_exceptions import InvalidDataError, Service +from primary.services.utils.arrow_helpers import validate_summary_vector_table_pa + + +@dataclass +class DeltaVectorMetadata: + is_rate: bool + unit: str @dataclass @@ -16,33 +22,6 @@ class RealizationDeltaVector: unit: str -def _validate_summary_vector_table_pa( - vector_table: pa.Table, vector_name: str, service: Service = Service.GENERAL -) -> None: - """ - Check if the pyarrow vector table is valid. - - Expect the pyarrow single vector table to only contain the following columns: DATE, REAL, vector_name. - - Raises InvalidDataError if the table does not contain the expected columns. 
- """ - expected_columns = {"DATE", "REAL", vector_name} - actual_columns = set(vector_table.column_names) - if not expected_columns.issubset(actual_columns) or len(expected_columns) != len(actual_columns): - unexpected_columns = actual_columns - expected_columns - raise InvalidDataError(f"Unexpected columns in table {unexpected_columns}", service) - - # Validate table column types - if vector_table.field("DATE").type != pa.timestamp("ms"): - raise InvalidDataError( - f'DATE column must be of type timestamp(ms), but got {vector_table.field("DATE").type}', service - ) - if vector_table.field("REAL").type != pa.int16(): - raise InvalidDataError("REAL column must be of type int16", service) - if vector_table.field(vector_name).type != pa.float32(): - raise InvalidDataError(f"{vector_name} column must be of type float32", service) - - def create_delta_vector_table( comparison_vector_table: pa.Table, reference_vector_table: pa.Table, vector_name: str ) -> pa.Table: @@ -60,8 +39,8 @@ def create_delta_vector_table( `Note`: Pre-processing of DATE-columns, e.g. resampling, should be done before calling this function. """ - _validate_summary_vector_table_pa(comparison_vector_table, vector_name) - _validate_summary_vector_table_pa(reference_vector_table, vector_name) + validate_summary_vector_table_pa(comparison_vector_table, vector_name) + validate_summary_vector_table_pa(reference_vector_table, vector_name) joined_vector_table = comparison_vector_table.join( reference_vector_table, keys=["DATE", "REAL"], join_type="inner", right_suffix="_reference" @@ -87,7 +66,7 @@ def create_realization_delta_vector_list( """ Create a list of RealizationDeltaVector from the delta vector table. 
""" - _validate_summary_vector_table_pa(delta_vector_table, vector_name) + validate_summary_vector_table_pa(delta_vector_table, vector_name) real_arr_np = delta_vector_table.column("REAL").to_numpy() unique_reals, first_occurrence_idx, real_counts = np.unique(real_arr_np, return_index=True, return_counts=True) diff --git a/backend_py/primary/primary/services/summary_derived_vectors.py b/backend_py/primary/primary/services/summary_derived_vectors.py new file mode 100644 index 000000000..1db26c685 --- /dev/null +++ b/backend_py/primary/primary/services/summary_derived_vectors.py @@ -0,0 +1,309 @@ +import re +from dataclasses import dataclass +from enum import StrEnum + +import numpy as np +import pyarrow as pa +import polars as pl + +from primary.services.service_exceptions import InvalidDataError, Service +from primary.services.utils.arrow_helpers import validate_summary_vector_table_pa + + +class DerivedVectorType(StrEnum): + PER_DAY = "PER_DAY" + PER_INTERVAL = "PER_INTVL" + + +@dataclass +class DerivedRealizationVector: + realization: int + timestamps_utc_ms: list[int] + values: list[float] + is_rate: bool + unit: str + + +# This code checks in a predefined list whether a certain WGNAMES +# variable represents a total accumulated quantity. Only the last three +# characters in the variable is considered (i.e. the leading 'W', 'G' or +# 'F' is discarded). 
+# Ref.: https://github.com/equinor/resdata/blob/f82318a84bbefb6a53ed674a04eb2a73d89de02d/lib/ecl/smspec_node.cpp#L332-L335 +TOTAL_VARS = { + "OPT", + "GPT", + "WPT", + "GIT", + "WIT", + "OPTF", + "OPTS", + "OIT", + "OVPT", + "OVIT", + "MWT", + "WVPT", + "WVIT", + "GMT", + "GPTF", + "SGT", + "GST", + "FGT", + "GCT", + "GIMT", + "WGPT", + "WGIT", + "EGT", + "EXGT", + "GVPT", + "GVIT", + "LPT", + "VPT", + "VIT", + "NPT", + "NIT", + "CPT", + "CIT", +} + + +def create_derived_vector_unit(source_unit: str, derived_type: DerivedVectorType) -> str: + if derived_type == DerivedVectorType.PER_DAY: + return f"{source_unit}/DAY" + return source_unit + + +def find_derived_vector_type(vector_name: str) -> DerivedVectorType | None: + """ + Find derived vector type based on vector name. Returns None if not a derived vector. + """ + if is_per_day_vector(vector_name): + return DerivedVectorType.PER_DAY + if is_per_interval_vector(vector_name): + return DerivedVectorType.PER_INTERVAL + return None + + +def get_derived_vector_type(vector_name: str) -> DerivedVectorType: + """ + Get derived vector type based on vector name. Raises InvalidDataError if not a derived vector. + + This function is intended to be used when it is known that the vector is a derived vector, checked by `is_derived_vector`. + """ + vector_type = find_derived_vector_type(vector_name) + if vector_type is None: + raise InvalidDataError(f"Expected {vector_name} to be a derived vector.", Service.GENERAL) + return vector_type + + +def is_derived_vector(vector_name: str) -> bool: + """ + Check if a vector is a derived vector. 
+ """ + return find_derived_vector_type(vector_name) is not None + + +def create_per_day_vector_name(vector: str) -> str: + return f"PER_DAY_{vector}" + + +def create_per_interval_vector_name(vector: str) -> str: + return f"PER_INTVL_{vector}" + + +def is_per_interval_vector(vector_name: str) -> bool: + return vector_name.startswith("PER_INTVL_") + + +def is_per_day_vector(vector_name: str) -> bool: + return vector_name.startswith("PER_DAY_") + + +def get_total_vector_name(vector_name: str) -> str: + if vector_name.startswith("PER_DAY_"): + return vector_name.removeprefix("PER_DAY_") + if vector_name.startswith("PER_INTVL_"): + return vector_name.removeprefix("PER_INTVL_") + raise InvalidDataError(f"Expected {vector_name} to be a derived PER_DAY or PER_INTVL vector!", Service.GENERAL) + + +def is_total_vector(vector_name: str, delimiter: str = ":") -> bool: + """ + Check if a vector is a total vector. + + Provide vector name, which is vector base name (WOPT, WOPR, FOPT, GOPR, ...) and vector node name (well, group, region, etc) + separated by delimiter. + + Based on `bool smspec_node_identify_total()` in resdata/lib/ecl/smspec_node.cpp: + https://github.com/equinor/resdata/blob/f82318a84bbefb6a53ed674a04eb2a73d89de02d/lib/ecl/smspec_node.cpp#L315 + """ + + split = vector_name.split(delimiter) + if len(split) < 1: + return False + + vector_base_name = split[0] + if len(vector_base_name) < 1: + return False + + # Regex to strip leading char and trailing H (historical vector) + total_var_regex = r"^[A-Z]|H$" + vector_base_substring = re.sub(total_var_regex, "", vector_base_name) + + return vector_base_substring in TOTAL_VARS + + +def create_derived_vector_table_for_type(total_vector_table_pa: pa.Table, derived_type: DerivedVectorType) -> pa.Table: + """ + Create derived vector table based on provided type. The source vector should be a total vector. + + Raises InvalidDataError if the provided type is not handled. 
+ """ + if derived_type == DerivedVectorType.PER_INTERVAL: + return create_per_interval_vector_table_pa(total_vector_table_pa) + if derived_type == DerivedVectorType.PER_DAY: + return create_per_day_vector_table_pa(total_vector_table_pa) + raise InvalidDataError(f"Unhandled derived vector type: {derived_type}", Service.GENERAL) + + +def create_per_interval_vector_table_pa(total_vector_table_pa: pa.Table) -> pa.Table: + """ + Calculates interval delta data for vector column in provided table. The source vector should be a total vector. + + The input table must contain columns "DATE", "REAL" and the total vector name. + The output table will contain columns "DATE", "REAL" and the per interval vector name. + + This function assumes data is already resampled according to wanted frequency. The per interval value is calculated as the + difference between two consecutive total values. The value at element `n` is the difference between element `n` and `n+1`, + thereby the last value is defined as null, and set to 0.0. + + Raises InvalidDataError if the input table does not contain the expected columns. 
+ """ + + column_names = set(total_vector_table_pa.column_names) + if len(column_names) != 3: + raise InvalidDataError("Table must contain at least 3 columns", Service.GENERAL) + + if not column_names.issuperset(["DATE", "REAL"]): + raise InvalidDataError("Table must contain columns 'DATE' and 'REAL'", Service.GENERAL) + + total_vector_name: str = (column_names - {"DATE", "REAL"}).pop() + validate_summary_vector_table_pa(total_vector_table_pa, total_vector_name) + + per_interval_vector_name = create_per_interval_vector_name(total_vector_name) + + # Convert to polars DataFrame + # - Utilize polars for efficient group_by operation and expressions + # - Sort by "REAL" thereafter "DATE" + sorted_total_vector_df = pl.DataFrame(total_vector_table_pa).sort(["REAL", "DATE"]) + + # Calculate per interval delta values + # - group_by("REAL") to create per interval delta values per realization for vector. + # - diff() calculates diff between element n and n-1 for a vector and given realization, sorted by date. First element is then null. + # - The resulting interval value for element n yields from n to n+1, thus shift(-1). + # - After shift(-1) the last value is null, thereby fill_null with 0.0. + # - explode() to make per interval dataframe grouped per realization into one dataframe in long format: + # - https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.explode.html#polars-dataframe-explode + + per_interval_expr: pl.Expr = ( + pl.col(total_vector_name).diff().shift(-1).fill_null(0.0).alias(per_interval_vector_name) + ) + + sorted_per_interval_vector_df = ( + sorted_total_vector_df.group_by("REAL") + .agg([pl.col("DATE"), per_interval_expr]) + .explode(["DATE", per_interval_vector_name]) + ) + + return sorted_per_interval_vector_df.to_arrow() + + +def create_per_day_vector_table_pa(total_vector_table_pa: pa.Table) -> pa.Table: + """ + Calculates interval delta per day data for vector column in provided table. 
The source vector should be a total vector. + + This implies calculating interval delta data, and divide the delta by number of days between each date. + + The input table must contain columns "DATE", "REAL" and the total vector name. + The output table will contain columns "DATE", "REAL" and the per day vector name. + + This function assumes data is already resampled according to wanted frequency. The per day value is calculated as the + difference between two consecutive total values, divided by the number of days between the two dates. The value at element `n` + is the difference between element `n` and `n+1`, divided by the number of days between the two dates, thereby the last value is + defined as null, and set to 0.0. + + Raises InvalidDataError if the input table does not contain the expected columns. + """ + + column_names = set(total_vector_table_pa.column_names) + if len(column_names) != 3: + raise InvalidDataError("Table must contain at least 3 columns", Service.GENERAL) + + if not column_names.issuperset(["DATE", "REAL"]): + raise InvalidDataError("Table must contain columns 'DATE' and 'REAL'", Service.GENERAL) + + total_vector_name: str = (column_names - {"DATE", "REAL"}).pop() + validate_summary_vector_table_pa(total_vector_table_pa, total_vector_name) + + per_day_vector_name = create_per_day_vector_name(total_vector_name) + + # Convert to polars DataFrame + # - Utilize polars for efficient group_by operation and expressions + # - Sort by "REAL" thereafter "DATE" + sorted_total_vector_df = pl.DataFrame(total_vector_table_pa).sort(["REAL", "DATE"]) + + # Calculate per interval delta values + # - group_by("REAL") to create per interval delta values per realization for vector. + # - diff() calculates diff between element n and n-1 for a vector and given realization, sorted by date. First element is then null. + # - The resulting interval value for element n yields from n to n+1, thus shift(-1). 
+ # - After shift(-1) the last value is null, thereby fill_null with 0.0. + # - per day is per interval delta divided by number of days between each date. + # - explode() to make per interval dataframe grouped per realization into one dataframe in long format: + # - https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.explode.html#polars-dataframe-explode + + # Cast to float32 to avoid integer division + per_interval_expr = pl.col(total_vector_name).diff().shift(-1).fill_null(0.0).cast(pl.Float32) + diff_dates_expr = pl.col("DATE").diff().shift(-1).dt.total_days().cast(pl.Float32) + per_day_expr = (per_interval_expr / diff_dates_expr).fill_null(0.0).alias(per_day_vector_name) + + sorted_per_day_vector_df = ( + sorted_total_vector_df.group_by("REAL") + .agg([pl.col("DATE"), per_day_expr]) + .explode(["DATE", per_day_vector_name]) + ) + + return sorted_per_day_vector_df.to_arrow() + + +def create_derived_realization_vector_list( + derived_vector_table: pa.Table, vector_name: str, is_rate: bool, unit: str +) -> list[DerivedRealizationVector]: + """ + Create a list of DerivedRealizationVector from the derived vector table. 
+    """
+    validate_summary_vector_table_pa(derived_vector_table, vector_name)
+
+    real_arr_np = derived_vector_table.column("REAL").to_numpy()
+    unique_reals, first_occurrence_idx, real_counts = np.unique(real_arr_np, return_index=True, return_counts=True)
+
+    whole_date_np_arr = derived_vector_table.column("DATE").to_numpy()
+    whole_value_np_arr = derived_vector_table.column(vector_name).to_numpy()
+
+    ret_arr: list[DerivedRealizationVector] = []
+    for i, real in enumerate(unique_reals):
+        start_row_idx = first_occurrence_idx[i]
+        row_count = real_counts[i]
+        date_np_arr = whole_date_np_arr[start_row_idx : start_row_idx + row_count]
+        value_np_arr = whole_value_np_arr[start_row_idx : start_row_idx + row_count]
+
+        # Create DerivedRealizationVector
+        ret_arr.append(
+            DerivedRealizationVector(
+                realization=real,
+                timestamps_utc_ms=date_np_arr.astype(int).tolist(),
+                values=value_np_arr.tolist(),
+                is_rate=is_rate,
+                unit=unit,
+            )
+        )
+
+    return ret_arr
diff --git a/backend_py/primary/primary/services/utils/arrow_helpers.py b/backend_py/primary/primary/services/utils/arrow_helpers.py
index a52391742..6eb94b22b 100644
--- a/backend_py/primary/primary/services/utils/arrow_helpers.py
+++ b/backend_py/primary/primary/services/utils/arrow_helpers.py
@@ -4,6 +4,35 @@ import pyarrow.compute as pc
 import numpy as np
 
+from primary.services.service_exceptions import InvalidDataError, Service
+
+
+def validate_summary_vector_table_pa(
+    vector_table: pa.Table, vector_name: str, service: Service = Service.GENERAL
+) -> None:
+    """
+    Check if the pyarrow vector table is valid.
+
+    Expect the pyarrow single vector table to only contain the following columns: DATE, REAL, vector_name.
+
+    Raises InvalidDataError if the table does not contain the expected columns.
+ """ + expected_columns = {"DATE", "REAL", vector_name} + actual_columns = set(vector_table.column_names) + if expected_columns != actual_columns: + unexpected_columns = actual_columns - expected_columns + raise InvalidDataError(f"Unexpected columns in table {unexpected_columns}", service) + + # Validate table column types + if vector_table.field("DATE").type != pa.timestamp("ms"): + raise InvalidDataError( + f'DATE column must be of type timestamp(ms), but got {vector_table.field("DATE").type}', service + ) + if vector_table.field("REAL").type != pa.int16(): + raise InvalidDataError("REAL column must be of type int16", service) + if vector_table.field(vector_name).type != pa.float32(): + raise InvalidDataError(f"{vector_name} column must be of type float32", service) + def sort_table_on_real_then_date(table: pa.Table) -> pa.Table: return table.sort_by([("REAL", "ascending"), ("DATE", "ascending")]) diff --git a/backend_py/primary/tests/unit/services/test_summary_delta_vectors.py b/backend_py/primary/tests/unit/services/test_summary_delta_vectors.py index a206aec68..11907ad06 100644 --- a/backend_py/primary/tests/unit/services/test_summary_delta_vectors.py +++ b/backend_py/primary/tests/unit/services/test_summary_delta_vectors.py @@ -1,12 +1,10 @@ -import pytest import pyarrow as pa -from primary.services.service_exceptions import InvalidDataError, Service + from primary.services.summary_delta_vectors import ( create_delta_vector_table, create_realization_delta_vector_list, RealizationDeltaVector, - _validate_summary_vector_table_pa, ) @@ -122,71 +120,3 @@ def test_create_realization_delta_vector_list_with_empty_table() -> None: # Validate the result assert result == expected_result - - -def test_validate_summary_vector_table_pa_valid() -> None: - vector_name = "VECTOR" - data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0]} - schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.float32())]) - table = 
pa.Table.from_pydict(data, schema=schema) - try: - _validate_summary_vector_table_pa(table, vector_name) - except InvalidDataError: - pytest.fail("validate_summary_vector_table_pa raised InvalidDataError unexpectedly!") - - -def test_validate_summary_vector_table_pa_missing_column() -> None: - vector_name = "VECTOR" - data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6]} - schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16())]) - table = pa.Table.from_pydict(data, schema=schema) - with pytest.raises(InvalidDataError): - _validate_summary_vector_table_pa(table, vector_name) - - -def test_validate_summary_vector_table_pa_unexpected_column() -> None: - vector_name = "VECTOR" - data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0], "EXTRA": [10.0, 11.0, 12.0]} - schema = pa.schema( - [("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.float32()), ("EXTRA", pa.float32())] - ) - table = pa.Table.from_pydict(data, schema=schema) - with pytest.raises(InvalidDataError): - _validate_summary_vector_table_pa(table, vector_name) - - -def test_validate_summary_vector_table_pa_invalid_date_type() -> None: - vector_name = "VECTOR" - data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0]} - schema = pa.schema([("DATE", pa.int32()), ("REAL", pa.int16()), (vector_name, pa.float32())]) - table = pa.Table.from_pydict(data, schema=schema) - with pytest.raises(InvalidDataError): - _validate_summary_vector_table_pa(table, vector_name) - - -def test_validate_summary_vector_table_pa_invalid_real_type() -> None: - vector_name = "VECTOR" - data = {"DATE": [1, 2, 3], "REAL": [4.0, 5.0, 6.0], vector_name: [7.0, 8.0, 9.0]} - schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.float32()), (vector_name, pa.float32())]) - table = pa.Table.from_pydict(data, schema=schema) - with pytest.raises(InvalidDataError): - _validate_summary_vector_table_pa(table, vector_name) - - -def 
test_validate_summary_vector_table_pa_invalid_vector_type() -> None: - vector_name = "VECTOR" - data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7, 8, 9]} - schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.int32())]) - table = pa.Table.from_pydict(data, schema=schema) - with pytest.raises(InvalidDataError): - _validate_summary_vector_table_pa(table, vector_name) - - -def test_validate_summary_vector_table_pa_sumo_service() -> None: - vector_name = "VECTOR" - data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6]} - schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16())]) - table = pa.Table.from_pydict(data, schema=schema) - with pytest.raises(InvalidDataError) as excinfo: - _validate_summary_vector_table_pa(table, vector_name, Service.SUMO) - assert excinfo.value.service == Service.SUMO diff --git a/backend_py/primary/tests/unit/services/test_summary_derived_vectors.py b/backend_py/primary/tests/unit/services/test_summary_derived_vectors.py new file mode 100644 index 000000000..18c382e2c --- /dev/null +++ b/backend_py/primary/tests/unit/services/test_summary_derived_vectors.py @@ -0,0 +1,476 @@ +import datetime +import re +import pytest + +import pyarrow as pa + +from primary.services.service_exceptions import InvalidDataError +from primary.services.summary_derived_vectors import ( + DerivedRealizationVector, + DerivedVectorType, + create_derived_realization_vector_list, + create_derived_vector_unit, + create_per_day_vector_table_pa, + create_per_interval_vector_table_pa, + create_per_day_vector_name, + create_per_interval_vector_name, + find_derived_vector_type, + get_derived_vector_type, + get_total_vector_name, + is_total_vector, + is_derived_vector, + is_per_interval_vector, + is_per_day_vector, +) + +WEEKLY_TOTAL_VECTOR_TABLE = pa.table( + { + "DATE": pa.array( + [datetime.datetime(2021, 1, 1), datetime.datetime(2021, 1, 8), datetime.datetime(2021, 1, 15)] * 3, + type=pa.timestamp("ms"), + ), + "REAL": 
pa.array([1, 1, 1, 2, 2, 2, 4, 4, 4], type=pa.int16()), + "TOTAL_VECTOR": pa.array([50.0, 100.0, 150.0, 300.0, 400.0, 500.0, 1000.0, 1200.0, 1400.0], type=pa.float32()), + } +) + +MONTHLY_TOTAL_VECTOR_TABLE = pa.table( + { + "DATE": pa.array( + [datetime.datetime(2021, 1, 1), datetime.datetime(2021, 2, 1), datetime.datetime(2021, 3, 1)] * 3, + type=pa.timestamp("ms"), + ), + "REAL": pa.array([1, 1, 1, 2, 2, 2, 4, 4, 4], type=pa.int16()), + "TOTAL_VECTOR": pa.array([250.0, 500.0, 750.0, 300.0, 600.0, 900.0, 400.0, 750.0, 1100.0], type=pa.float32()), + } +) + +YEARLY_TOTAL_VECTOR_TABLE = pa.table( + { + "DATE": pa.array( + [datetime.datetime(2021, 1, 1), datetime.datetime(2022, 1, 1), datetime.datetime(2023, 1, 1)] * 3, + type=pa.timestamp("ms"), + ), + "REAL": pa.array([1, 1, 1, 2, 2, 2, 4, 4, 4], type=pa.int16()), + "TOTAL_VECTOR": pa.array( + [500.0, 1000.0, 1500.0, 1000.0, 2000.0, 3000.0, 1500.0, 3000.0, 4500.0], type=pa.float32() + ), + } +) + + +def test_create_per_interval_vector_table_pa_weekly_input(): + # Expected output table + expected_table = pa.table( + { + "DATE": WEEKLY_TOTAL_VECTOR_TABLE.column("DATE"), + "REAL": WEEKLY_TOTAL_VECTOR_TABLE.column("REAL"), + "PER_INTVL_TOTAL_VECTOR": pa.array( + [50.0, 50.0, 0.0, 100.0, 100.0, 0.0, 200.0, 200.0, 0.0], type=pa.float32() + ), + } + ) + + # Call the function + result_table = create_per_interval_vector_table_pa(WEEKLY_TOTAL_VECTOR_TABLE).select( + ["DATE", "REAL", "PER_INTVL_TOTAL_VECTOR"] + ) + + # Assert the result + assert result_table.equals(expected_table) + + +def test_create_per_interval_vector_table_pa_monthly_input(): + # Expected output table + expected_table = pa.table( + { + "DATE": MONTHLY_TOTAL_VECTOR_TABLE.column("DATE"), + "REAL": MONTHLY_TOTAL_VECTOR_TABLE.column("REAL"), + "PER_INTVL_TOTAL_VECTOR": pa.array( + [250.0, 250.0, 0.0, 300.0, 300.0, 0.0, 350.0, 350.0, 0.0], type=pa.float32() + ), + } + ) + + # Call the function + result_table = 
create_per_interval_vector_table_pa(MONTHLY_TOTAL_VECTOR_TABLE).select(
+        ["DATE", "REAL", "PER_INTVL_TOTAL_VECTOR"]
+    )
+
+    # Assert the result
+    assert result_table.equals(expected_table)
+
+
+def test_create_per_interval_vector_table_pa_yearly_input():
+    # Expected output table
+    expected_table = pa.table(
+        {
+            "DATE": YEARLY_TOTAL_VECTOR_TABLE.column("DATE"),
+            "REAL": YEARLY_TOTAL_VECTOR_TABLE.column("REAL"),
+            "PER_INTVL_TOTAL_VECTOR": pa.array(
+                [500.0, 500.0, 0.0, 1000.0, 1000.0, 0.0, 1500.0, 1500.0, 0.0], type=pa.float32()
+            ),
+        }
+    )
+
+    # Call the function
+    result_table = create_per_interval_vector_table_pa(YEARLY_TOTAL_VECTOR_TABLE).select(
+        ["DATE", "REAL", "PER_INTVL_TOTAL_VECTOR"]
+    )
+
+    # Assert the result
+    assert result_table.equals(expected_table)
+
+
+def test_create_per_interval_vector_table_pa_missing_columns():
+    # Create a sample input table with missing columns
+    input_table = pa.table({"DATE": pa.array([1, 2, 3, 4]), "REAL": pa.array([1, 1, 1, 1])})
+
+    # Call the function and expect an InvalidDataError
+    with pytest.raises(InvalidDataError, match="Table must contain at least 3 columns"):
+        create_per_interval_vector_table_pa(input_table)
+
+
+def test_create_per_interval_vector_table_pa_invalid_column_name():
+    # Create a sample input table with invalid columns
+    input_table = pa.table(
+        {
+            "INVALID_DATE_NAME": WEEKLY_TOTAL_VECTOR_TABLE.column("DATE"),
+            "REAL": WEEKLY_TOTAL_VECTOR_TABLE.column("REAL"),
+            "TOTAL_VECTOR": WEEKLY_TOTAL_VECTOR_TABLE.column("TOTAL_VECTOR"),
+        }
+    )
+
+    # Call the function and expect an InvalidDataError
+    with pytest.raises(InvalidDataError, match="Table must contain columns 'DATE' and 'REAL'"):
+        create_per_interval_vector_table_pa(input_table)
+
+
+def test_create_per_interval_vector_table_pa_invalid_column_type():
+    # Create a sample input table with invalid column types
+    data = {
+        "DATE": pa.array([1, 2, 3, 4], type=pa.int32()),
+        "REAL": pa.array([1, 1, 1, 1], type=pa.int16()),
"TOTAL_VECTOR": pa.array([50.0, 100.0, 150.0, 200.0], type=pa.float32()), + } + input_table = pa.table(data) + + # Call the function and expect an InvalidDataError + with pytest.raises( + InvalidDataError, match=r"DATE column must be of type timestamp\(ms\), but got int32 \[service=general\]" + ): + create_per_interval_vector_table_pa(input_table) + + +def test_create_per_day_vector_table_pa_weekly_input(): + # Expected output table + expected_table = pa.table( + { + "DATE": WEEKLY_TOTAL_VECTOR_TABLE.column("DATE"), + "REAL": WEEKLY_TOTAL_VECTOR_TABLE.column("REAL"), + "PER_DAY_TOTAL_VECTOR": pa.array( + [50.0 / 7.0, 50.0 / 7.0, 0.0, 100.0 / 7.0, 100.0 / 7.0, 0.0, 200.0 / 7.0, 200.0 / 7.0, 0.0], + type=pa.float32(), + ), + } + ) + + # Call the function + result_table = create_per_day_vector_table_pa(WEEKLY_TOTAL_VECTOR_TABLE).select( + ["DATE", "REAL", "PER_DAY_TOTAL_VECTOR"] + ) + + # Assert the result + assert result_table.equals(expected_table) + + +def test_create_per_day_vector_table_pa_monthly_input(): + # Expected output table + expected_table = pa.table( + { + "DATE": MONTHLY_TOTAL_VECTOR_TABLE.column("DATE"), + "REAL": MONTHLY_TOTAL_VECTOR_TABLE.column("REAL"), + "PER_DAY_TOTAL_VECTOR": pa.array( + [250.0 / 31.0, 250.0 / 28.0, 0.0, 300.0 / 31.0, 300.0 / 28.0, 0.0, 350.0 / 31.0, 350.0 / 28.0, 0.0], + type=pa.float32(), + ), + } + ) + + # Call the function + result_table = create_per_day_vector_table_pa(MONTHLY_TOTAL_VECTOR_TABLE).select( + ["DATE", "REAL", "PER_DAY_TOTAL_VECTOR"] + ) + + # Assert the result + assert result_table.equals(expected_table) + + +def test_create_per_day_vector_table_pa_yearly_input(): + # Expected output table + expected_table = pa.table( + { + "DATE": YEARLY_TOTAL_VECTOR_TABLE.column("DATE"), + "REAL": YEARLY_TOTAL_VECTOR_TABLE.column("REAL"), + "PER_DAY_TOTAL_VECTOR": pa.array( + [ + 500.0 / 365.0, + 500.0 / 365.0, + 0.0, + 1000.0 / 365.0, + 1000.0 / 365.0, + 0.0, + 1500.0 / 365.0, + 1500.0 / 365.0, + 0.0, + ], + 
type=pa.float32(), + ), + } + ) + + # Call the function + result_table = create_per_day_vector_table_pa(YEARLY_TOTAL_VECTOR_TABLE).select( + ["DATE", "REAL", "PER_DAY_TOTAL_VECTOR"] + ) + + # Assert the result + assert result_table.equals(expected_table) + + +def test_create_per_day_vector_table_pa_missing_columns(): + # Create a sample input table with missing columns + input_table = pa.table({"DATE": pa.array([1, 2, 3, 4]), "REAL": pa.array([1, 1, 1, 1])}) + + # Call the function and expect an InvalidDataError + with pytest.raises(InvalidDataError, match="Table must contain at least 3 columns"): + create_per_day_vector_table_pa(input_table) + + +def test_create_per_day_vector_table_pa_invalid_column_name(): + # Create a sample input table with invalid columns + input_table = pa.table( + { + "INVALID_DATE_NAME": WEEKLY_TOTAL_VECTOR_TABLE.column("DATE"), + "REAL": WEEKLY_TOTAL_VECTOR_TABLE.column("REAL"), + "TOTAL_VECTOR": WEEKLY_TOTAL_VECTOR_TABLE.column("TOTAL_VECTOR"), + } + ) + + # Call the function and expect an InvalidDataError + with pytest.raises(InvalidDataError, match="Table must contain columns 'DATE' and 'REAL'"): + create_per_day_vector_table_pa(input_table) + + +def test_create_per_day_vector_table_pa_invalid_column_type(): + # Create a sample input table with invalid column types + data = { + "DATE": pa.array([1, 2, 3, 4], type=pa.int32()), + "REAL": pa.array([1, 1, 1, 1], type=pa.int16()), + "TOTAL_VECTOR": pa.array([50.0, 100.0, 150.0, 200.0], type=pa.float32()), + } + input_table = pa.table(data) + + # Call the function and expect an InvalidDataError + with pytest.raises( + InvalidDataError, match=r"DATE column must be of type timestamp\(ms\), but got int32 \[service=general\]" + ): + create_per_day_vector_table_pa(input_table) + + +def test_create_derived_realization_vector_list(): + # Create a sample derived vector table + derived_vector_table = pa.table( + { + "DATE": pa.array( + [datetime.datetime(2021, 1, 1), datetime.datetime(2021, 1, 8), 
datetime.datetime(2021, 1, 15)] * 3, + type=pa.timestamp("ms"), + ), + "REAL": pa.array([1, 1, 1, 2, 2, 2, 4, 4, 4], type=pa.int16()), + "DERIVED_VECTOR": pa.array([10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0], type=pa.float32()), + } + ) + + # Expected output list of DerivedRealizationVector + expected_list = [ + DerivedRealizationVector( + realization=1, + timestamps_utc_ms=[1609459200000, 1610064000000, 1610668800000], + values=[10.0, 20.0, 30.0], + is_rate=False, + unit="unit", + ), + DerivedRealizationVector( + realization=2, + timestamps_utc_ms=[1609459200000, 1610064000000, 1610668800000], + values=[40.0, 50.0, 60.0], + is_rate=False, + unit="unit", + ), + DerivedRealizationVector( + realization=4, + timestamps_utc_ms=[1609459200000, 1610064000000, 1610668800000], + values=[70.0, 80.0, 90.0], + is_rate=False, + unit="unit", + ), + ] + + is_rate = False + + # Call the function + result_list = create_derived_realization_vector_list(derived_vector_table, "DERIVED_VECTOR", is_rate, "unit") + + # Assert the result + assert result_list == expected_list + + +def test_create_derived_realization_vector_list_invalid_column_name(): + # Create a sample derived vector table with invalid column name + derived_vector_table = pa.table( + { + "INVALID_DATE_NAME": pa.array( + [datetime.datetime(2021, 1, 1), datetime.datetime(2021, 1, 8), datetime.datetime(2021, 1, 15)] * 3, + type=pa.timestamp("ms"), + ), + "REAL": pa.array([1, 1, 1, 2, 2, 2, 4, 4, 4], type=pa.int16()), + "DERIVED_VECTOR": pa.array([10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0], type=pa.float32()), + } + ) + + is_rate = False + + # Call the function and expect an InvalidDataError + with pytest.raises(InvalidDataError): + create_derived_realization_vector_list(derived_vector_table, "DERIVED_VECTOR", is_rate, "unit") + + +def test_create_derived_realization_vector_list_invalid_column_type(): + # Create a sample derived vector table with invalid column type + derived_vector_table = pa.table( + { 
+ "DATE": pa.array([1, 2, 3, 4], type=pa.int32()), + "REAL": pa.array([1, 1, 1, 1], type=pa.int16()), + "DERIVED_VECTOR": pa.array([10.0, 20.0, 30.0, 40.0], type=pa.float32()), + } + ) + + is_rate = False + + # Call the function and expect an InvalidDataError + with pytest.raises(InvalidDataError): + create_derived_realization_vector_list(derived_vector_table, "DERIVED_VECTOR", is_rate, "unit") + + +def test_get_total_vector_name_per_day(): + vector_name = "PER_DAY_TOTAL_VECTOR" + expected_name = "TOTAL_VECTOR" + assert get_total_vector_name(vector_name) == expected_name + + +def test_get_total_vector_name_per_interval(): + vector_name = "PER_INTVL_TOTAL_VECTOR" + expected_name = "TOTAL_VECTOR" + assert get_total_vector_name(vector_name) == expected_name + + +def test_get_total_vector_name_invalid(): + vector_name = "INVALID_TOTAL_VECTOR" + with pytest.raises( + InvalidDataError, match="Expected INVALID_TOTAL_VECTOR to be a derived PER_DAY or PER_INTVL vector!" + ): + get_total_vector_name(vector_name) + + +def test_is_total_vector_true(): + assert is_total_vector("WOPT") is True + assert is_total_vector("WOPT:well") is True + assert is_total_vector("GOPT:group") is True + assert is_total_vector("FOPT:region") is True + assert is_total_vector("WOPTH:well") is True # Historical vector + assert is_total_vector("GOPTH:group") is True # Historical vector + + +def test_is_total_vector_false(): + assert is_total_vector("WOPR") is False + assert is_total_vector("WOPR:well") is False + assert is_total_vector("GOPR:group") is False + assert is_total_vector("FOPR:region") is False + assert is_total_vector("WOP:well") is False # Less than 3 characters after removing leading character + assert is_total_vector("GOP:group") is False # Less than 3 characters after removing leading character + + +def test_is_total_vector_invalid_format(): + assert is_total_vector("OPT") is False # Missing first character + assert is_total_vector(":well") is False # Empty vector base name + assert 
is_total_vector("W:well") is False # Less than 3 characters after removing leading character + assert is_total_vector("") is False # Empty string + assert is_total_vector(":") is False # Only delimiter + + +def test_create_derived_vector_unit_per_day(): + assert create_derived_vector_unit("m3", DerivedVectorType.PER_DAY) == "m3/DAY" + + +def test_create_derived_vector_unit_other(): + assert create_derived_vector_unit("m3", DerivedVectorType.PER_INTERVAL) == "m3" + + +def test_find_derived_vector_category_per_day(): + assert find_derived_vector_type("PER_DAY_VECTOR") == DerivedVectorType.PER_DAY + + +def test_find_derived_vector_category_per_interval(): + assert find_derived_vector_type("PER_INTVL_VECTOR") == DerivedVectorType.PER_INTERVAL + + +def test_find_derived_vector_category_none(): + assert find_derived_vector_type("OTHER_VECTOR") is None + + +def test_get_derived_vector_category_per_day(): + assert get_derived_vector_type("PER_DAY_VECTOR") == DerivedVectorType.PER_DAY + + +def test_get_derived_vector_category_per_interval(): + assert get_derived_vector_type("PER_INTVL_VECTOR") == DerivedVectorType.PER_INTERVAL + + +def test_get_derived_vector_category_none(): + with pytest.raises( + InvalidDataError, match=re.escape("Expected OTHER_VECTOR to be a derived vector. 
[service=general]") + ): + get_derived_vector_type("OTHER_VECTOR") + + +def test_is_derived_vector_true(): + assert is_derived_vector("PER_DAY_VECTOR") is True + assert is_derived_vector("PER_INTVL_VECTOR") is True + + +def test_is_derived_vector_false(): + assert is_derived_vector("OTHER_VECTOR") is False + + +def test_create_per_day_vector_name(): + assert create_per_day_vector_name("VECTOR") == "PER_DAY_VECTOR" + + +def test_create_per_interval_vector_name(): + assert create_per_interval_vector_name("VECTOR") == "PER_INTVL_VECTOR" + + +def test_is_per_interval_vector_true(): + assert is_per_interval_vector("PER_INTVL_VECTOR") is True + + +def test_is_per_interval_vector_false(): + assert is_per_interval_vector("PER_DAY_VECTOR") is False + + +def test_is_per_day_vector_true(): + assert is_per_day_vector("PER_DAY_VECTOR") is True + + +def test_is_per_day_vector_false(): + assert is_per_day_vector("PER_INTVL_VECTOR") is False diff --git a/backend_py/primary/tests/unit/services/utils/test_arrow_helpers.py b/backend_py/primary/tests/unit/services/utils/test_arrow_helpers.py index a7feb2acd..6568b6b05 100644 --- a/backend_py/primary/tests/unit/services/utils/test_arrow_helpers.py +++ b/backend_py/primary/tests/unit/services/utils/test_arrow_helpers.py @@ -1,9 +1,13 @@ +import pytest +import pyarrow as pa +import numpy as np + +from primary.services.service_exceptions import InvalidDataError, Service + from primary.services.utils.arrow_helpers import is_date_column_monotonically_increasing from primary.services.utils.arrow_helpers import find_first_non_increasing_date_pair from primary.services.utils.arrow_helpers import detect_missing_realizations - -import pyarrow as pa -import numpy as np +from primary.services.utils.arrow_helpers import validate_summary_vector_table_pa def test_monotonically_increasing_date_util_functions() -> None: @@ -54,3 +58,71 @@ def test_detect_missing_realizations() -> None: assert len(missing_reals_list) == 2 assert 0 in missing_reals_list 
assert 5 in missing_reals_list + + +def test_validate_summary_vector_table_pa_valid() -> None: + vector_name = "VECTOR" + data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0]} + schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.float32())]) + table = pa.Table.from_pydict(data, schema=schema) + try: + validate_summary_vector_table_pa(table, vector_name) + except InvalidDataError: + pytest.fail("validate_summary_vector_table_pa raised InvalidDataError unexpectedly!") + + +def test_validate_summary_vector_table_pa_missing_column() -> None: + vector_name = "VECTOR" + data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6]} + schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16())]) + table = pa.Table.from_pydict(data, schema=schema) + with pytest.raises(InvalidDataError): + validate_summary_vector_table_pa(table, vector_name) + + +def test_validate_summary_vector_table_pa_unexpected_column() -> None: + vector_name = "VECTOR" + data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0], "EXTRA": [10.0, 11.0, 12.0]} + schema = pa.schema( + [("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.float32()), ("EXTRA", pa.float32())] + ) + table = pa.Table.from_pydict(data, schema=schema) + with pytest.raises(InvalidDataError): + validate_summary_vector_table_pa(table, vector_name) + + +def test_validate_summary_vector_table_pa_invalid_date_type() -> None: + vector_name = "VECTOR" + data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7.0, 8.0, 9.0]} + schema = pa.schema([("DATE", pa.int32()), ("REAL", pa.int16()), (vector_name, pa.float32())]) + table = pa.Table.from_pydict(data, schema=schema) + with pytest.raises(InvalidDataError): + validate_summary_vector_table_pa(table, vector_name) + + +def test_validate_summary_vector_table_pa_invalid_real_type() -> None: + vector_name = "VECTOR" + data = {"DATE": [1, 2, 3], "REAL": [4.0, 5.0, 6.0], vector_name: [7.0, 8.0, 9.0]} + 
schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.float32()), (vector_name, pa.float32())]) + table = pa.Table.from_pydict(data, schema=schema) + with pytest.raises(InvalidDataError): + validate_summary_vector_table_pa(table, vector_name) + + +def test_validate_summary_vector_table_pa_invalid_vector_type() -> None: + vector_name = "VECTOR" + data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6], vector_name: [7, 8, 9]} + schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16()), (vector_name, pa.int32())]) + table = pa.Table.from_pydict(data, schema=schema) + with pytest.raises(InvalidDataError): + validate_summary_vector_table_pa(table, vector_name) + + +def test_validate_summary_vector_table_pa_sumo_service() -> None: + vector_name = "VECTOR" + data = {"DATE": [1, 2, 3], "REAL": [4, 5, 6]} + schema = pa.schema([("DATE", pa.timestamp("ms")), ("REAL", pa.int16())]) + table = pa.Table.from_pydict(data, schema=schema) + with pytest.raises(InvalidDataError) as excinfo: + validate_summary_vector_table_pa(table, vector_name, Service.SUMO) + assert excinfo.value.service == Service.SUMO diff --git a/frontend/src/api/autogen/sdk.gen.ts b/frontend/src/api/autogen/sdk.gen.ts index 7caa1b280..ee66a4acd 100644 --- a/frontend/src/api/autogen/sdk.gen.ts +++ b/frontend/src/api/autogen/sdk.gen.ts @@ -245,6 +245,8 @@ export const getEnsembleDetails = ( /** * Get Vector List * Get list of all vectors in a given Sumo ensemble, excluding any historical vectors + * + * Optionally include derived vectors. 
*/ export const getVectorList = ( options: Options diff --git a/frontend/src/api/autogen/types.gen.ts b/frontend/src/api/autogen/types.gen.ts index 7aee4b5ce..770bd8fe7 100644 --- a/frontend/src/api/autogen/types.gen.ts +++ b/frontend/src/api/autogen/types.gen.ts @@ -92,6 +92,16 @@ export type DatedFlowNetwork_api = { network: NetworkNode_api; }; +export type DerivedVectorInfo_api = { + type: DerivedVectorType_api; + sourceVector: string; +}; + +export enum DerivedVectorType_api { + PER_DAY = "PER_DAY", + PER_INTVL = "PER_INTVL", +} + export type EnsembleDetails_api = { name: string; field_identifier: string; @@ -615,7 +625,7 @@ export enum StatisticFunction_api { } export type StatisticValueObject_api = { - statistic_function: StatisticFunction_api; + statisticFunction: StatisticFunction_api; values: Array; }; @@ -839,41 +849,44 @@ export type ValidationError_api = { export type VectorDescription_api = { name: string; - descriptive_name: string; - has_historical: boolean; + descriptiveName: string; + hasHistorical: boolean; + derivedVectorInfo: DerivedVectorInfo_api | null; }; export type VectorHistoricalData_api = { - timestamps_utc_ms: Array; + timestampsUtcMs: Array; values: Array; unit: string; - is_rate: boolean; + isRate: boolean; }; export type VectorRealizationData_api = { realization: number; - timestamps_utc_ms: Array; + timestampsUtcMs: Array; values: Array; unit: string; - is_rate: boolean; + isRate: boolean; + derivedVectorInfo: DerivedVectorInfo_api | null; }; export type VectorStatisticData_api = { realizations: Array; - timestamps_utc_ms: Array; - value_objects: Array; + timestampsUtcMs: Array; + valueObjects: Array; unit: string; - is_rate: boolean; + isRate: boolean; + derivedVectorInfo: DerivedVectorInfo_api | null; }; export type VectorStatisticSensitivityData_api = { realizations: Array; - timestamps_utc_ms: Array; - value_objects: Array; + timestampsUtcMs: Array; + valueObjects: Array; unit: string; - is_rate: boolean; - sensitivity_name: 
string; - sensitivity_case: string; + isRate: boolean; + sensitivityName: string; + sensitivityCase: string; }; export type VfpInjTable_api = { @@ -1176,6 +1189,10 @@ export type GetVectorListData_api = { * Ensemble name */ ensemble_name: string; + /** + * Include derived vectors + */ + include_derived_vectors?: boolean | null; }; url: "/timeseries/vector_list/"; }; @@ -1218,6 +1235,10 @@ export type GetDeltaEnsembleVectorListData_api = { * Reference ensemble name */ reference_ensemble_name: string; + /** + * Include derived vectors + */ + include_derived_vectors?: boolean | null; }; url: "/timeseries/delta_ensemble_vector_list/"; }; diff --git a/frontend/src/modules/SimulationTimeSeries/dataGenerators.ts b/frontend/src/modules/SimulationTimeSeries/dataGenerators.ts index b8d2775b2..e9b3b767c 100644 --- a/frontend/src/modules/SimulationTimeSeries/dataGenerators.ts +++ b/frontend/src/modules/SimulationTimeSeries/dataGenerators.ts @@ -37,7 +37,7 @@ export function makeVectorGroupDataGenerator( let unit = ""; vector.data.forEach((el) => { unit = simulationUnitReformat(el.unit); - const indexOfTimestamp = el.timestamps_utc_ms.indexOf(activeTimestampUtcMs); + const indexOfTimestamp = el.timestampsUtcMs.indexOf(activeTimestampUtcMs); data.push({ key: el.realization, value: indexOfTimestamp === -1 ? 
el.values[0] : el.values[indexOfTimestamp], diff --git a/frontend/src/modules/SimulationTimeSeries/settings/atoms/derivedAtoms.ts b/frontend/src/modules/SimulationTimeSeries/settings/atoms/derivedAtoms.ts index a3c87390b..7a9554f1c 100644 --- a/frontend/src/modules/SimulationTimeSeries/settings/atoms/derivedAtoms.ts +++ b/frontend/src/modules/SimulationTimeSeries/settings/atoms/derivedAtoms.ts @@ -1,3 +1,4 @@ +import { VectorDefinitionsType } from "@assets/vectorDefinitions"; import { DeltaEnsemble } from "@framework/DeltaEnsemble"; import { DeltaEnsembleIdent } from "@framework/DeltaEnsembleIdent"; import { Parameter, ParameterIdent, ParameterType } from "@framework/EnsembleParameters"; @@ -6,7 +7,9 @@ import { RegularEnsemble } from "@framework/RegularEnsemble"; import { RegularEnsembleIdent } from "@framework/RegularEnsembleIdent"; import { filterEnsembleIdentsByType } from "@framework/utils/ensembleIdentUtils"; import { fixupEnsembleIdents } from "@framework/utils/ensembleUiHelpers"; +import { createDerivedVectorDescription } from "@modules/SimulationTimeSeries/utils/vectorDescriptionUtils"; import { createVectorSelectorDataFromVectors } from "@modules/_shared/components/VectorSelector"; +import { simulationVectorDefinition } from "@modules/_shared/reservoirSimulationStringUtils"; import { atom } from "jotai"; @@ -122,30 +125,60 @@ export const isVectorListQueriesFetchingAtom = atom((get) => { return vectorListQueries.some((query) => query.isFetching); }); -export const availableVectorNamesAtom = atom((get) => { - const ensembleVectorListsHelper = get(ensembleVectorListsHelperAtom); - - const vectorNamesUnion = ensembleVectorListsHelper.vectorsUnion(); +export const ensembleVectorListsHelperAtom = atom((get) => { + const vectorListQueries = get(vectorListQueriesAtom); + const selectedEnsembleIdents = get(selectedEnsembleIdentsAtom); - return vectorNamesUnion; + return new EnsembleVectorListsHelper(selectedEnsembleIdents, vectorListQueries); }); export const 
vectorSelectorDataAtom = atom((get) => { const isFetching = get(isVectorListQueriesFetchingAtom); - const availableVectorNames = get(availableVectorNamesAtom); + const ensembleVectorListsHelper = get(ensembleVectorListsHelperAtom); if (isFetching) { return []; } - return createVectorSelectorDataFromVectors(availableVectorNames); + const vectorNames = ensembleVectorListsHelper.vectorNamesUnion(); + + return createVectorSelectorDataFromVectors(vectorNames); }); -export const ensembleVectorListsHelperAtom = atom((get) => { - const vectorListQueries = get(vectorListQueriesAtom); - const selectedEnsembleIdents = get(selectedEnsembleIdentsAtom); +export const customVectorDefinitionsAtom = atom((get) => { + const isFetching = get(isVectorListQueriesFetchingAtom); + const ensembleVectorListsHelper = get(ensembleVectorListsHelperAtom); - return new EnsembleVectorListsHelper(selectedEnsembleIdents, vectorListQueries); + if (isFetching) { + return null; + } + + const vectors = ensembleVectorListsHelper.vectorsUnion(); + + // Create custom vector definitions for parent nodes of derived vectors + const customVectorParentNodeDefinitions: VectorDefinitionsType = {}; + for (const vector of vectors) { + if (!vector.derivedVectorInfo) { + continue; + } + + // Create description only for base name of source vector (i.e. parent node) + const sourceVectorBaseName = vector.derivedVectorInfo.sourceVector.split(":", 2)[0]; + const derivedVectorDescription = createDerivedVectorDescription( + sourceVectorBaseName, + vector.derivedVectorInfo.type + ); + const sourceBaseVectorType = simulationVectorDefinition(sourceVectorBaseName)?.type ?? 
""; + + // Only add custom definitions for parent nodes + const parentNodeName = vector.name.split(":", 2)[0]; + customVectorParentNodeDefinitions[parentNodeName] = { + type: sourceBaseVectorType, + description: derivedVectorDescription, + }; + } + + return customVectorParentNodeDefinitions; }); export const vectorSpecificationsAtom = atom((get) => { diff --git a/frontend/src/modules/SimulationTimeSeries/settings/atoms/queryAtoms.ts b/frontend/src/modules/SimulationTimeSeries/settings/atoms/queryAtoms.ts index f95300a56..0b22cf669 100644 --- a/frontend/src/modules/SimulationTimeSeries/settings/atoms/queryAtoms.ts +++ b/frontend/src/modules/SimulationTimeSeries/settings/atoms/queryAtoms.ts @@ -21,6 +21,7 @@ export const vectorListQueriesAtom = atomWithQueries((get) => { query: { case_uuid: ensembleIdent.getCaseUuid(), ensemble_name: ensembleIdent.getEnsembleName(), + include_derived_vectors: true, }, throwOnError: true, }); @@ -51,6 +52,7 @@ export const vectorListQueriesAtom = atomWithQueries((get) => { comparison_ensemble_name: comparisonEnsembleIdent.getEnsembleName(), reference_case_uuid: referenceEnsembleIdent.getCaseUuid(), reference_ensemble_name: referenceEnsembleIdent.getEnsembleName(), + include_derived_vectors: true, }, throwOnError: true, }); diff --git a/frontend/src/modules/SimulationTimeSeries/settings/settings.tsx b/frontend/src/modules/SimulationTimeSeries/settings/settings.tsx index 65b2d5b9d..9cea20cf4 100644 --- a/frontend/src/modules/SimulationTimeSeries/settings/settings.tsx +++ b/frontend/src/modules/SimulationTimeSeries/settings/settings.tsx @@ -44,6 +44,7 @@ import { } from "./atoms/baseAtoms"; import { continuousAndNonConstantParametersUnionAtom, + customVectorDefinitionsAtom, ensembleVectorListsHelperAtom, isVectorListQueriesFetchingAtom, selectedEnsembleIdentsAtom, @@ -87,6 +88,7 @@ export function Settings({ settingsContext, workbenchSession }: ModuleSettingsPr const [statisticsSelection, setStatisticsSelection] = 
useAtom(statisticsSelectionAtom); const [selectedVectorNames, setSelectedVectorNames] = useAtom(selectedVectorNamesAtom); const vectorSelectorData = useAtomValue(vectorSelectorDataAtom); + const customVectorDefinitions = useAtomValue(customVectorDefinitionsAtom); const statisticsType = useAtomValue(statisticsTypeAtom); const [filteredParameterIdentList, setFilteredParameterIdentList] = useAtom(filteredParameterIdentListAtom); const setUserSelectedEnsembleIdents = useSetAtom(userSelectedEnsembleIdentsAtom); @@ -320,6 +322,7 @@ export function Settings({ settingsContext, workbenchSession }: ModuleSettingsPr numSecondsUntilSuggestionsAreShown={0.5} lineBreakAfterTag={true} onChange={handleVectorSelectionChange} + customVectorDefinitions={customVectorDefinitions ?? undefined} /> diff --git a/frontend/src/modules/SimulationTimeSeries/utils/ensemblesVectorListHelper.ts b/frontend/src/modules/SimulationTimeSeries/utils/ensemblesVectorListHelper.ts index 350e8f406..32cb02b53 100644 --- a/frontend/src/modules/SimulationTimeSeries/utils/ensemblesVectorListHelper.ts +++ b/frontend/src/modules/SimulationTimeSeries/utils/ensemblesVectorListHelper.ts @@ -38,7 +38,7 @@ export class EnsembleVectorListsHelper { * * @returns Array of unique vector names, as union of all vectors in all queries */ - vectorsUnion(): string[] { + vectorNamesUnion(): string[] { const uniqueVectorNames = new Set(); for (const query of this._queries) { if (query.data) { @@ -51,6 +51,30 @@ export class EnsembleVectorListsHelper { return Array.from(uniqueVectorNames); } + /** + * Get union of vector descriptions from all queries + * + * If duplicate vector names exist, this will keep the first occurrence of the vector description + * + * @returns Array of unique vector descriptions, as union of all vectors in all queries + */ + vectorsUnion(): VectorDescription_api[] { + const vectorDescriptionMap = new Map(); + for (const query of this._queries) { + if (query.data) { + for (const vector of query.data) { + 
// Note: This will keep the first vector with the same name, + // i.e. if vectors are different in different ensembles, only the first one will be kept + if (!vectorDescriptionMap.has(vector.name)) { + vectorDescriptionMap.set(vector.name, vector); + } + } + } + } + + return Array.from(vectorDescriptionMap.values()); + } + /** * Check if vector is in the requested ensembles * @@ -79,7 +103,7 @@ export class EnsembleVectorListsHelper { const index = this.findIndexOfEnsembleIdent(ensembleIdent); if (index === -1 || !this._queries[index].data) return false; - return this._queries[index].data?.some((vec) => vec.name === vector && vec.has_historical) ?? false; + return this._queries[index].data?.some((vec) => vec.name === vector && vec.hasHistorical) ?? false; } /** diff --git a/frontend/src/modules/SimulationTimeSeries/utils/vectorDescriptionUtils.ts b/frontend/src/modules/SimulationTimeSeries/utils/vectorDescriptionUtils.ts new file mode 100644 index 000000000..bcd2947d4 --- /dev/null +++ b/frontend/src/modules/SimulationTimeSeries/utils/vectorDescriptionUtils.ts @@ -0,0 +1,21 @@ +import { DerivedVectorType_api } from "@api"; +import { simulationVectorDescription } from "@modules/_shared/reservoirSimulationStringUtils"; + +/** + * Create vector description for derived vector + * + * Use source vector name to retrieve the official vector description, and add prefix and suffix + */ +export function createDerivedVectorDescription(sourceVector: string, derivedVectorType: DerivedVectorType_api): string { + let prefix: string | undefined = undefined; + let suffix: string | undefined = undefined; + if (derivedVectorType === DerivedVectorType_api.PER_DAY) { + prefix = "Average "; + suffix = " Per day"; + } + if (derivedVectorType === DerivedVectorType_api.PER_INTVL) { + prefix = "Interval "; + } + + return simulationVectorDescription(sourceVector, prefix, suffix); +} diff --git a/frontend/src/modules/SimulationTimeSeries/view/atoms/baseAtoms.ts 
b/frontend/src/modules/SimulationTimeSeries/view/atoms/baseAtoms.ts index a7e8c2a78..9f4086ba4 100644 --- a/frontend/src/modules/SimulationTimeSeries/view/atoms/baseAtoms.ts +++ b/frontend/src/modules/SimulationTimeSeries/view/atoms/baseAtoms.ts @@ -5,6 +5,6 @@ import { atom } from "jotai"; export const userSelectedActiveTimestampUtcMsAtom = atom(null); export const vectorSpecificationsAtom = atom([]); -export const resampleFrequencyAtom = atom(null); +export const resampleFrequencyAtom = atom(Frequency_api.MONTHLY); export const visualizationModeAtom = atom(VisualizationMode.STATISTICAL_FANCHART); export const showObservationsAtom = atom(true); diff --git a/frontend/src/modules/SimulationTimeSeries/view/atoms/derivedAtoms.ts b/frontend/src/modules/SimulationTimeSeries/view/atoms/derivedAtoms.ts index f0449d017..3cfb76e81 100644 --- a/frontend/src/modules/SimulationTimeSeries/view/atoms/derivedAtoms.ts +++ b/frontend/src/modules/SimulationTimeSeries/view/atoms/derivedAtoms.ts @@ -92,7 +92,7 @@ export const activeTimestampUtcMsAtom = atom((get) => { loadedVectorSpecificationsAndRealizationData.length > 0 ) { const firstTimeStamp = - loadedVectorSpecificationsAndRealizationData.at(0)?.data.at(0)?.timestamps_utc_ms[0] ?? null; + loadedVectorSpecificationsAndRealizationData.at(0)?.data.at(0)?.timestampsUtcMs[0] ?? 
null; return firstTimeStamp; } diff --git a/frontend/src/modules/SimulationTimeSeries/view/hooks/usePlotBuilder.ts b/frontend/src/modules/SimulationTimeSeries/view/hooks/usePlotBuilder.ts index 155712d99..b6349db1d 100644 --- a/frontend/src/modules/SimulationTimeSeries/view/hooks/usePlotBuilder.ts +++ b/frontend/src/modules/SimulationTimeSeries/view/hooks/usePlotBuilder.ts @@ -11,6 +11,7 @@ import { useAtomValue } from "jotai"; import { useMakeEnsembleDisplayNameFunc } from "./useMakeEnsembleDisplayNameFunc"; import { GroupBy, VectorSpec, VisualizationMode } from "../../typesAndEnums"; +import { resampleFrequencyAtom } from "../atoms/baseAtoms"; import { activeTimestampUtcMsAtom, loadedRegularEnsembleVectorSpecificationsAndHistoricalDataAtom, @@ -40,6 +41,7 @@ export function usePlotBuilder( const statisticsSelection = viewContext.useSettingsToViewInterfaceValue("statisticsSelection"); const subplotLimitation = viewContext.useSettingsToViewInterfaceValue("subplotLimitation"); + const resampleFrequency = useAtomValue(resampleFrequencyAtom); const vectorObservationsQueries = useAtomValue(vectorObservationsQueriesAtom); const loadedVectorSpecificationsAndRealizationData = useAtomValue(loadedVectorSpecificationsAndRealizationDataAtom); const loadedVectorSpecificationsAndStatisticsData = useAtomValue(loadedVectorSpecificationsAndStatisticsDataAtom); @@ -74,6 +76,7 @@ export function usePlotBuilder( const plotBuilder = new PlotBuilder( subplotOwner, vectorSpecifications ?? 
[], + resampleFrequency, makeEnsembleDisplayName, colorSet, wrapperDivSize.width, diff --git a/frontend/src/modules/SimulationTimeSeries/view/utils/PlotBuilder.ts b/frontend/src/modules/SimulationTimeSeries/view/utils/PlotBuilder.ts index 5406be0fc..c9406b761 100644 --- a/frontend/src/modules/SimulationTimeSeries/view/utils/PlotBuilder.ts +++ b/frontend/src/modules/SimulationTimeSeries/view/utils/PlotBuilder.ts @@ -1,6 +1,8 @@ import React from "react"; import { + DerivedVectorInfo_api, + Frequency_api, SummaryVectorObservations_api, VectorHistoricalData_api, VectorRealizationData_api, @@ -11,7 +13,12 @@ import { RegularEnsembleIdent } from "@framework/RegularEnsembleIdent"; import { isEnsembleIdentOfType } from "@framework/utils/ensembleIdentUtils"; import { timestampUtcMsToCompactIsoString } from "@framework/utils/timestampUtils"; import { ColorSet } from "@lib/utils/ColorSet"; -import { SubplotLimitDirection, VectorSpec } from "@modules/SimulationTimeSeries/typesAndEnums"; +import { + FrequencyEnumToStringMapping, + SubplotLimitDirection, + VectorSpec, +} from "@modules/SimulationTimeSeries/typesAndEnums"; +import { createDerivedVectorDescription } from "@modules/SimulationTimeSeries/utils/vectorDescriptionUtils"; import { CoordinateDomain, Figure, makeSubplots } from "@modules/_shared/Figure"; import { simulationUnitReformat, simulationVectorDescription } from "@modules/_shared/reservoirSimulationStringUtils"; @@ -24,12 +31,13 @@ import { createVectorRealizationTrace, createVectorRealizationTraces, createVectorStatisticsTraces, + getTraceLineShape, } from "./PlotlyTraceUtils/createVectorTracesUtils"; import { scaleHexColorLightness } from "./colorUtils"; import { EnsemblesContinuousParameterColoring } from "./ensemblesContinuousParameterColoring"; import { TimeSeriesPlotData } from "./timeSeriesPlotData"; -type VectorNameUnitMap = { [vectorName: string]: string }; +type VectorNameSubplotTitleMap = { [vectorName: string]: string }; type HexColorMap = { [key: 
string]: string }; export enum SubplotOwner { VECTOR = "Vector", @@ -46,6 +54,8 @@ export class PlotBuilder { private _numberOfSubplots = 0; private _subplotOwner: SubplotOwner; + private _resampleFrequency: Frequency_api | null = null; + private _addedVectorsLegendTracker: string[] = []; private _addedEnsemblesLegendTracker: (RegularEnsembleIdent | DeltaEnsembleIdent)[] = []; @@ -74,7 +84,7 @@ export class PlotBuilder { private _traceFallbackColor = "#000000"; - private _vectorNameUnitMap: VectorNameUnitMap = {}; + private _vectorNameSubplotTitleMap: VectorNameSubplotTitleMap = {}; private _timeAnnotationTimestamps: number[] = []; @@ -88,6 +98,7 @@ export class PlotBuilder { constructor( subplotOwner: SubplotOwner, selectedVectorSpecifications: VectorSpec[], + resampleFrequency: Frequency_api | null, makeEnsembleDisplayName: (ensembleIdent: RegularEnsembleIdent | DeltaEnsembleIdent) => string, colorSet: ColorSet, width: number, @@ -101,6 +112,7 @@ export class PlotBuilder { this._width = width; this._height = height; this._makeEnsembleDisplayName = makeEnsembleDisplayName; + this._resampleFrequency = resampleFrequency; this._uniqueVectorNames = [...new Set(selectedVectorSpecifications.map((vec) => vec.vectorName))]; this._uniqueEnsembleIdents = []; @@ -225,15 +237,15 @@ export class PlotBuilder { * The subplot titles are updated based on the vector name and unit provided in the vectorNameUnitMap. * The unit is provided after traces are added, thus the subplot titles are updated after traces are added. 
*/ - private createAndSetSubplotTitles(): void { + private updateSubplotTitles(): void { if (this._subplotOwner === SubplotOwner.VECTOR) { this._uniqueVectorNames.forEach((vectorName, subplotIndex) => { + const newSubplotTitle = this._vectorNameSubplotTitleMap[vectorName]; const { row, col } = this.getSubplotRowAndColFromIndex(subplotIndex); - if (!this._figure.hasSubplotTitle(row, col)) { + if (!newSubplotTitle || !this._figure.hasSubplotTitle(row, col)) { return; } - const newSubplotTitle = this.createVectorSubplotTitle(vectorName); this._figure.updateSubplotTitle(newSubplotTitle, row, col); }); } else { @@ -250,7 +262,7 @@ export class PlotBuilder { build(handleOnClick?: ((event: Readonly) => void) | undefined): React.ReactNode { this.createGraphLegends(); - this.createAndSetSubplotTitles(); + this.updateSubplotTitles(); // Add time annotations and shapes for (let index = 0; index < this._numberOfSubplots; index++) { @@ -313,14 +325,15 @@ export class PlotBuilder { } const name = this.makeTraceNameFromVectorSpecification(elm.vectorSpecification); + const lineShape = getTraceLineShape(realizationData); const vectorRealizationTrace = createVectorRealizationTrace({ vectorRealizationData: realizationData, name: name, color: parameterColor, legendGroup: this._makeEnsembleDisplayName(elm.vectorSpecification.ensembleIdent), + lineShape: lineShape, hoverTemplate: this._defaultHoverTemplate, showLegend: addLegendForTraces, - yaxis: `y${subplotIndex + 1}`, type: this._scatterType, }); @@ -328,7 +341,11 @@ export class PlotBuilder { this._figure.addTrace(vectorRealizationTrace, row, col); this._hasRealizationsTracesColoredByParameter = true; - this.insertVectorNameAndUnitIntoMap(elm.vectorSpecification.vectorName, realizationData.unit); + this.createVectorSubplotTitleAndInsertIntoMap( + elm.vectorSpecification.vectorName, + realizationData.unit, + realizationData.derivedVectorInfo + ); } } } @@ -350,6 +367,7 @@ export class PlotBuilder { for (const elm of 
selectedVectorsRealizationData) { const subplotIndex = this.getSubplotIndexFromVectorSpec(elm.vectorSpecification); if (subplotIndex === -1) continue; + if (elm.data.length === 0) continue; // Get legend group and color const legendGroup = this.getLegendGroupAndUpdateTracker(elm.vectorSpecification); @@ -359,11 +377,13 @@ export class PlotBuilder { } const name = this.makeTraceNameFromVectorSpecification(elm.vectorSpecification); + const lineShape = getTraceLineShape(elm.data[0]); const vectorRealizationTraces = createVectorRealizationTraces({ vectorRealizationsData: elm.data, name: name, color: color, legendGroup: legendGroup, + lineShape: lineShape, hoverTemplate: this._defaultHoverTemplate, showLegend: addLegendForTraces, type: this._scatterType, @@ -373,7 +393,11 @@ export class PlotBuilder { this._figure.addTraces(vectorRealizationTraces, row, col); if (elm.data.length !== 0) { - this.insertVectorNameAndUnitIntoMap(elm.vectorSpecification.vectorName, elm.data[0].unit); + this.createVectorSubplotTitleAndInsertIntoMap( + elm.vectorSpecification.vectorName, + elm.data[0].unit, + elm.data[0].derivedVectorInfo + ); } } } @@ -398,19 +422,24 @@ export class PlotBuilder { const color = this.getHexColor(elm.vectorSpecification); const name = this.makeTraceNameFromVectorSpecification(elm.vectorSpecification); + const lineShape = getTraceLineShape(elm.data); const vectorFanchartTraces = createVectorFanchartTraces({ vectorStatisticData: elm.data, hexColor: color, legendGroup: legendGroup, + lineShape: lineShape, name: name, - yaxis: `y${subplotIndex + 1}`, type: this._scatterType, }); const { row, col } = this.getSubplotRowAndColFromIndex(subplotIndex); this._figure.addTraces(vectorFanchartTraces, row, col); - this.insertVectorNameAndUnitIntoMap(elm.vectorSpecification.vectorName, elm.data.unit); + this.createVectorSubplotTitleAndInsertIntoMap( + elm.vectorSpecification.vectorName, + elm.data.unit, + elm.data.derivedVectorInfo + ); } } @@ -437,10 +466,12 @@ export class 
PlotBuilder { const color = this.getHexColor(elm.vectorSpecification); const name = this.makeTraceNameFromVectorSpecification(elm.vectorSpecification); + const lineShape = getTraceLineShape(elm.data); const vectorStatisticsTraces = createVectorStatisticsTraces({ vectorStatisticData: elm.data, hexColor: color, legendGroup: legendGroup, + lineShape: lineShape, name: name, lineWidth: lineWidth, type: this._scatterType, @@ -449,7 +480,11 @@ export class PlotBuilder { const { row, col } = this.getSubplotRowAndColFromIndex(subplotIndex); this._figure.addTraces(vectorStatisticsTraces, row, col); - this.insertVectorNameAndUnitIntoMap(elm.vectorSpecification.vectorName, elm.data.unit); + this.createVectorSubplotTitleAndInsertIntoMap( + elm.vectorSpecification.vectorName, + elm.data.unit, + elm.data.derivedVectorInfo + ); } } @@ -472,18 +507,20 @@ export class PlotBuilder { if (subplotIndex === -1) continue; const name = this.makeTraceNameFromVectorSpecification(elm.vectorSpecification); + const lineShape = getTraceLineShape(elm.data); const vectorHistoryTrace = createHistoricalVectorTrace({ vectorHistoricalData: elm.data, name: name, color: this._historyVectorColor, type: this._scatterType, + lineShape: lineShape, }); const { row, col } = this.getSubplotRowAndColFromIndex(subplotIndex); this._figure.addTrace(vectorHistoryTrace, row, col); this._hasHistoryTraces = true; - this.insertVectorNameAndUnitIntoMap(elm.vectorSpecification.vectorName, elm.data.unit); + this.createVectorSubplotTitleAndInsertIntoMap(elm.vectorSpecification.vectorName, elm.data.unit); } } @@ -731,18 +768,33 @@ export class PlotBuilder { return this._traceFallbackColor; } - private insertVectorNameAndUnitIntoMap(vectorName: string, unit: string): void { - if (vectorName in this._vectorNameUnitMap) return; + private createVectorSubplotTitleAndInsertIntoMap( + vectorName: string, + unit: string, + derivedVectorInfo?: DerivedVectorInfo_api | null + ): void { + if (vectorName in 
this._vectorNameSubplotTitleMap) return; + + const vectorDescription = this.createVectorDescription(vectorName, derivedVectorInfo); + const unitText = unit.length === 0 ? "" : ` [${simulationUnitReformat(unit)}]`; - this._vectorNameUnitMap[vectorName] = unit; + this._vectorNameSubplotTitleMap[vectorName] = `${vectorDescription}${unitText}`; } - private createVectorSubplotTitle(vectorName: string): string { - const vectorDescription = simulationVectorDescription(vectorName); - const unit = this._vectorNameUnitMap[vectorName]; - if (!unit) return vectorDescription; + private createVectorDescription(vectorName: string, derivedVectorInfo?: DerivedVectorInfo_api | null): string { + if (derivedVectorInfo) { + const derivedVectorDescription = createDerivedVectorDescription( + derivedVectorInfo.sourceVector, + derivedVectorInfo.type + ); + if (this._resampleFrequency) { + const frequencyString = FrequencyEnumToStringMapping[this._resampleFrequency]; + return `${frequencyString} ${derivedVectorDescription}`; + } + return derivedVectorDescription; + } - return `${vectorDescription} [${simulationUnitReformat(unit)}]`; + return simulationVectorDescription(vectorName); } private makeTraceNameFromVectorSpecification(vectorSpecification: VectorSpec): string { diff --git a/frontend/src/modules/SimulationTimeSeries/view/utils/PlotlyTraceUtils/createVectorTracesUtils.ts b/frontend/src/modules/SimulationTimeSeries/view/utils/PlotlyTraceUtils/createVectorTracesUtils.ts index cbe13bd55..2b106e1cb 100644 --- a/frontend/src/modules/SimulationTimeSeries/view/utils/PlotlyTraceUtils/createVectorTracesUtils.ts +++ b/frontend/src/modules/SimulationTimeSeries/view/utils/PlotlyTraceUtils/createVectorTracesUtils.ts @@ -1,4 +1,5 @@ import { + DerivedVectorType_api, StatisticFunction_api, SummaryVectorDateObservation_api, VectorHistoricalData_api, @@ -12,11 +13,33 @@ import { LineData, StatisticsData, createStatisticsTraces } from "./statisticsPl import { TimeSeriesPlotData } from 
"../timeSeriesPlotData"; +function isDerivedVectorOfType( + vectorData: VectorRealizationData_api | VectorStatisticData_api | VectorHistoricalData_api, + type: DerivedVectorType_api +): boolean { + return "derivedVectorInfo" in vectorData && vectorData.derivedVectorInfo?.type === type; +} + /** - Get line shape - "vh" for rate data, "linear" for non-rate data + * Utility function for getting the shape of the trace line for given vector data. + * + * Default is "linear", rate vectors are "vh", and custom calculated vectors are "hv". */ -export function getLineShape(isRate: boolean): "linear" | "vh" { - return isRate ? "vh" : "linear"; +export function getTraceLineShape( + vectorData: VectorRealizationData_api | VectorStatisticData_api | VectorHistoricalData_api +): "linear" | "hv" | "vh" { + if ( + isDerivedVectorOfType(vectorData, DerivedVectorType_api.PER_DAY) || + isDerivedVectorOfType(vectorData, DerivedVectorType_api.PER_INTVL) + ) { + // Custom calculated vectors valid forward in time + return "hv"; + } + if (vectorData.isRate) { + // Rate vectors are valid backward in time + return "vh"; + } + return "linear"; } /** @@ -27,7 +50,7 @@ type CreateRealizationTraceBaseOptions = { color: string; legendGroup: string; hoverTemplate?: string; - // lineShape?: "linear" | "spline" | "hv" | "vh" | "hvh" | "vhv"; + lineShape?: "linear" | "spline" | "hv" | "vh" | "hvh" | "vhv"; showLegend?: boolean; yaxis?: string; xaxis?: string; @@ -47,6 +70,7 @@ export function createVectorRealizationTrace({ color, legendGroup, hoverTemplate = "", + lineShape = "linear", showLegend = false, yaxis = "y", xaxis = "x", @@ -55,13 +79,11 @@ export function createVectorRealizationTrace({ // TODO: // - type: "scattergl" or "scatter"? Maximum 8 WebGL contexts in Chrome gives issues? // "scattergl" hides traces when zooming and panning for Ruben on work computer. - // - lineShape - Each VectorRealizationData_api element has its own `is_rate` property. 
Should we - // use that to determine the line shape or provide a lineShape argument? return { - x: vectorRealizationData.timestamps_utc_ms, + x: vectorRealizationData.timestampsUtcMs, y: vectorRealizationData.values, - line: { width: 1, color: color, shape: getLineShape(vectorRealizationData.is_rate) }, + line: { width: 1, color: color, shape: lineShape }, mode: "lines", type: type, hovertemplate: `${hoverTemplate}Realization: ${vectorRealizationData.realization}`, @@ -81,27 +103,25 @@ export type CreateVectorRealizationTracesOptions = CreateRealizationTraceBaseOpt vectorRealizationsData: VectorRealizationData_api[]; }; export function createVectorRealizationTraces({ - vectorRealizationsData, + vectorRealizationsData: vectorRealizationDataArray, name, color, legendGroup, hoverTemplate = "", + lineShape = "linear", showLegend = false, yaxis = "y", xaxis = "x", type = "scatter", }: CreateVectorRealizationTracesOptions): Partial[] { - // TODO: - // - lineShape - Each VectorRealizationData_api element has its own `is_rate` property. Should we - // use that to determine the line shape or provide a lineShape argument? - - return vectorRealizationsData.map((realization) => { + return vectorRealizationDataArray.map((realization) => { return createVectorRealizationTrace({ vectorRealizationData: realization, name, color, legendGroup, hoverTemplate, + lineShape, showLegend, yaxis, xaxis, @@ -120,7 +140,7 @@ export type CreateHistoricalVectorTraceOptions = { xaxis?: string; showLegend?: boolean; type?: "scatter" | "scattergl"; - // lineShape?: "linear" | "spline" | "hv" | "vh" | "hvh" | "vhv"; + lineShape?: "linear" | "spline" | "hv" | "vh" | "hvh" | "vhv"; name?: string; legendRank?: number; }; @@ -131,15 +151,16 @@ export function createHistoricalVectorTrace({ xaxis = "x", showLegend = false, type = "scatter", + lineShape = "linear", name: name, legendRank, }: CreateHistoricalVectorTraceOptions): Partial { const hoverText = name ? 
`History: ${name}` : "History"; return { - line: { shape: getLineShape(vectorHistoricalData.is_rate), color: color }, + line: { shape: lineShape, color: color }, mode: "lines", type: type, - x: vectorHistoricalData.timestamps_utc_ms, + x: vectorHistoricalData.timestampsUtcMs, y: vectorHistoricalData.values, hovertext: hoverText, hoverinfo: "y+x+text", @@ -224,7 +245,7 @@ export type CreateVectorFanchartTracesOptions = { name?: string; yaxis?: string; xaxis?: string; - // lineShape?: "vh" | "linear" | "spline" | "hv" | "hvh" | "vhv"; + lineShape?: "vh" | "linear" | "spline" | "hv" | "hvh" | "vhv"; hoverTemplate?: string; showLegend?: boolean; legendRank?: number; @@ -237,25 +258,26 @@ export function createVectorFanchartTraces({ name = undefined, yaxis = "y", xaxis = "x", + lineShape = "linear", hoverTemplate = "(%{x}, %{y})
", showLegend = false, type = "scatter", legendRank, }: CreateVectorFanchartTracesOptions): Partial[] { - const lowData = vectorStatisticData.value_objects.find((v) => v.statistic_function === StatisticFunction_api.P90); - const highData = vectorStatisticData.value_objects.find((v) => v.statistic_function === StatisticFunction_api.P10); + const lowData = vectorStatisticData.valueObjects.find((v) => v.statisticFunction === StatisticFunction_api.P90); + const highData = vectorStatisticData.valueObjects.find((v) => v.statisticFunction === StatisticFunction_api.P10); let lowHighData: LowHighData | undefined = undefined; if (lowData && highData) { lowHighData = { - highName: highData.statistic_function.toString(), + highName: highData.statisticFunction.toString(), highData: highData.values, - lowName: lowData.statistic_function.toString(), + lowName: lowData.statisticFunction.toString(), lowData: lowData.values, }; } - const minData = vectorStatisticData.value_objects.find((v) => v.statistic_function === StatisticFunction_api.MIN); - const maxData = vectorStatisticData.value_objects.find((v) => v.statistic_function === StatisticFunction_api.MAX); + const minData = vectorStatisticData.valueObjects.find((v) => v.statisticFunction === StatisticFunction_api.MIN); + const maxData = vectorStatisticData.valueObjects.find((v) => v.statisticFunction === StatisticFunction_api.MAX); let minMaxData: MinMaxData | undefined = undefined; if (minData && maxData) { minMaxData = { @@ -264,17 +286,17 @@ export function createVectorFanchartTraces({ }; } - const meanData = vectorStatisticData.value_objects.find((v) => v.statistic_function === StatisticFunction_api.MEAN); + const meanData = vectorStatisticData.valueObjects.find((v) => v.statisticFunction === StatisticFunction_api.MEAN); let meanFreeLineData: FreeLineData | undefined = undefined; if (meanData) { meanFreeLineData = { - name: meanData.statistic_function.toString(), + name: meanData.statisticFunction.toString(), data: 
meanData.values, }; } const fanchartData: FanchartData = { - samples: vectorStatisticData.timestamps_utc_ms, + samples: vectorStatisticData.timestampsUtcMs, lowHigh: lowHighData, minimumMaximum: minMaxData, freeLine: meanFreeLineData, @@ -285,7 +307,7 @@ export function createVectorFanchartTraces({ hexColor: hexColor, legendGroup: legendGroup, name: name, - lineShape: getLineShape(vectorStatisticData.is_rate), + lineShape: lineShape, showLegend: showLegend, hoverTemplate: hoverTemplate, legendRank: legendRank, @@ -308,7 +330,7 @@ export type CreateVectorStatisticsTracesOptions = { name?: string; yaxis?: string; xaxis?: string; - // lineShape?: "vh" | "linear" | "spline" | "hv" | "hvh" | "vhv"; + lineShape?: "vh" | "linear" | "spline" | "hv" | "hvh" | "vhv"; lineWidth?: number; hoverTemplate?: string; showLegend?: boolean; @@ -322,49 +344,25 @@ export function createVectorStatisticsTraces({ name = undefined, yaxis = "y", xaxis = "x", + lineShape = "linear", lineWidth = 2, hoverTemplate = "(%{x}, %{y})
", showLegend = false, type = "scatter", legendRank, }: CreateVectorStatisticsTracesOptions): Partial[] { - const lowValueObject = vectorStatisticData.value_objects.find( - (v) => v.statistic_function === StatisticFunction_api.P90 - ); - const midValueObject = vectorStatisticData.value_objects.find( - (v) => v.statistic_function === StatisticFunction_api.P50 - ); - const highValueObject = vectorStatisticData.value_objects.find( - (v) => v.statistic_function === StatisticFunction_api.P10 - ); - const minValueObject = vectorStatisticData.value_objects.find( - (v) => v.statistic_function === StatisticFunction_api.MIN - ); - const maxValueObject = vectorStatisticData.value_objects.find( - (v) => v.statistic_function === StatisticFunction_api.MAX - ); - const meanValueObject = vectorStatisticData.value_objects.find( - (v) => v.statistic_function === StatisticFunction_api.MEAN - ); - - const lowData: LineData | undefined = lowValueObject - ? { data: lowValueObject.values, name: lowValueObject.statistic_function.toString() } - : undefined; - const midData: LineData | undefined = midValueObject - ? { data: midValueObject.values, name: midValueObject.statistic_function.toString() } - : undefined; - const highData: LineData | undefined = highValueObject - ? { data: highValueObject.values, name: highValueObject.statistic_function.toString() } - : undefined; - const meanData: LineData | undefined = meanValueObject - ? 
{ data: meanValueObject.values, name: meanValueObject.statistic_function.toString() } - : undefined; + const lowData = getVectorStatisticLineDataForFunction(vectorStatisticData, StatisticFunction_api.P90); + const midData = getVectorStatisticLineDataForFunction(vectorStatisticData, StatisticFunction_api.P50); + const highData = getVectorStatisticLineDataForFunction(vectorStatisticData, StatisticFunction_api.P10); + const minData = getVectorStatisticLineDataForFunction(vectorStatisticData, StatisticFunction_api.MIN); + const maxData = getVectorStatisticLineDataForFunction(vectorStatisticData, StatisticFunction_api.MAX); + const meanData = getVectorStatisticLineDataForFunction(vectorStatisticData, StatisticFunction_api.MEAN); const statisticsData: StatisticsData = { - samples: vectorStatisticData.timestamps_utc_ms, + samples: vectorStatisticData.timestampsUtcMs, freeLine: meanData, - minimum: minValueObject ? minValueObject.values : undefined, - maximum: maxValueObject ? maxValueObject.values : undefined, + minimum: minData ? minData.data : undefined, + maximum: maxData ? 
maxData.data : undefined, lowPercentile: lowData, highPercentile: highData, midPercentile: midData, @@ -375,7 +373,7 @@ export function createVectorStatisticsTraces({ color: hexColor, legendGroup: legendGroup, name: name, - lineShape: getLineShape(vectorStatisticData.is_rate), + lineShape: lineShape, lineWidth: lineWidth, showLegend: showLegend, hoverTemplate: hoverTemplate, @@ -385,3 +383,19 @@ export function createVectorStatisticsTraces({ type: type, }); } + +function getVectorStatisticLineDataForFunction( + vectorStatisticData: VectorStatisticData_api, + statisticFunction: StatisticFunction_api +): LineData | undefined { + const valueObject = vectorStatisticData.valueObjects.find((v) => v.statisticFunction === statisticFunction); + + if (!valueObject) { + return undefined; + } + + return { + data: valueObject.values, + name: statisticFunction.toString(), + }; +} diff --git a/frontend/src/modules/SimulationTimeSeries/view/utils/vectorSpecificationsAndQueriesUtils.ts b/frontend/src/modules/SimulationTimeSeries/view/utils/vectorSpecificationsAndQueriesUtils.ts index ad21cc4e7..87673ff25 100644 --- a/frontend/src/modules/SimulationTimeSeries/view/utils/vectorSpecificationsAndQueriesUtils.ts +++ b/frontend/src/modules/SimulationTimeSeries/view/utils/vectorSpecificationsAndQueriesUtils.ts @@ -44,10 +44,10 @@ export function filterVectorSpecificationAndIndividualStatisticsDataArray( if (selectedIndividualStatisticOptions.length === 0) return []; const output = vectorSpecificationAndStatisticsData.map((v) => { - const filteredValueObjects = v.data.value_objects.filter((vo) => { - return selectedIndividualStatisticOptions.includes(vo.statistic_function); + const filteredValueObjects = v.data.valueObjects.filter((vo) => { + return selectedIndividualStatisticOptions.includes(vo.statisticFunction); }); - return { vectorSpecification: v.vectorSpecification, data: { ...v.data, value_objects: filteredValueObjects } }; + return { vectorSpecification: v.vectorSpecification, 
data: { ...v.data, valueObjects: filteredValueObjects } }; }); return output; } @@ -74,10 +74,10 @@ export function filterVectorSpecificationAndFanchartStatisticsDataArray( if (includeStatisticFunctions.length === 0) return []; const output = vectorSpecificationAndStatisticsData.map((v) => { - const filteredValueObjects = v.data.value_objects.filter((vo) => { - return includeStatisticFunctions.includes(vo.statistic_function); + const filteredValueObjects = v.data.valueObjects.filter((vo) => { + return includeStatisticFunctions.includes(vo.statisticFunction); }); - return { vectorSpecification: v.vectorSpecification, data: { ...v.data, value_objects: filteredValueObjects } }; + return { vectorSpecification: v.vectorSpecification, data: { ...v.data, valueObjects: filteredValueObjects } }; }); return output; } diff --git a/frontend/src/modules/SimulationTimeSeriesSensitivity/dataGenerators.ts b/frontend/src/modules/SimulationTimeSeriesSensitivity/dataGenerators.ts index 208ac377c..5dca08fdd 100644 --- a/frontend/src/modules/SimulationTimeSeriesSensitivity/dataGenerators.ts +++ b/frontend/src/modules/SimulationTimeSeriesSensitivity/dataGenerators.ts @@ -14,7 +14,7 @@ export function makeVectorDataGenerator( if (ensemble && vectorRealizationData) { vectorRealizationData.forEach((vec) => { - const indexOfTimestamp = indexOf(vec.timestamps_utc_ms, activeTimestampUtcMs); + const indexOfTimestamp = indexOf(vec.timestampsUtcMs, activeTimestampUtcMs); data.push({ key: vec.realization, value: indexOfTimestamp === -1 ? 
0 : vec.values[indexOfTimestamp], diff --git a/frontend/src/modules/SimulationTimeSeriesSensitivity/settings/atoms/derivedAtoms.ts b/frontend/src/modules/SimulationTimeSeriesSensitivity/settings/atoms/derivedAtoms.ts index cc3ab4d49..1bff5f53c 100644 --- a/frontend/src/modules/SimulationTimeSeriesSensitivity/settings/atoms/derivedAtoms.ts +++ b/frontend/src/modules/SimulationTimeSeriesSensitivity/settings/atoms/derivedAtoms.ts @@ -116,7 +116,7 @@ export const selectedVectorNameHasHistoricalAtom = atom((get) => { const vectorListQuery = get(vectorListQueryAtom); const selectedVector = vectorListQuery.data?.find((vec) => vec.name === selectedVectorName); - return !!selectedVector?.has_historical; + return !!selectedVector?.hasHistorical; }); export const vectorSpecificationAtom = atom((get) => { diff --git a/frontend/src/modules/SimulationTimeSeriesSensitivity/view/atoms/derivedAtoms.ts b/frontend/src/modules/SimulationTimeSeriesSensitivity/view/atoms/derivedAtoms.ts index 50432b10c..846189cd7 100644 --- a/frontend/src/modules/SimulationTimeSeriesSensitivity/view/atoms/derivedAtoms.ts +++ b/frontend/src/modules/SimulationTimeSeriesSensitivity/view/atoms/derivedAtoms.ts @@ -11,8 +11,7 @@ export const activeTimestampUtcMsAtom = atom((get) => { const userSelectedActiveTimestampUtcMs = get(userSelectedActiveTimestampUtcMsAtom); const statisticalVectorSensitivityDataQuery = get(statisticalVectorSensitivityDataQueryAtom); - const lastTimestampUtcMs = - statisticalVectorSensitivityDataQuery.data?.at(0)?.timestamps_utc_ms.slice(-1)[0] ?? null; + const lastTimestampUtcMs = statisticalVectorSensitivityDataQuery.data?.at(0)?.timestampsUtcMs.slice(-1)[0] ?? 
null; if (lastTimestampUtcMs !== null && userSelectedActiveTimestampUtcMs === null) { return lastTimestampUtcMs; diff --git a/frontend/src/modules/SimulationTimeSeriesSensitivity/view/hooks/useTimeSeriesChartTracesDataArrayBuilder.ts b/frontend/src/modules/SimulationTimeSeriesSensitivity/view/hooks/useTimeSeriesChartTracesDataArrayBuilder.ts index 311658f27..177e6bfbb 100644 --- a/frontend/src/modules/SimulationTimeSeriesSensitivity/view/hooks/useTimeSeriesChartTracesDataArrayBuilder.ts +++ b/frontend/src/modules/SimulationTimeSeriesSensitivity/view/hooks/useTimeSeriesChartTracesDataArrayBuilder.ts @@ -56,7 +56,7 @@ export function useTimeSeriesChartTracesDataArrayBuilder(colorSet: ColorSet): Ti // Add statistics traces if (showStatistics && statisticsQuery.data) { const matchingCases: VectorStatisticSensitivityData_api[] = statisticsQuery.data.filter( - (stat) => stat.sensitivity_name === sensitivityName + (stat) => stat.sensitivityName === sensitivityName ); const traces = createStatisticalLineTraces(matchingCases, StatisticFunction_api.MEAN, color); traceDataArr.push(...traces); @@ -79,7 +79,7 @@ export function useTimeSeriesChartTracesDataArrayBuilder(colorSet: ColorSet): Ti if (historicalQuery?.data && showHistorical) { traceDataArr.push( createLineTrace({ - timestampsMsUtc: historicalQuery.data.timestamps_utc_ms, + timestampsMsUtc: historicalQuery.data.timestampsUtcMs, values: historicalQuery.data.values, name: "history", lineShape: "linear", diff --git a/frontend/src/modules/SimulationTimeSeriesSensitivity/view/utils/createTracesUtils.ts b/frontend/src/modules/SimulationTimeSeriesSensitivity/view/utils/createTracesUtils.ts index 71860b5a6..6aa6e6d70 100644 --- a/frontend/src/modules/SimulationTimeSeriesSensitivity/view/utils/createTracesUtils.ts +++ b/frontend/src/modules/SimulationTimeSeriesSensitivity/view/utils/createTracesUtils.ts @@ -15,20 +15,20 @@ export function createStatisticalLineTraces( ): TimeSeriesPlotlyTrace[] { const traces: 
TimeSeriesPlotlyTrace[] = []; sensitivityData.forEach((aCase, index) => { - const statisticObj = aCase.value_objects.find((obj) => obj.statistic_function === statisticsFunction); + const statisticObj = aCase.valueObjects.find((obj) => obj.statisticFunction === statisticsFunction); if (statisticObj) { traces.push( createLineTrace({ - timestampsMsUtc: aCase.timestamps_utc_ms, + timestampsMsUtc: aCase.timestampsUtcMs, values: statisticObj.values, - name: `${aCase.sensitivity_name}`, - legendGroup: `${aCase.sensitivity_name}`, + name: `${aCase.sensitivityName}`, + legendGroup: `${aCase.sensitivityName}`, lineShape: "linear", lineDash: "dash", showLegend: index === 0, lineColor: color, lineWidth: 3, - hoverTemplate: `Sensitivity:${aCase.sensitivity_name}
Case: ${aCase.sensitivity_case}
Value: %{y}
Date: %{x}`, + hoverTemplate: `Sensitivity:${aCase.sensitivityName}
Case: ${aCase.sensitivityName}
Value: %{y}
Date: %{x}`, }) ); } @@ -51,7 +51,7 @@ export function createRealizationLineTraces( const isHighlighted = vec.realization === highlightedRealization ? true : false; const trace = createLineTrace({ - timestampsMsUtc: vec.timestamps_utc_ms, + timestampsMsUtc: vec.timestampsUtcMs, values: vec.values, name: `real-${vec.realization}`, lineShape: lineShape, diff --git a/frontend/src/modules/_shared/reservoirSimulationStringUtils.ts b/frontend/src/modules/_shared/reservoirSimulationStringUtils.ts index a37d4829a..05f964d29 100644 --- a/frontend/src/modules/_shared/reservoirSimulationStringUtils.ts +++ b/frontend/src/modules/_shared/reservoirSimulationStringUtils.ts @@ -20,28 +20,12 @@ function getVectorDefinition(vector: string): VectorDefinition | null { } /** - * Returns a more human friendly description of the vector name if possible, otherwise returns the vector name as is. - * - * NOTE: - * - Based on https://github.com/equinor/webviz-subsurface/blob/master/webviz_subsurface/_abbreviations/reservoir_simulation.py - * - Handle user defined vectors later on + * Returns the vector definition for the simulation vector if it exists, otherwise returns null. 
*/ -export function simulationVectorDescription(vector: string): string { - let prefix = ""; - let suffix = ""; - if (vector.startsWith("PER_DAY_")) { - prefix = "Average "; - suffix = " Per day"; - vector = vector.slice("PER_DAY_".length); - } else if (vector.startsWith("PER_INTVL_")) { - prefix = "Interval "; - vector = vector.slice("PER_INTVL_".length); - } - +export function simulationVectorDefinition(vector: string): VectorDefinition | null { let vectorName = vector; - let node: string | null = null; if (vector.includes(":")) { - [vectorName, node] = vector.split(":", 2); + [vectorName] = vector.split(":", 2); } // Handle regions and completions @@ -53,33 +37,62 @@ export function simulationVectorDescription(vector: string): string { // Underscores _ are always used to fill const vectorBaseName = vectorName.slice(0, 5).replace(/_+$/, ""); // Equivalent to rstrip("_") - const fip = vectorName.slice(5); - - const definition = getVectorDefinition(vectorBaseName); - if (definition && definition.type === "region") { - return `${prefix}${definition.description}${suffix}, region ${fip} ${node ?? ""}`; - } + return getVectorDefinition(vectorBaseName); } else if (vectorName[0] === "W" && vectorName[4] === "L") { // These are completion vectors, e.g. WWCTL:__1:OP_1 and WOPRL_10:OP_1 for // water-cut in OP_1 completion 1 and oil production rate in OP_1 completion 10 const vectorBaseName = vector.slice(0, 5); - const comp = vectorName.slice(5).replace(/^_+/, ""); // Equivalent to lstrip("_") - - const definition = getVectorDefinition(vectorBaseName); - if (definition && definition.type === "completion") { - return `${prefix}${definition.description}${suffix}, well ${node ?? 
""} completion ${comp}`; - } + return getVectorDefinition(vectorBaseName); } } - const definition = getVectorDefinition(vectorName); - if (definition) { - if (node === null) { - return `${prefix}${definition.description}${suffix}`; - } - return `${prefix}${definition.description}${suffix}, ${definition.type.replace(/_/g, " ")} ${node}`; + return getVectorDefinition(vectorName); +} + +/** + * Returns a more human friendly description of the vector name if possible, otherwise returns the vector name as is. + * + * Optional prefix and suffix can be added to the description. These are added before and after the vector base name description. + * This implies suffix to be added in front of any well, region or completion description. + * + * Exampled usage: simulationVectorDescription("WOPR:A1", "Average ", " Per Day") => "Average Oil Production Rate Per Day, well A1" + * + * NOTE: + * - Based on https://github.com/equinor/webviz-subsurface/blob/master/webviz_subsurface/_abbreviations/reservoir_simulation.py + * - Handle user defined vectors later on + */ +export function simulationVectorDescription( + vector: string, + vectorBasePrefix = "", + vectorBaseSuffix = "", + excludeTypeDescription = false +): string { + const [vectorName, node] = vector.includes(":") ? vector.split(":", 2) : [vector, null]; + const vectorDefinition = simulationVectorDefinition(vectorName); + + if (!vectorDefinition) { + return `${vectorBasePrefix}${vector}${vectorBaseSuffix}`; + } + + const { description, type } = vectorDefinition; + const baseDescription = `${vectorBasePrefix}${description}${vectorBaseSuffix}`; + + if (excludeTypeDescription) { + return baseDescription; } - return `${prefix}${vector}${suffix}`; + if (type === "region") { + const fip = vectorName.slice(5); + const region = fip ? `region ${fip} ${node ?? ""}` : `region ${node ?? 
""}`; + return `${baseDescription}, ${region}`; + } + if (type === "completion") { + const comp = vectorName.slice(5).replace(/^_+/, ""); // Equivalent to lstrip("_") + return `${baseDescription}, well ${node ?? ""} completion ${comp}`; + } + if (node) { + return `${baseDescription}, ${type.replace(/_/g, " ")} ${node}`; + } + return baseDescription; } diff --git a/frontend/tests/unit/reservoirSimulationStringUtils.test.ts b/frontend/tests/unit/reservoirSimulationStringUtils.test.ts index 257f380f7..5803f27f0 100644 --- a/frontend/tests/unit/reservoirSimulationStringUtils.test.ts +++ b/frontend/tests/unit/reservoirSimulationStringUtils.test.ts @@ -1,4 +1,8 @@ -import { simulationUnitReformat, simulationVectorDescription } from "@modules/_shared/reservoirSimulationStringUtils"; +import { + simulationUnitReformat, + simulationVectorDefinition, + simulationVectorDescription, +} from "@modules/_shared/reservoirSimulationStringUtils"; import { describe, expect, test } from "vitest"; @@ -29,4 +33,24 @@ describe("Reservoir Simulation string utils tests", () => { expect(simulationUnitReformat("M3", "INVALID_UNIT_SYSTEM")).toEqual("M3"); expect(simulationUnitReformat("SM3/DAY", "INVALID_UNIT_SYSTEM")).toEqual("SM3/DAY"); }); + + test("Test simulationVectorDefinition", () => { + expect(simulationVectorDefinition("INVALID_VECTOR")).toBeNull(); + expect(simulationVectorDefinition("WOPR")).toEqual({ + description: "Oil Production Rate", + type: "well", + }); + expect(simulationVectorDefinition("ROIP:1")).toEqual({ + description: "Oil In Place (liquid+gas phase)", + type: "region", + }); + expect(simulationVectorDefinition("WOPRL_10")).toEqual({ + description: "Oil Flow Rate", + type: "completion", + }); + expect(simulationVectorDefinition("FGIP")).toEqual({ + description: "Gas In Place (liquid+gas phase)", + type: "field", + }); + }); });