Skip to content

Commit

Permalink
292 flaky tests (#293)
Browse files Browse the repository at this point in the history
Fixes issues with MOM6 testing, and time parsing of same.

* Revert "Add MOM6 support (om4 025jra ryf) (#258)"

This reverts commit 8d18b19.

Testing to see whether this restores test stability

* Revert "Revert "Add MOM6 support (om4 025jra ryf) (#258)""

This reverts commit 23b3b5f - i.e. it
restores the MOM6 support.

* Add xarray complete

* Pin dependencies back in time for 3.11

* Fail fast false

* Pin a bunch of deps

* Added toxfile

* tox.ini w/ comments on failures

* Revert "Pin dependencies back in time for 3.11"

This reverts commit 6fa6676.

* These changes are ugly & horrible but mostly seem to resolve the issues with cftime. They cause some assets to fail because they alter the parser, but I think this is a window into a solution.

* Lots of catches for overflow errors: keeping for posterity

* Restore 'test_builders.py' to same state as main

* Fix mom6 tests - should now all be failing

* Ready to replace time info guesses for MOM6 with a subclass

* Fixed broken MOM6 builder

* re-enable fast fail

* Reverted CI environments to main

* Removed '_access' from a bunch of function names - now that we have GFDL models in the builders, the '_access' naming is misleading

* Revert load_dataset => open_dataset

* Updating test to fix coverage

* Restored to working state

* Tests for GenericTimeParser & AccessTimeParser

* Improve test coverage

* Improve test coverage for GfdlTimeParser

* Improve test coverage for GfdlTimeParser

* Improve test coverage for GfdlTimeParser

* Marc's comments
  • Loading branch information
charles-turner-1 authored Dec 9, 2024
1 parent 8a1b1d9 commit 2f5ba48
Show file tree
Hide file tree
Showing 5 changed files with 702 additions and 68 deletions.
35 changes: 18 additions & 17 deletions src/access_nri_intake/source/builders.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
from . import ESM_JSONSCHEMA, PATH_COLUMN, VARIABLE_COLUMN
from .utils import (
EmptyFileError,
_AccessNCFileInfo,
GenericTimeParser,
GfdlTimeParser,
_NCFileInfo,
_VarInfo,
get_timeinfo,
)

# Frequency translations
Expand Down Expand Up @@ -56,8 +57,9 @@ class BaseBuilder(Builder):
This builds on the ecgtools.Builder class.
"""

# Base class carries an empty set
# Base class carries an empty set, and a GenericParser
PATTERNS: list = []
TIME_PARSER = GenericTimeParser

def __init__(
self,
Expand Down Expand Up @@ -222,7 +224,7 @@ def parser(file):
raise NotImplementedError

@classmethod
def parse_access_filename(
def parse_filename(
cls,
filename: str,
patterns: list[str] | None = None,
Expand Down Expand Up @@ -285,11 +287,9 @@ def parse_access_filename(
return file_id, timestamp, frequency

@classmethod
def parse_access_ncfile(
cls, file: str, time_dim: str = "time"
) -> _AccessNCFileInfo:
def parse_ncfile(cls, file: str, time_dim: str = "time") -> _NCFileInfo:
"""
Get Intake-ESM datastore entry info from an ACCESS netcdf file
Get Intake-ESM datastore entry info from a netcdf file
Parameters
----------
Expand All @@ -300,7 +300,7 @@ def parse_access_ncfile(
Returns
-------
output_nc_info: _AccessNCFileInfo
output_nc_info: _NCFileInfo
A dataclass containing the information parsed from the file
Raises
Expand All @@ -310,7 +310,7 @@ def parse_access_ncfile(

file_path = Path(file)

file_id, filename_timestamp, filename_frequency = cls.parse_access_filename(
file_id, filename_timestamp, filename_frequency = cls.parse_filename(
file_path.stem
)

Expand All @@ -327,14 +327,14 @@ def parse_access_ncfile(
attrs = ds[var].attrs
dvars.append_attrs(var, attrs) # type: ignore

start_date, end_date, frequency = get_timeinfo(
start_date, end_date, frequency = cls.TIME_PARSER(
ds, filename_frequency, time_dim
)
)()

if not dvars.variable_list:
raise EmptyFileError("This file contains no variables")

output_ncfile = _AccessNCFileInfo(
output_ncfile = _NCFileInfo(
filename=file_path.name,
path=file,
file_id=file_id,
Expand Down Expand Up @@ -399,7 +399,7 @@ def parser(cls, file) -> dict:
if realm == "ice":
realm = "seaIce"

nc_info = cls.parse_access_ncfile(file)
nc_info = cls.parse_ncfile(file)
ncinfo_dict = nc_info.to_dict()

ncinfo_dict["realm"] = realm
Expand Down Expand Up @@ -457,7 +457,7 @@ def __init__(self, path):
@classmethod
def parser(cls, file) -> dict:
try:
output_nc_info = cls.parse_access_ncfile(file)
output_nc_info = cls.parse_ncfile(file)
ncinfo_dict = output_nc_info.to_dict()

if "mom6" in ncinfo_dict["filename"]:
Expand Down Expand Up @@ -487,6 +487,7 @@ class Mom6Builder(BaseBuilder):
rf"[^\.]*({PATTERNS_HELPERS['ymd-ns']})\.{PATTERNS_HELPERS['mom6_components']}.*{PATTERNS_HELPERS['mom6_added_timestamp']}.*$", # Daily snapshot naming
rf"[^\.]*({PATTERNS_HELPERS['ymd-ns']})\.{PATTERNS_HELPERS['mom6_components']}.*$", # Basic naming
]
TIME_PARSER = GfdlTimeParser

def __init__(self, path):
"""
Expand Down Expand Up @@ -529,7 +530,7 @@ def __init__(self, path):
@classmethod
def parser(cls, file):
try:
output_nc_info = cls.parse_access_ncfile(file)
output_nc_info = cls.parse_ncfile(file)
ncinfo_dict = output_nc_info.to_dict()

if "ocean" in ncinfo_dict["filename"]:
Expand Down Expand Up @@ -605,7 +606,7 @@ def parser(cls, file):

realm_mapping = {"atm": "atmos", "ocn": "ocean", "ice": "seaIce"}

nc_info = cls.parse_access_ncfile(file)
nc_info = cls.parse_ncfile(file)
ncinfo_dict = nc_info.to_dict()

# Remove exp_id from file id so that members can be part of the same dataset
Expand Down
Loading

0 comments on commit 2f5ba48

Please sign in to comment.