@@ -2,7 +2,7 @@
 import logging
 from open_data_pvnet.scripts.archive import handle_archive
 from open_data_pvnet.utils.env_loader import load_environment_variables
-from open_data_pvnet.utils.data_downloader import load_zarr_data
+from open_data_pvnet.utils.data_downloader import load_zarr_data, load_zarr_data_for_day
 from pathlib import Path
 import concurrent.futures
 from typing import List, Tuple
|
@@ -76,7 +76,12 @@ def _add_common_arguments(parser, provider_name):
     """Add arguments common to both archive and load operations."""
     parser.add_argument("--year", type=int, required=True, help="Year of data")
     parser.add_argument("--month", type=int, required=True, help="Month of data")
-    parser.add_argument("--day", type=int, required=True, help="Day of data")
+    parser.add_argument(
+        "--day",
+        type=int,
+        help="Day of data (optional - if not provided, loads entire month)",
+        default=None,
+    )
 
     # Add Met Office specific arguments
     if provider_name == "metoffice":
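
Note on the `--day` change: dropping `required=True` and defaulting to `None` is the standard argparse pattern for a flag whose absence is meaningful, since downstream code can branch on `None`. A minimal self-contained sketch of the pattern (this snippet is illustrative only, not code from this PR):

```python
# Minimal illustration of the optional-flag pattern used for --day above.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--day", type=int, default=None, help="Day of data (optional)")

# Flag supplied: parsed as an int.
args = parser.parse_args(["--day", "16"])
assert args.day == 16

# Flag omitted: parses to None, which callers can branch on.
args = parser.parse_args([])
assert args.day is None
```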
|
@@ -114,28 +119,41 @@ def parse_chunks(chunks_str):
 
 def handle_load(provider: str, year: int, month: int, day: int, **kwargs):
     """Handle loading archived data."""
-    hour = kwargs.get("hour", 0)  # Default to hour 0 if not specified
     chunks = parse_chunks(kwargs.get("chunks"))
     remote = kwargs.get("remote", False)
+    hour = kwargs.get("hour")
 
-    # Construct the archive path based on provider and parameters
-    # Format: data/2023/01/16/2023-01-16-00.zarr.zip
-    archive_path = (
-        Path("data")
-        / str(year)
-        / f"{month:02d}"
-        / f"{day:02d}"
-        / f"{year}-{month:02d}-{day:02d}-{hour:02d}.zarr.zip"
-    )
+    # Base path for the data
+    base_path = Path("data") / str(year) / f"{month:02d}" / f"{day:02d}"
 
     try:
-        dataset = load_zarr_data(
-            archive_path,
-            chunks=chunks,
-            remote=remote,
-            download=not remote,  # Don't try to download if remote=True
-        )
-        logger.info(f"Successfully loaded dataset for {year}-{month:02d}-{day:02d} hour {hour:02d}")
+        if hour is not None:
+            # Load specific hour
+            archive_path = base_path / f"{year}-{month:02d}-{day:02d}-{hour:02d}.zarr.zip"
+            dataset = load_zarr_data(
+                archive_path,
+                chunks=chunks,
+                remote=remote,
+                download=not remote,
+            )
+            logger.info(
+                f"Successfully loaded dataset for {year}-{month:02d}-{day:02d} hour {hour:02d}"
+            )
+        else:
+            # Load all hours for the day
+            dataset = load_zarr_data_for_day(
+                base_path,
+                year,
+                month,
+                day,
+                chunks=chunks,
+                remote=remote,
+                download=not remote,
+            )
+            logger.info(
+                f"Successfully loaded all available datasets for {year}-{month:02d}-{day:02d}"
+            )
+
         return dataset
     except Exception as e:
         logger.error(f"Error loading dataset: {e}")
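
For context, `load_zarr_data_for_day` is imported at the top of the diff but its implementation is not part of the hunks shown here. Below is a hedged sketch of what such a helper might look like, assuming it reuses the existing `load_zarr_data` loader and concatenates hourly archives along a `time` dimension with xarray; the actual implementation in `open_data_pvnet.utils.data_downloader` may differ:

```python
# Hypothetical sketch of load_zarr_data_for_day -- NOT the implementation
# shipped in this PR; the real one lives in
# open_data_pvnet.utils.data_downloader and may differ.
import logging
from pathlib import Path

import xarray as xr

from open_data_pvnet.utils.data_downloader import load_zarr_data

logger = logging.getLogger(__name__)


def load_zarr_data_for_day(base_path, year, month, day, chunks=None, remote=False, download=True):
    """Load every hourly archive found for one day and join them on time."""
    datasets = []
    for hour in range(24):
        archive_path = Path(base_path) / f"{year}-{month:02d}-{day:02d}-{hour:02d}.zarr.zip"
        try:
            datasets.append(
                load_zarr_data(archive_path, chunks=chunks, remote=remote, download=download)
            )
        except Exception as e:
            # Missing hours are expected; skip them rather than failing the whole day.
            logger.warning(f"Skipping hour {hour:02d}: {e}")
    if not datasets:
        raise FileNotFoundError(f"No archives found for {year}-{month:02d}-{day:02d}")
    return xr.concat(datasets, dim="time")
```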