openclimatefix
diff --git a/‎pvnet/models/base_model.py
+1-1 b/‎pvnet/models/base_model.py
+1-1
diff --git a/‎pvnet/models/baseline/last_value.py
+1-1 b/‎pvnet/models/baseline/last_value.py
+1-1
diff --git a/‎pvnet/models/baseline/single_value.py
+1-1 b/‎pvnet/models/baseline/single_value.py
+1-1
diff --git a/‎pvnet/models/multimodal/multimodal.py
-10 b/‎pvnet/models/multimodal/multimodal.py
-10
diff --git a/‎pvnet/models/multimodal/unimodal_teacher.py
+5-5 b/‎pvnet/models/multimodal/unimodal_teacher.py
+5-5
diff --git a/‎tests/conftest.py
+17-35 b/‎tests/conftest.py
+17-35
diff --git a/‎tests/data/test_datamodule.py
+18-11 b/‎tests/data/test_datamodule.py
+18-11
diff --git a/‎tests/models/baseline/test_last_value.py
+1 b/‎tests/models/baseline/test_last_value.py
+1
diff --git a/‎tests/models/multimodal/site_encoders/test_encoders.py
+26-11 b/‎tests/models/multimodal/site_encoders/test_encoders.py
+26-11
@@ -605,7 +605,7 @@ def _log_validation_results(self, batch, y_hat, accum_batch_num):
         """Append validation results to self.validation_epoch_results"""
 
         # get truth values, shape (b, forecast_len)
-        y = batch[self._target_key][:, -self.forecast_len :, 0]
+        y = batch[self._target_key][:, -self.forecast_len :]
         y = y.detach().cpu().numpy()
         batch_size = y.shape[0]
 
 
@@ -36,7 +36,7 @@ def forward(self, x: dict):
 
         # take the last value non forecaster value and the first in the pv yeild
         # (this is the pv site we are preditcting for)
-        y_hat = gsp_yield[:, -self.forecast_len - 1, 0]
+        y_hat = gsp_yield[:, -self.forecast_len - 1]
 
         # expand the last valid forward n predict steps
         out = y_hat.unsqueeze(1).repeat(1, self.forecast_len)
 
@@ -33,5 +33,5 @@ def __init__(
     def forward(self, x: dict):
         """Run model forward on dict batch of data"""
         # Returns a single value at all steps
-        y_hat = torch.zeros_like(x[BatchKey.gsp][:, : self.forecast_len, 0]) + self._value
+        y_hat = torch.zeros_like(x[BatchKey.gsp][:, : self.forecast_len]) + self._value
         return y_hat
@@ -377,16 +377,6 @@ def forward(self, x):
                 # This needs to be a Batch as input
                 modes["wind"] = self.wind_encoder(x_tmp)
 
-        # *********************** Sensor Data ************************************
-        if self.include_sensor:
-            if self._target_key_name != "sensor":
-                modes["sensor"] = self.sensor_encoder(x)
-            else:
-                x_tmp = x.copy()
-                x_tmp[BatchKey.sensor] = x_tmp[BatchKey.sensor][:, : self.history_len + 1]
-                # This needs to be a Batch as input
-                modes["sensor"] = self.sensor_encoder(x_tmp)
-
         if self.include_sun:
             sun = torch.cat(
                 (
 
@@ -219,7 +219,7 @@ def teacher_forward(self, x):
                 sat_data = torch.swapaxes(sat_data, 1, 2).float()  # switch time and channels
 
                 if self.add_image_embedding_channel:
-                    id = x[BatchKey.gsp_id][:, 0].int()
+                    id = x[BatchKey.gsp_id].int()
                     sat_data = teacher_model.sat_embed(sat_data, id)
 
                 modes[mode] = teacher_model.sat_encoder(sat_data)
@@ -233,7 +233,7 @@ def teacher_forward(self, x):
                 nwp_data = torch.swapaxes(nwp_data, 1, 2)  # switch time and channels
                 nwp_data = torch.clip(nwp_data, min=-50, max=50)
                 if teacher_model.add_image_embedding_channel:
-                    id = x[BatchKey.gsp_id][:, 0].int()
+                    id = x[BatchKey.gsp_id].int()
                     nwp_data = teacher_model.nwp_embed_dict[nwp_source](nwp_data, id)
 
                 nwp_out = teacher_model.nwp_encoders_dict[nwp_source](nwp_data)
@@ -260,7 +260,7 @@ def forward(self, x, return_modes=False):
             sat_data = torch.swapaxes(sat_data, 1, 2).float()  # switch time and channels
 
             if self.add_image_embedding_channel:
-                id = x[BatchKey.gsp_id][:, 0].int()
+                id = x[BatchKey.gsp_id].int()
                 sat_data = self.sat_embed(sat_data, id)
             modes["sat"] = self.sat_encoder(sat_data)
 
@@ -276,7 +276,7 @@ def forward(self, x, return_modes=False):
                 nwp_data = torch.clip(nwp_data, min=-50, max=50)
 
                 if self.add_image_embedding_channel:
-                    id = x[BatchKey.gsp_id][:, 0].int()
+                    id = x[BatchKey.gsp_id].int()
                     nwp_data = self.nwp_embed_dict[nwp_source](nwp_data, id)
 
                 nwp_out = self.nwp_encoders_dict[nwp_source](nwp_data)
@@ -301,7 +301,7 @@ def forward(self, x, return_modes=False):
 
         # ********************** Embedding of GSP ID ********************
         if self.embedding_dim:
-            id = x[BatchKey.gsp_id][:, 0].int()
+            id = x[BatchKey.gsp_id].int()
             id_embedding = self.embed(id)
             modes["id"] = id_embedding
 
 
@@ -106,40 +106,37 @@ def sample_train_val_datamodule():
 
         file_n = 0
 
-        for file in glob.glob("tests/test_data/sample_batches/train/*.pt"):
-            batch = torch.load(file)
+        for file_n, file in enumerate(glob.glob("tests/test_data/presaved_samples/train/*.pt")):
+            sample = torch.load(file)
 
             for i in range(n_duplicates):
                 # Save fopr both train and val
-                torch.save(batch, f"{tmpdirname}/train/{file_n:06}.pt")
-                torch.save(batch, f"{tmpdirname}/val/{file_n:06}.pt")
-
-                file_n += 1
+                torch.save(sample, f"{tmpdirname}/train/{file_n:06}.pt")
+                torch.save(sample, f"{tmpdirname}/val/{file_n:06}.pt")
 
         dm = DataModule(
             configuration=None,
+            sample_dir=f"{tmpdirname}",
             batch_size=2,
             num_workers=0,
             prefetch_factor=None,
             train_period=[None, None],
             val_period=[None, None],
-            test_period=[None, None],
-            batch_dir=f"{tmpdirname}",
+            
         )
         yield dm
 
 
 @pytest.fixture()
 def sample_datamodule():
     dm = DataModule(
+        sample_dir="tests/test_data/presaved_samples",
         configuration=None,
         batch_size=2,
         num_workers=0,
         prefetch_factor=None,
         train_period=[None, None],
         val_period=[None, None],
-        test_period=[None, None],
-        batch_dir="tests/test_data/sample_batches",
     )
     return dm
 
@@ -157,9 +154,10 @@ def sample_satellite_batch(sample_batch):
 
 
 @pytest.fixture()
-def sample_pv_batch(sample_batch):
-    pv_data = sample_batch[BatchKey.pv]
-    return pv_data
+def sample_pv_batch():
+    # TODO: Once PV site inputs are available from ocf-data-sampler UK regional, remove these
+    # old batches. For now we use the old batches to test the site encoder models
+    return torch.load("tests/test_data/presaved_batches/train/000000.pt")
 
 
 @pytest.fixture()
@@ -191,7 +189,7 @@ def model_minutes_kwargs():
 def encoder_model_kwargs():
     # Used to test encoder model on satellite data
     kwargs = dict(
-        sequence_length=(90 - 30) // 5 + 1,
+        sequence_length=7, # 30 minutes of 5 minutely satellite data = 7 time steps
         image_size_pixels=24,
         in_channels=11,
         out_features=128,
@@ -240,23 +238,16 @@ def raw_multimodal_model_kwargs(model_minutes_kwargs):
             "ukv": dict(
                 _target_=pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet,
                 _partial_=True,
-                in_channels=2,
+                in_channels=11,
                 out_features=128,
                 number_of_conv3d_layers=6,
                 conv3d_channels=32,
                 image_size_pixels=24,
             ),
         },
         add_image_embedding_channel=True,
-        pv_encoder=dict(
-            _target_=pvnet.models.multimodal.site_encoders.encoders.SingleAttentionNetwork,
-            _partial_=True,
-            num_sites=349,
-            out_features=40,
-            num_heads=4,
-            kdim=40,
-            id_embed_dim=20,
-        ),
+        # ocf-data-sampler doesn't support PV site inputs yet
+        pv_encoder=None,
         output_network=dict(
             _target_=pvnet.models.multimodal.linear_networks.networks.ResFCNet2,
             _partial_=True,
@@ -268,11 +259,10 @@ def raw_multimodal_model_kwargs(model_minutes_kwargs):
         embedding_dim=16,
         include_sun=True,
         include_gsp_yield_history=True,
-        sat_history_minutes=90,
+        sat_history_minutes=30,
         nwp_history_minutes={"ukv": 120},
         nwp_forecast_minutes={"ukv": 480},
-        pv_history_minutes=180,
-        min_sat_delay_minutes=30,
+        min_sat_delay_minutes=0,
     )
 
     kwargs.update(model_minutes_kwargs)
@@ -297,14 +287,6 @@ def multimodal_quantile_model(multimodal_model_kwargs):
     return model
 
 
-@pytest.fixture()
-def multimodal_weighted_quantile_model(multimodal_model_kwargs):
-    model = Model(
-        output_quantiles=[0.1, 0.5, 0.9], **multimodal_model_kwargs, use_weighted_loss=True
-    )
-    return model
-
-
 @pytest.fixture()
 def multimodal_quantile_model_ignore_minutes(multimodal_model_kwargs):
     """Only forecsat second half of the 8 hours"""
 
@@ -1,3 +1,4 @@
+import pytest
 from pvnet.data.datamodule import DataModule
 from pvnet.data.wind_datamodule import WindDataModule
 from pvnet.data.pv_site_datamodule import PVSiteDataModule
@@ -8,16 +9,16 @@
 def test_init():
     dm = DataModule(
         configuration=None,
+        sample_dir="tests/test_data/presaved_samples",
         batch_size=2,
         num_workers=0,
         prefetch_factor=None,
         train_period=[None, None],
         val_period=[None, None],
-        test_period=[None, None],
-        batch_dir="tests/test_data/sample_batches",
     )
 
 
+@pytest.mark.skip(reason="Has not been updated for ocf-data-sampler yet")
 def test_wind_init():
     dm = WindDataModule(
         configuration=None,
@@ -30,7 +31,7 @@ def test_wind_init():
         batch_dir="tests/data/sample_batches",
     )
 
-
+@pytest.mark.skip(reason="Has not been updated for ocf-data-sampler yet")
 def test_wind_init_with_nwp_filter():
     dm = WindDataModule(
         configuration=None,
@@ -53,6 +54,7 @@ def test_wind_init_with_nwp_filter():
     assert batch[BatchKey.nwp]["ecmwf"][NWPBatchKey.nwp].shape[2] == 2
 
 
+@pytest.mark.skip(reason="Has not been updated for ocf-data-sampler yet")
 def test_pv_site_init():
     dm = PVSiteDataModule(
         configuration=f"{os.path.dirname(os.path.abspath(__file__))}/test_data/sample_batches/data_configuration.yaml",
@@ -69,13 +71,12 @@ def test_pv_site_init():
 def test_iter():
     dm = DataModule(
         configuration=None,
+        sample_dir="tests/test_data/presaved_samples",
         batch_size=2,
         num_workers=0,
         prefetch_factor=None,
         train_period=[None, None],
         val_period=[None, None],
-        test_period=[None, None],
-        batch_dir="tests/test_data/sample_batches",
     )
 
     batch = next(iter(dm.train_dataloader()))
@@ -84,15 +85,21 @@ def test_iter():
 def test_iter_multiprocessing():
     dm = DataModule(
         configuration=None,
-        batch_size=2,
+        sample_dir="tests/test_data/presaved_samples",
+        batch_size=1,
         num_workers=2,
-        prefetch_factor=2,
+        prefetch_factor=1,
         train_period=[None, None],
         val_period=[None, None],
-        test_period=[None, None],
-        batch_dir="tests/test_data/sample_batches",
     )
 
-    batch = next(iter(dm.train_dataloader()))
+    served_batches = 0
     for batch in dm.train_dataloader():
-        pass
+        served_batches += 1
+
+        # Stop once we've got 2 batches
+        if served_batches==2:
+            break
+
+    # Make sure we've served 2 batches
+    assert served_batches == 2
@@ -9,6 +9,7 @@ def last_value_model(model_minutes_kwargs):
 
 
 def test_model_forward(last_value_model, sample_batch):
+    
     y = last_value_model(sample_batch)
 
     # check output is the correct shape
 
@@ -10,10 +10,10 @@
 import pytest
 
 
-def _test_model_forward(batch, model_class, kwargs):
+def _test_model_forward(batch, model_class, kwargs, batch_size):
     model = model_class(**kwargs)
     y = model(batch)
-    assert tuple(y.shape) == (2, kwargs["out_features"]), y.shape
+    assert tuple(y.shape) == (batch_size, kwargs["out_features"]), y.shape
 
 
 def _test_model_backward(batch, model_class, kwargs):
@@ -24,22 +24,37 @@ def _test_model_backward(batch, model_class, kwargs):
 
 
 # Test model forward on all models
-def test_simplelearnedaggregator_forward(sample_batch, site_encoder_model_kwargs):
-    _test_model_forward(sample_batch, SimpleLearnedAggregator, site_encoder_model_kwargs)
+def test_simplelearnedaggregator_forward(sample_pv_batch, site_encoder_model_kwargs):
+    _test_model_forward(
+        sample_pv_batch, 
+        SimpleLearnedAggregator, 
+        site_encoder_model_kwargs, 
+        batch_size=8,
+    )
 
 
-def test_singleattentionnetwork_forward(sample_batch, site_encoder_model_kwargs):
-    _test_model_forward(sample_batch, SingleAttentionNetwork, site_encoder_model_kwargs)
+def test_singleattentionnetwork_forward(sample_pv_batch, site_encoder_model_kwargs):
+    _test_model_forward(
+        sample_pv_batch, 
+        SingleAttentionNetwork, 
+        site_encoder_model_kwargs, 
+        batch_size=8,
+    )
 
 
 def test_singleattentionnetwork_forward_4d(sample_wind_batch, site_encoder_sensor_model_kwargs):
-    _test_model_forward(sample_wind_batch, SingleAttentionNetwork, site_encoder_sensor_model_kwargs)
+    _test_model_forward(
+        sample_wind_batch, 
+        SingleAttentionNetwork, 
+        site_encoder_sensor_model_kwargs, 
+        batch_size=2,
+    )
 
 
 # Test model backward on all models
-def test_simplelearnedaggregator_backward(sample_batch, site_encoder_model_kwargs):
-    _test_model_backward(sample_batch, SimpleLearnedAggregator, site_encoder_model_kwargs)
+def test_simplelearnedaggregator_backward(sample_pv_batch, site_encoder_model_kwargs):
+    _test_model_backward(sample_pv_batch, SimpleLearnedAggregator, site_encoder_model_kwargs)
 
 
-def test_singleattentionnetwork_backward(sample_batch, site_encoder_model_kwargs):
-    _test_model_backward(sample_batch, SingleAttentionNetwork, site_encoder_model_kwargs)
+def test_singleattentionnetwork_backward(sample_pv_batch, site_encoder_model_kwargs):
+    _test_model_backward(sample_pv_batch, SingleAttentionNetwork, site_encoder_model_kwargs)