diff --git a/mlforecast/core.py b/mlforecast/core.py index 040288c7..5ee05e53 100644 --- a/mlforecast/core.py +++ b/mlforecast/core.py @@ -703,6 +703,7 @@ def _predict_multi( result = df_constructor({self.id_col: uids, self.time_col: dates}) for name, model in models.items(): with self._backup(): + self._predict_setup() new_x = self._get_features_for_next_step(X_df) if before_predict_callback is not None: new_x = before_predict_callback(new_x) @@ -789,10 +790,16 @@ def predict( raise ValueError( f"The following features were provided through `X_df` but were considered as static during fit: {common}.\n" "Please re-run the fit step using the `static_features` argument to indicate which features are static. " - "If all your features are dynamic please pass an empty list (static_features=[])." + "If all your features are dynamic please provide an empty list (static_features=[])." ) starts = ufp.offset_times(self.last_dates, self.freq, 1) - ends = ufp.offset_times(self.last_dates, self.freq, horizon) + if getattr(self, "max_horizon", None) is None: + ends = ufp.offset_times(self.last_dates, self.freq, horizon) + expected_rows_X = len(self.uids) * horizon + else: + # direct approach uses only the immediate next timestamp + ends = starts + expected_rows_X = len(self.uids) dates_validation = type(X_df)( { self.id_col: self.uids, @@ -803,7 +810,7 @@ def predict( X_df = ufp.join(X_df, dates_validation, on=self.id_col) mask = ufp.between(X_df[self.time_col], X_df["_start"], X_df["_end"]) X_df = ufp.filter_with_mask(X_df, mask) - if X_df.shape[0] != len(self.uids) * horizon: + if X_df.shape[0] != expected_rows_X: msg = ( "Found missing inputs in X_df. " "It should have one row per id and time for the complete forecasting horizon.\n" diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 1c6cd012..97ce1ec7 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -1184,6 +1184,7 @@ " result = df_constructor({self.id_col: uids, self.time_col: dates})\n", " for name, model in models.items():\n", " with self._backup():\n", + " self._predict_setup()\n", " new_x = self._get_features_for_next_step(X_df)\n", " if before_predict_callback is not None:\n", " new_x = before_predict_callback(new_x)\n", @@ -1261,10 +1262,16 @@ " raise ValueError(\n", " f\"The following features were provided through `X_df` but were considered as static during fit: {common}.\\n\"\n", " \"Please re-run the fit step using the `static_features` argument to indicate which features are static. \"\n", - " \"If all your features are dynamic please pass an empty list (static_features=[]).\"\n", + " \"If all your features are dynamic please provide an empty list (static_features=[]).\"\n", " )\n", " starts = ufp.offset_times(self.last_dates, self.freq, 1)\n", - " ends = ufp.offset_times(self.last_dates, self.freq, horizon)\n", + " if getattr(self, 'max_horizon', None) is None:\n", + " ends = ufp.offset_times(self.last_dates, self.freq, horizon)\n", + " expected_rows_X = len(self.uids) * horizon\n", + " else:\n", + " # direct approach uses only the immediate next timestamp\n", + " ends = starts\n", + " expected_rows_X = len(self.uids)\n", " dates_validation = type(X_df)(\n", " {\n", " self.id_col: self.uids,\n", @@ -1275,7 +1282,7 @@ " X_df = ufp.join(X_df, dates_validation, on=self.id_col)\n", " mask = ufp.between(X_df[self.time_col], X_df['_start'], X_df['_end'])\n", " X_df = ufp.filter_with_mask(X_df, mask)\n", - " if X_df.shape[0] != len(self.uids) * horizon:\n", + " if X_df.shape[0] != expected_rows_X:\n", " msg = (\n", " \"Found missing inputs in X_df. \"\n", " \"It should have one row per id and time for the complete forecasting horizon.\\n\"\n", @@ -2015,7 +2022,7 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L757){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L758){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "## TimeSeries.predict\n", "\n", @@ -2029,7 +2036,7 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L757){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L758){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "## TimeSeries.predict\n", "\n", @@ -2167,7 +2174,7 @@ "text/markdown": [ "---\n", "\n", - "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L862){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L869){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "## TimeSeries.update\n", "\n", @@ -2180,7 +2187,7 @@ "text/plain": [ "---\n", "\n", - "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L862){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", + "[source](https://github.com/Nixtla/mlforecast/blob/main/mlforecast/core.py#L869){target=\"_blank\" style=\"float:right; font-size:smaller\"}\n", "\n", "## TimeSeries.update\n", "\n", diff --git a/nbs/forecast.ipynb b/nbs/forecast.ipynb index 38241517..552e02fd 100644 --- a/nbs/forecast.ipynb +++ b/nbs/forecast.ipynb @@ -1149,6 +1149,7 @@ "import numpy as np\n", "import xgboost as xgb\n", "from sklearn.linear_model import LinearRegression\n", + "from utilsforecast.feature_engineering import time_features\n", "from utilsforecast.plotting import plot_series\n", "\n", "from mlforecast.lag_transforms import ExpandingMean, ExponentiallyWeightedMean, RollingMean\n", @@ -5439,6 +5440,33 @@ "preds2 = fcst2.predict(10)\n", "pd.testing.assert_frame_equal(preds, preds2)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4a39447b-3d6b-4960-83c9-4f927c472ebb", + "metadata": {}, + "outputs": [], + "source": [ + "#| hide\n", + "# direct approach requires only one timestamp and produces same results for two models\n", + "series = generate_daily_series(5)\n", + "h = 5\n", + "freq = 'D'\n", + "train, future = time_features(series, freq=freq, features=['day'], h=h)\n", + "models = [LinearRegression(), lgb.LGBMRegressor(n_estimators=5)]\n", + "\n", + "fcst1 = MLForecast(models=models, freq=freq, date_features=['dayofweek'])\n", + "fcst1.fit(train, max_horizon=h, static_features=[])\n", + "preds1 = fcst1.predict(h=h, X_df=future) # extra timestamps\n", + "\n", + "fcst2 = MLForecast(models=models[::-1], freq=freq, date_features=['dayofweek'])\n", + "fcst2.fit(train, max_horizon=h, static_features=[])\n", + "X_df_one = future.groupby('unique_id', observed=True).head(1)\n", + "preds2 = fcst2.predict(h=h, X_df=X_df_one) # only needed timestamp\n", + "\n", + "pd.testing.assert_frame_equal(preds1, preds2[preds1.columns])" + ] } ], "metadata": {