From 27fb4a12c2fcce7188de7a9d56b94a0277aa4ea1 Mon Sep 17 00:00:00 2001
From: Himel Das <151542219+himelds@users.noreply.github.com>
Date: Wed, 22 Apr 2026 16:10:24 +0600
Subject: [PATCH 1/4] feat: add hyperparameter-searchable time series
 forecasting pipeline

---
 .../timeseries/__init__.py                    |   2 +
 .../timeseries/forecasting/__init__.py        |   2 +
 .../forecasting/test_functions/__init__.py    |   3 +-
 .../time_series_pipeline_forecaster.py        | 271 ++++++++++++++++++
 4 files changed, 277 insertions(+), 1 deletion(-)
 create mode 100644 src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py

diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py
index 9918fe48..a14e5e4d 100644
--- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py
+++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py
@@ -11,6 +11,7 @@
     ExpSmoothingForecasterFunction,
     GradientBoostingForecasterFunction,
     RandomForestForecasterFunction,
+    TimeSeriesPipelineForecasterFunction,
 )
 
 __all__ = [
@@ -18,6 +19,7 @@
     "GradientBoostingForecasterFunction",
     "RandomForestForecasterFunction",
     "ExpSmoothingForecasterFunction",
+    "TimeSeriesPipelineForecasterFunction",
     # Classification
     "RandomForestTSClassifierFunction",
     "KNNTSClassifierFunction",
diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py
index 2346069a..c56b0ac8 100644
--- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py
+++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py
@@ -6,10 +6,12 @@
     ExpSmoothingForecasterFunction,
     GradientBoostingForecasterFunction,
     RandomForestForecasterFunction,
+    TimeSeriesPipelineForecasterFunction,
 )
 
 __all__ = [
     "GradientBoostingForecasterFunction",
     "RandomForestForecasterFunction",
     "ExpSmoothingForecasterFunction",
+    "TimeSeriesPipelineForecasterFunction",
 ]
diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py
index fd78b384..d4118f8d 100644
--- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py
+++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py
@@ -5,9 +5,10 @@
 from .exp_smoothing_forecaster import ExpSmoothingForecasterFunction
 from .gradient_boosting_forecaster import GradientBoostingForecasterFunction
 from .random_forest_forecaster import RandomForestForecasterFunction
-
+from .time_series_pipeline_forecaster import TimeSeriesPipelineForecasterFunction
 __all__ = [
     "GradientBoostingForecasterFunction",
     "RandomForestForecasterFunction",
     "ExpSmoothingForecasterFunction",
+    "TimeSeriesPipelineForecasterFunction",
 ]
diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
new file mode 100644
index 00000000..5c8463e3
--- /dev/null
+++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
@@ -0,0 +1,271 @@
+import numpy as np
+from typing import Any, Dict, List, Optional
+from numpy.lib.stride_tricks import sliding_window_view
+
+# model and preprocessing
+from sklearn.linear_model import Ridge
+from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.metrics import mean_absolute_error
+
+# Surfaces library base class and data
+from .._base_forecasting import BaseForecasting
+from ..datasets import DATASETS
+from surfaces.modifiers import BaseModifier
+
+def apply_time_series_features(
+    y: np.ndarray,
+    n_lags: int,
+    rolling_window: int,
+    differencing: int,
+) -> tuple[np.ndarray, np.ndarray]:
+    """
+    Build supervised learning features from a univariate time series.
+
+    Parameters
+    ----------
+    y            : 1-D array of observations (oldest → newest)
+    n_lags       : number of lag features (0 = none)
+    rolling_window: window size for rolling mean/std (0 = skip)
+    differencing : order of differencing applied before feature extraction
+    """
+    if n_lags == 0 and rolling_window == 0:
+        raise ValueError("At least one of n_lags or rolling_window must be > 0.")
+
+    if differencing > 0:
+        y = np.diff(y, n=differencing)
+
+    n_samples = len(y)
+    offset = max(n_lags, rolling_window)
+
+    if n_samples <= offset:
+        raise ValueError(
+            f"Series length {n_samples} is too short for "
+            f"n_lags={n_lags} / rolling_window={rolling_window}."
+        )
+
+    features = []
+
+    # Lag features
+    for lag in range(1, n_lags + 1):
+        features.append(y[offset - lag : n_samples - lag])
+
+    # Vectorised rolling statistics
+    if rolling_window > 0:
+        windows = sliding_window_view(y, window_shape=rolling_window)
+        # windows has shape (n_samples - rolling_window + 1, rolling_window)
+        # align to the same offset used by lag features
+        start = offset - rolling_window
+        features.append(windows[start:].mean(axis=1))
+        features.append(windows[start:].std(axis=1, ddof=1))
+
+    X = np.column_stack(features)
+    y_target = y[offset:]
+
+    return X, y_target
+
+class TimeSeriesPipelineForecasterFunction(BaseForecasting):
+    """
+    A hyperparameter-searchable time series forecasting pipeline that combines:
+      - Lag features and rolling statistics for feature engineering
+      - Optional differencing for stationarity
+      - Choice of scaler (none / standard / minmax)
+      - Choice of model (Ridge / RandomForest / GradientBoosting)
+      - Model-specific regularization parameters
+
+    The objective function returns negative MAE (higher = better),
+    compatible with a maximising optimiser.
+
+    Parameters
+    ----------
+    dataset      : Name of the dataset to load (must be a key in DATASETS).
+    objective    : Optimisation direction, default "maximize".
+    modifiers    : Optional list of BaseModifier instances.
+    memory       : Whether to enable caching in the base class.
+    collect_data : Whether to collect evaluation data in the base class.
+    train_size   : Fraction of data used for training (default 0.8).
+    **kwargs     : Passed through to BaseForecasting.
+    """
+
+    _name_ = "time_series_pipeline_forecaster"
+    _dependencies = {"ml": ["sklearn"]}
+
+    search_space_default = {
+        # Feature engineering
+        "n_lags": [3, 5, 7, 10, 14, 21],
+        "rolling_window": [0, 3, 7, 14],
+        "differencing": [0, 1, 2],
+        # Preprocessing
+        "scaler": ["none", "standard", "minmax"],
+        # Model selection
+        "model": ["ridge", "rf", "gb"],
+        # Model-specific regularization
+        # Ridge  -> alpha       (larger = stronger regularisation)
+        # RF     -> max_depth   (cast to int; larger = more complex)
+        # GB     -> learning_rate (smaller = more conservative)
+        "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0],
+    }
+
+    def __init__(
+        self,
+        dataset: str = "airline",
+        objective: str = "maximize",
+        modifiers: Optional[List] = None,
+        memory: bool = False,
+        collect_data: bool = True,
+        train_size: float = 0.8,
+        **kwargs: Any,
+    ) -> None:
+        if dataset not in DATASETS:
+            raise ValueError(
+                f"Unknown dataset '{dataset}'. "
+                f"Available datasets: {list(DATASETS.keys())}"
+            )
+        if not 0.0 < train_size < 1.0:
+            raise ValueError(
+                f"train_size must be between 0 and 1 exclusive, got {train_size}."
+            )
+
+        self.dataset = dataset
+        self.train_size = train_size
+        self._dataset_loader = DATASETS[dataset]
+        self._cached_data: Optional[tuple] = None
+
+        super().__init__(
+            objective=objective,
+            modifiers=modifiers,
+            memory=memory,
+            collect_data=collect_data,
+            **kwargs,
+        )
+
+    # ------------------------------------------------------------------
+    # Data loading
+    # ------------------------------------------------------------------
+
+    def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Load and cache the dataset.  Returns (X_raw, y_raw) where
+        y_raw is the univariate target series used for feature engineering.
+        """
+        if self._cached_data is None:
+            self._cached_data = self._dataset_loader()
+        return self._cached_data
+
+    # ------------------------------------------------------------------
+    # Scaler factory
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _build_scaler(scaler_type: str):
+        """Return a fitted-ready scaler instance, or None for 'none'."""
+        if scaler_type == "standard":
+            return StandardScaler()
+        if scaler_type == "minmax":
+            return MinMaxScaler()
+        if scaler_type == "none":
+            return None
+        raise ValueError(f"Unknown scaler type: {scaler_type!r}")
+
+    # ------------------------------------------------------------------
+    # Model factory
+    # ------------------------------------------------------------------
+
+    @staticmethod
+    def _build_model(model_type: str, reg: float):
+        """
+        Construct a scikit-learn regressor from the model type and the
+        shared regularization parameter, mapped per-model as follows:
+
+            ridge -> alpha        (float, e.g. 0.001 – 10.0)
+            rf    -> max_depth    (int cast of reg, clipped to >= 1)
+            gb    -> learning_rate (float, e.g. 0.001 – 1.0)
+        """
+        if model_type == "ridge":
+            return Ridge(alpha=reg)
+
+        if model_type == "rf":
+            return RandomForestRegressor(
+                n_estimators=100,
+                max_depth=max(1, int(reg)),
+                random_state=42,
+            )
+
+        if model_type == "gb":
+            return GradientBoostingRegressor(
+                n_estimators=100,
+                learning_rate=float(np.clip(reg, 1e-4, 1.0)),
+                random_state=42,
+            )
+
+        raise ValueError(f"Unknown model type: {model_type!r}")
+
+    # ------------------------------------------------------------------
+    # Objective
+    # ------------------------------------------------------------------
+
+    def _ml_objective(self, params: Dict[str, Any]) -> float:
+        """
+        Evaluate a single hyperparameter configuration.
+
+        Steps
+        -----
+        1. Load (cached) raw series.
+        2. Apply differencing, lag features, and rolling statistics.
+        3. Chronological train/test split.
+        4. Optionally scale features.
+        5. Fit the chosen model and return negative MAE.
+
+        Returns
+        -------
+        float
+            Negative MAE — higher is better, compatible with maximisation.
+        """
+        # 1. Raw data
+        _, y_raw = self._get_training_data()
+
+        # 2. Feature engineering
+        try:
+            X, y = apply_time_series_features(
+                y_raw,
+                n_lags=params["n_lags"],
+                rolling_window=params["rolling_window"],
+                differencing=params["differencing"],
+            )
+        except ValueError as exc:
+            # Config produced an unusable feature matrix (e.g. series too short)
+            # Return a very poor score so the optimiser discards this config.
+            return -float("inf")
+
+        # 3. Chronological split
+        split_idx = int(len(X) * self.train_size)
+        if split_idx == 0 or split_idx == len(X):
+            # Degenerate split — not enough data for this param combination
+            return -float("inf")
+
+        X_train, X_test = X[:split_idx], X[split_idx:]
+        y_train, y_test = y[:split_idx], y[split_idx:]
+
+        # 4. Scaling
+        scaler = self._build_scaler(params["scaler"])
+        if scaler is not None:
+            X_train = scaler.fit_transform(X_train)
+            X_test = scaler.transform(X_test)
+
+        # 5. Model training and evaluation
+        model = self._build_model(params["model"], params["model__regularization"])
+        model.fit(X_train, y_train)
+        mae = mean_absolute_error(y_test, model.predict(X_test))
+
+        return -mae
+
+    # ------------------------------------------------------------------
+    # Dunder helpers
+    # ------------------------------------------------------------------
+
+    def __repr__(self) -> str:
+        return (
+            f"{self.__class__.__name__}("
+            f"dataset={self.dataset!r}, "
+            f"train_size={self.train_size!r})"
+        )
\ No newline at end of file

From beedeeca3f0786a2f53321745111c6e41130fd44 Mon Sep 17 00:00:00 2001
From: Himel Das <himeldas077@gmail.com>
Date: Sat, 25 Apr 2026 22:06:54 +0600
Subject: [PATCH 2/4] Compliance: ensure all ML attributes and lazy imports
 follow library standards

---
 .../time_series_pipeline_forecaster.py        | 87 ++++++++++++-------
 tests/full/suites/test_ml.py                  | 17 ++++
 2 files changed, 71 insertions(+), 33 deletions(-)

diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
index 5c8463e3..1bec0033 100644
--- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
+++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
@@ -2,16 +2,10 @@
 from typing import Any, Dict, List, Optional
 from numpy.lib.stride_tricks import sliding_window_view
 
-# model and preprocessing
-from sklearn.linear_model import Ridge
-from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
-from sklearn.metrics import mean_absolute_error
-
 # Surfaces library base class and data
 from .._base_forecasting import BaseForecasting
 from ..datasets import DATASETS
-from surfaces.modifiers import BaseModifier
+
 
 def apply_time_series_features(
     y: np.ndarray,
@@ -90,22 +84,42 @@ class TimeSeriesPipelineForecasterFunction(BaseForecasting):
     _name_ = "time_series_pipeline_forecaster"
     _dependencies = {"ml": ["sklearn"]}
 
-    search_space_default = {
-        # Feature engineering
-        "n_lags": [3, 5, 7, 10, 14, 21],
-        "rolling_window": [0, 3, 7, 14],
-        "differencing": [0, 1, 2],
-        # Preprocessing
-        "scaler": ["none", "standard", "minmax"],
-        # Model selection
-        "model": ["ridge", "rf", "gb"],
-        # Model-specific regularization
-        # Ridge  -> alpha       (larger = stronger regularisation)
-        # RF     -> max_depth   (cast to int; larger = more complex)
-        # GB     -> learning_rate (smaller = more conservative)
-        "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0],
-    }
-
+    para_names = [
+        "n_lags",
+        "rolling_window",
+        "differencing",
+        "scaler",
+        "model",
+        "model__regularization"
+    ]
+
+    n_lags_default = [3, 5, 7, 10, 14, 21]
+    rolling_window_default = [0, 3, 7, 14]
+    differencing_default = [0, 1, 2]
+    scaler_default = ["none", "standard", "minmax"]
+    model_default = ["ridge", "rf", "gb"]
+    model__regularization_default = [0.001, 0.01, 0.1, 1.0, 10.0]
+
+    def _default_search_space(self) -> Dict[str, List]:
+        """Define the default hyperparameter search space for this function."""
+
+        return {
+            "n_lags": [3, 5, 7, 10, 14, 21],
+            "rolling_window": [0, 3, 7, 14],
+            "differencing": [0, 1, 2],
+            "scaler": ["none", "standard", "minmax"],
+            "model": ["ridge", "rf", "gb"],
+            "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0],
+        }
+    
+    def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Include fixed parameters for surrogate model support."""
+        return {
+            **params,
+            "dataset": self.dataset,
+            "train_size": self.train_size
+        }
+    
     def __init__(
         self,
         dataset: str = "airline",
@@ -139,9 +153,9 @@ def __init__(
             **kwargs,
         )
 
-    # ------------------------------------------------------------------
+
     # Data loading
-    # ------------------------------------------------------------------
+
 
     def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]:
         """
@@ -152,9 +166,9 @@ def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]:
             self._cached_data = self._dataset_loader()
         return self._cached_data
 
-    # ------------------------------------------------------------------
+
     # Scaler factory
-    # ------------------------------------------------------------------
+
 
     @staticmethod
     def _build_scaler(scaler_type: str):
@@ -167,9 +181,9 @@ def _build_scaler(scaler_type: str):
             return None
         raise ValueError(f"Unknown scaler type: {scaler_type!r}")
 
-    # ------------------------------------------------------------------
+
     # Model factory
-    # ------------------------------------------------------------------
+
 
     @staticmethod
     def _build_model(model_type: str, reg: float):
@@ -181,6 +195,9 @@ def _build_model(model_type: str, reg: float):
             rf    -> max_depth    (int cast of reg, clipped to >= 1)
             gb    -> learning_rate (float, e.g. 0.001 – 1.0)
         """
+        from sklearn.linear_model import Ridge
+        from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+
         if model_type == "ridge":
             return Ridge(alpha=reg)
 
@@ -200,9 +217,9 @@ def _build_model(model_type: str, reg: float):
 
         raise ValueError(f"Unknown model type: {model_type!r}")
 
-    # ------------------------------------------------------------------
+
     # Objective
-    # ------------------------------------------------------------------
+
 
     def _ml_objective(self, params: Dict[str, Any]) -> float:
         """
@@ -221,6 +238,10 @@ def _ml_objective(self, params: Dict[str, Any]) -> float:
         float
             Negative MAE — higher is better, compatible with maximisation.
         """
+        # model and preprocessing
+        from sklearn.preprocessing import StandardScaler, MinMaxScaler
+        from sklearn.metrics import mean_absolute_error
+
         # 1. Raw data
         _, y_raw = self._get_training_data()
 
@@ -259,9 +280,9 @@ def _ml_objective(self, params: Dict[str, Any]) -> float:
 
         return -mae
 
-    # ------------------------------------------------------------------
+
     # Dunder helpers
-    # ------------------------------------------------------------------
+
 
     def __repr__(self) -> str:
         return (
diff --git a/tests/full/suites/test_ml.py b/tests/full/suites/test_ml.py
index 4d81de33..d4d67cf9 100644
--- a/tests/full/suites/test_ml.py
+++ b/tests/full/suites/test_ml.py
@@ -413,3 +413,20 @@ def test_polynomial_feature_transformation(self, quick_ml_params):
 
         assert isinstance(result, (int, float))
         assert np.isfinite(result)
+
+@pytest.mark.ml
+class TestTimeSeriesFunctions:
+    """Test time-series ML functions."""
+
+    def test_time_series_pipeline_forecaster(self):
+        """TimeSeriesPipelineForecasterFunction evaluates correctly."""
+
+        from surfaces.test_functions.machine_learning.hyperparameter_optimization.timeseries.forecasting.test_functions.time_series_pipeline_forecaster import TimeSeriesPipelineForecasterFunction
+
+        func = TimeSeriesPipelineForecasterFunction()
+        params = get_sample_params(func)
+
+        result = func(params)
+
+        assert isinstance(result, (int, float))
+        assert np.isfinite(result)

From fa7151ef2e85b2df04d56deba0f546709bf8d56c Mon Sep 17 00:00:00 2001
From: Himel Das <himeldas077@gmail.com>
Date: Sat, 25 Apr 2026 22:40:22 +0600
Subject: [PATCH 3/4] Cleanup: removed unused imports and verified with full
 test suite

---
 .../test_functions/time_series_pipeline_forecaster.py            | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
index 1bec0033..de259cbf 100644
--- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
+++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
@@ -239,7 +239,6 @@ def _ml_objective(self, params: Dict[str, Any]) -> float:
             Negative MAE — higher is better, compatible with maximisation.
         """
         # model and preprocessing
-        from sklearn.preprocessing import StandardScaler, MinMaxScaler
         from sklearn.metrics import mean_absolute_error
 
         # 1. Raw data

From e72427dea72f835f7109023f65f478e7c6109a77 Mon Sep 17 00:00:00 2001
From: Himel Das <himeldas077@gmail.com>
Date: Sat, 25 Apr 2026 23:26:05 +0600
Subject: [PATCH 4/4] Final fix: resolved code quality and lazy import issues

---
 .../time_series_pipeline_forecaster.py        | 112 +++---------------
 1 file changed, 18 insertions(+), 94 deletions(-)

diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
index de259cbf..ab2996b7 100644
--- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
+++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py
@@ -47,8 +47,6 @@ def apply_time_series_features(
-    # Vectorised rolling statistics
+    # Rolling stats of the values before each target; y[-1] is target-only
     if rolling_window > 0:
-        windows = sliding_window_view(y, window_shape=rolling_window)
-        # windows has shape (n_samples - rolling_window + 1, rolling_window)
-        # align to the same offset used by lag features
+        windows = sliding_window_view(y[:-1], window_shape=rolling_window)
         start = offset - rolling_window
         features.append(windows[start:].mean(axis=1))
         features.append(windows[start:].std(axis=1, ddof=1))
@@ -58,27 +56,12 @@ def apply_time_series_features(
 
     return X, y_target
 
+
 class TimeSeriesPipelineForecasterFunction(BaseForecasting):
     """
-    A hyperparameter-searchable time series forecasting pipeline that combines:
-      - Lag features and rolling statistics for feature engineering
-      - Optional differencing for stationarity
-      - Choice of scaler (none / standard / minmax)
-      - Choice of model (Ridge / RandomForest / GradientBoosting)
-      - Model-specific regularization parameters
-
-    The objective function returns negative MAE (higher = better),
-    compatible with a maximising optimiser.
+    A hyperparameter-searchable time series forecasting pipeline.
 
-    Parameters
-    ----------
-    dataset      : Name of the dataset to load (must be a key in DATASETS).
-    objective    : Optimisation direction, default "maximize".
-    modifiers    : Optional list of BaseModifier instances.
-    memory       : Whether to enable caching in the base class.
-    collect_data : Whether to collect evaluation data in the base class.
-    train_size   : Fraction of data used for training (default 0.8).
-    **kwargs     : Passed through to BaseForecasting.
+    The objective function returns negative MAE (higher = better).
     """
 
     _name_ = "time_series_pipeline_forecaster"
@@ -101,8 +84,7 @@ class TimeSeriesPipelineForecasterFunction(BaseForecasting):
     model__regularization_default = [0.001, 0.01, 0.1, 1.0, 10.0]
 
     def _default_search_space(self) -> Dict[str, List]:
-        """Define the default hyperparameter search space for this function."""
-
+        """Define the default hyperparameter search space."""
         return {
             "n_lags": [3, 5, 7, 10, 14, 21],
             "rolling_window": [0, 3, 7, 14],
@@ -111,7 +93,7 @@ def _default_search_space(self) -> Dict[str, List]:
             "model": ["ridge", "rf", "gb"],
             "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0],
         }
-    
+
     def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
         """Include fixed parameters for surrogate model support."""
         return {
@@ -119,7 +101,7 @@ def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]:
             "dataset": self.dataset,
             "train_size": self.train_size
         }
-    
+
     def __init__(
         self,
         dataset: str = "airline",
@@ -131,14 +113,9 @@ def __init__(
         **kwargs: Any,
     ) -> None:
         if dataset not in DATASETS:
-            raise ValueError(
-                f"Unknown dataset '{dataset}'. "
-                f"Available datasets: {list(DATASETS.keys())}"
-            )
+            raise ValueError(f"Unknown dataset '{dataset}'.")
         if not 0.0 < train_size < 1.0:
-            raise ValueError(
-                f"train_size must be between 0 and 1 exclusive, got {train_size}."
-            )
+            raise ValueError("train_size must be between 0 and 1 exclusive.")
 
         self.dataset = dataset
         self.train_size = train_size
@@ -153,26 +130,16 @@ def __init__(
             **kwargs,
         )
 
-
-    # Data loading
-
-
     def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]:
-        """
-        Load and cache the dataset.  Returns (X_raw, y_raw) where
-        y_raw is the univariate target series used for feature engineering.
-        """
+        """Load and cache the dataset."""
         if self._cached_data is None:
             self._cached_data = self._dataset_loader()
         return self._cached_data
 
-
-    # Scaler factory
-
-
     @staticmethod
     def _build_scaler(scaler_type: str):
-        """Return a fitted-ready scaler instance, or None for 'none'."""
+        """Return an unfitted scaler, or None when scaler_type is 'none'."""
+        from sklearn.preprocessing import StandardScaler, MinMaxScaler
         if scaler_type == "standard":
             return StandardScaler()
         if scaler_type == "minmax":
@@ -181,70 +148,37 @@ def _build_scaler(scaler_type: str):
             return None
         raise ValueError(f"Unknown scaler type: {scaler_type!r}")
 
-
-    # Model factory
-
-
     @staticmethod
     def _build_model(model_type: str, reg: float):
-        """
-        Construct a scikit-learn regressor from the model type and the
-        shared regularization parameter, mapped per-model as follows:
-
-            ridge -> alpha        (float, e.g. 0.001 – 10.0)
-            rf    -> max_depth    (int cast of reg, clipped to >= 1)
-            gb    -> learning_rate (float, e.g. 0.001 – 1.0)
-        """
+        """Build model; reg is ridge alpha, rf max_depth or gb learning_rate."""
         from sklearn.linear_model import Ridge
-        from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
+        from sklearn.ensemble import (
+            RandomForestRegressor,
+            GradientBoostingRegressor
+        )
 
         if model_type == "ridge":
             return Ridge(alpha=reg)
-
         if model_type == "rf":
             return RandomForestRegressor(
                 n_estimators=100,
                 max_depth=max(1, int(reg)),
                 random_state=42,
             )
-
         if model_type == "gb":
             return GradientBoostingRegressor(
                 n_estimators=100,
                 learning_rate=float(np.clip(reg, 1e-4, 1.0)),
                 random_state=42,
             )
-
         raise ValueError(f"Unknown model type: {model_type!r}")
 
-
-    # Objective
-
-
     def _ml_objective(self, params: Dict[str, Any]) -> float:
-        """
-        Evaluate a single hyperparameter configuration.
-
-        Steps
-        -----
-        1. Load (cached) raw series.
-        2. Apply differencing, lag features, and rolling statistics.
-        3. Chronological train/test split.
-        4. Optionally scale features.
-        5. Fit the chosen model and return negative MAE.
-
-        Returns
-        -------
-        float
-            Negative MAE — higher is better, compatible with maximisation.
-        """
-        # model and preprocessing
+        """Return negative hold-out MAE for params, or -inf if infeasible."""
         from sklearn.metrics import mean_absolute_error
 
-        # 1. Raw data
         _, y_raw = self._get_training_data()
 
-        # 2. Feature engineering
         try:
             X, y = apply_time_series_features(
                 y_raw,
@@ -252,37 +186,27 @@ def _ml_objective(self, params: Dict[str, Any]) -> float:
                 rolling_window=params["rolling_window"],
                 differencing=params["differencing"],
             )
-        except ValueError as exc:
-            # Config produced an unusable feature matrix (e.g. series too short)
-            # Return a very poor score so the optimiser discards this config.
+        except ValueError:
             return -float("inf")
 
-        # 3. Chronological split
         split_idx = int(len(X) * self.train_size)
         if split_idx == 0 or split_idx == len(X):
-            # Degenerate split — not enough data for this param combination
             return -float("inf")
 
         X_train, X_test = X[:split_idx], X[split_idx:]
         y_train, y_test = y[:split_idx], y[split_idx:]
 
-        # 4. Scaling
         scaler = self._build_scaler(params["scaler"])
         if scaler is not None:
             X_train = scaler.fit_transform(X_train)
             X_test = scaler.transform(X_test)
 
-        # 5. Model training and evaluation
         model = self._build_model(params["model"], params["model__regularization"])
         model.fit(X_train, y_train)
         mae = mean_absolute_error(y_test, model.predict(X_test))
 
         return -mae
 
-
-    # Dunder helpers
-
-
     def __repr__(self) -> str:
         return (
             f"{self.__class__.__name__}("