From 27fb4a12c2fcce7188de7a9d56b94a0277aa4ea1 Mon Sep 17 00:00:00 2001 From: Himel Das <151542219+himelds@users.noreply.github.com> Date: Wed, 22 Apr 2026 16:10:24 +0600 Subject: [PATCH 1/4] feat: add hyperparameter-searchable time series forecasting pipeline --- .../timeseries/__init__.py | 2 + .../timeseries/forecasting/__init__.py | 2 + .../forecasting/test_functions/__init__.py | 3 +- .../time_series_pipeline_forecaster.py | 271 ++++++++++++++++++ 4 files changed, 277 insertions(+), 1 deletion(-) create mode 100644 src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py index 9918fe48..a14e5e4d 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/__init__.py @@ -11,6 +11,7 @@ ExpSmoothingForecasterFunction, GradientBoostingForecasterFunction, RandomForestForecasterFunction, + TimeSeriesPipelineForecasterFunction, ) __all__ = [ @@ -18,6 +19,7 @@ "GradientBoostingForecasterFunction", "RandomForestForecasterFunction", "ExpSmoothingForecasterFunction", + "TimeSeriesPipelineForecasterFunction", # Classification "RandomForestTSClassifierFunction", "KNNTSClassifierFunction", diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py index 2346069a..c56b0ac8 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py +++ 
b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/__init__.py @@ -6,10 +6,12 @@ ExpSmoothingForecasterFunction, GradientBoostingForecasterFunction, RandomForestForecasterFunction, + TimeSeriesPipelineForecasterFunction, ) __all__ = [ "GradientBoostingForecasterFunction", "RandomForestForecasterFunction", "ExpSmoothingForecasterFunction", + "TimeSeriesPipelineForecasterFunction", ] diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py index fd78b384..d4118f8d 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/__init__.py @@ -5,9 +5,10 @@ from .exp_smoothing_forecaster import ExpSmoothingForecasterFunction from .gradient_boosting_forecaster import GradientBoostingForecasterFunction from .random_forest_forecaster import RandomForestForecasterFunction - +from .time_series_pipeline_forecaster import TimeSeriesPipelineForecasterFunction __all__ = [ "GradientBoostingForecasterFunction", "RandomForestForecasterFunction", "ExpSmoothingForecasterFunction", + "TimeSeriesPipelineForecasterFunction", ] diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py new file mode 100644 index 00000000..5c8463e3 --- /dev/null +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py @@ -0,0 +1,271 @@ 
+import numpy as np +from typing import Any, Dict, List, Optional +from numpy.lib.stride_tricks import sliding_window_view + +# model and preprocessing +from sklearn.linear_model import Ridge +from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor +from sklearn.preprocessing import StandardScaler, MinMaxScaler +from sklearn.metrics import mean_absolute_error + +# Surfaces library base class and data +from .._base_forecasting import BaseForecasting +from ..datasets import DATASETS +from surfaces.modifiers import BaseModifier + +def apply_time_series_features( + y: np.ndarray, + n_lags: int, + rolling_window: int, + differencing: int, +) -> tuple[np.ndarray, np.ndarray]: + """ + Build supervised learning features from a univariate time series. + + Parameters + ---------- + y : 1-D array of observations (oldest → newest) + n_lags : number of lag features (0 = none) + rolling_window: window size for rolling mean/std (0 = skip) + differencing : order of differencing applied before feature extraction + """ + if n_lags == 0 and rolling_window == 0: + raise ValueError("At least one of n_lags or rolling_window must be > 0.") + + if differencing > 0: + y = np.diff(y, n=differencing) + + n_samples = len(y) + offset = max(n_lags, rolling_window) + + if n_samples <= offset: + raise ValueError( + f"Series length {n_samples} is too short for " + f"n_lags={n_lags} / rolling_window={rolling_window}." 
+ ) + + features = [] + + # Lag features + for lag in range(1, n_lags + 1): + features.append(y[offset - lag : n_samples - lag]) + + # Vectorised rolling statistics + if rolling_window > 0: + windows = sliding_window_view(y, window_shape=rolling_window) + # windows has shape (n_samples - rolling_window + 1, rolling_window) + # align to the same offset used by lag features + start = offset - rolling_window + features.append(windows[start:].mean(axis=1)) + features.append(windows[start:].std(axis=1, ddof=1)) + + X = np.column_stack(features) + y_target = y[offset:] + + return X, y_target + +class TimeSeriesPipelineForecasterFunction(BaseForecasting): + """ + A hyperparameter-searchable time series forecasting pipeline that combines: + - Lag features and rolling statistics for feature engineering + - Optional differencing for stationarity + - Choice of scaler (none / standard / minmax) + - Choice of model (Ridge / RandomForest / GradientBoosting) + - Model-specific regularization parameters + + The objective function returns negative MAE (higher = better), + compatible with a maximising optimiser. + + Parameters + ---------- + dataset : Name of the dataset to load (must be a key in DATASETS). + objective : Optimisation direction, default "maximize". + modifiers : Optional list of BaseModifier instances. + memory : Whether to enable caching in the base class. + collect_data : Whether to collect evaluation data in the base class. + train_size : Fraction of data used for training (default 0.8). + **kwargs : Passed through to BaseForecasting. 
+ """ + + _name_ = "time_series_pipeline_forecaster" + _dependencies = {"ml": ["sklearn"]} + + search_space_default = { + # Feature engineering + "n_lags": [3, 5, 7, 10, 14, 21], + "rolling_window": [0, 3, 7, 14], + "differencing": [0, 1, 2], + # Preprocessing + "scaler": ["none", "standard", "minmax"], + # Model selection + "model": ["ridge", "rf", "gb"], + # Model-specific regularization + # Ridge -> alpha (larger = stronger regularisation) + # RF -> max_depth (cast to int; larger = more complex) + # GB -> learning_rate (smaller = more conservative) + "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0], + } + + def __init__( + self, + dataset: str = "airline", + objective: str = "maximize", + modifiers: Optional[List] = None, + memory: bool = False, + collect_data: bool = True, + train_size: float = 0.8, + **kwargs: Any, + ) -> None: + if dataset not in DATASETS: + raise ValueError( + f"Unknown dataset '{dataset}'. " + f"Available datasets: {list(DATASETS.keys())}" + ) + if not 0.0 < train_size < 1.0: + raise ValueError( + f"train_size must be between 0 and 1 exclusive, got {train_size}." + ) + + self.dataset = dataset + self.train_size = train_size + self._dataset_loader = DATASETS[dataset] + self._cached_data: Optional[tuple] = None + + super().__init__( + objective=objective, + modifiers=modifiers, + memory=memory, + collect_data=collect_data, + **kwargs, + ) + + # ------------------------------------------------------------------ + # Data loading + # ------------------------------------------------------------------ + + def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]: + """ + Load and cache the dataset. Returns (X_raw, y_raw) where + y_raw is the univariate target series used for feature engineering. 
+ """ + if self._cached_data is None: + self._cached_data = self._dataset_loader() + return self._cached_data + + # ------------------------------------------------------------------ + # Scaler factory + # ------------------------------------------------------------------ + + @staticmethod + def _build_scaler(scaler_type: str): + """Return a fitted-ready scaler instance, or None for 'none'.""" + if scaler_type == "standard": + return StandardScaler() + if scaler_type == "minmax": + return MinMaxScaler() + if scaler_type == "none": + return None + raise ValueError(f"Unknown scaler type: {scaler_type!r}") + + # ------------------------------------------------------------------ + # Model factory + # ------------------------------------------------------------------ + + @staticmethod + def _build_model(model_type: str, reg: float): + """ + Construct a scikit-learn regressor from the model type and the + shared regularization parameter, mapped per-model as follows: + + ridge -> alpha (float, e.g. 0.001 – 10.0) + rf -> max_depth (int cast of reg, clipped to >= 1) + gb -> learning_rate (float, e.g. 0.001 – 1.0) + """ + if model_type == "ridge": + return Ridge(alpha=reg) + + if model_type == "rf": + return RandomForestRegressor( + n_estimators=100, + max_depth=max(1, int(reg)), + random_state=42, + ) + + if model_type == "gb": + return GradientBoostingRegressor( + n_estimators=100, + learning_rate=float(np.clip(reg, 1e-4, 1.0)), + random_state=42, + ) + + raise ValueError(f"Unknown model type: {model_type!r}") + + # ------------------------------------------------------------------ + # Objective + # ------------------------------------------------------------------ + + def _ml_objective(self, params: Dict[str, Any]) -> float: + """ + Evaluate a single hyperparameter configuration. + + Steps + ----- + 1. Load (cached) raw series. + 2. Apply differencing, lag features, and rolling statistics. + 3. Chronological train/test split. + 4. Optionally scale features. + 5. 
Fit the chosen model and return negative MAE. + + Returns + ------- + float + Negative MAE — higher is better, compatible with maximisation. + """ + # 1. Raw data + _, y_raw = self._get_training_data() + + # 2. Feature engineering + try: + X, y = apply_time_series_features( + y_raw, + n_lags=params["n_lags"], + rolling_window=params["rolling_window"], + differencing=params["differencing"], + ) + except ValueError as exc: + # Config produced an unusable feature matrix (e.g. series too short) + # Return a very poor score so the optimiser discards this config. + return -float("inf") + + # 3. Chronological split + split_idx = int(len(X) * self.train_size) + if split_idx == 0 or split_idx == len(X): + # Degenerate split — not enough data for this param combination + return -float("inf") + + X_train, X_test = X[:split_idx], X[split_idx:] + y_train, y_test = y[:split_idx], y[split_idx:] + + # 4. Scaling + scaler = self._build_scaler(params["scaler"]) + if scaler is not None: + X_train = scaler.fit_transform(X_train) + X_test = scaler.transform(X_test) + + # 5. 
Model training and evaluation + model = self._build_model(params["model"], params["model__regularization"]) + model.fit(X_train, y_train) + mae = mean_absolute_error(y_test, model.predict(X_test)) + + return -mae + + # ------------------------------------------------------------------ + # Dunder helpers + # ------------------------------------------------------------------ + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(" + f"dataset={self.dataset!r}, " + f"train_size={self.train_size!r})" + ) \ No newline at end of file From beedeeca3f0786a2f53321745111c6e41130fd44 Mon Sep 17 00:00:00 2001 From: Himel Das Date: Sat, 25 Apr 2026 22:06:54 +0600 Subject: [PATCH 2/4] Compliance: ensure all ML attributes and lazy imports follow library standards --- .../time_series_pipeline_forecaster.py | 87 ++++++++++++------- tests/full/suites/test_ml.py | 17 ++++ 2 files changed, 71 insertions(+), 33 deletions(-) diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py index 5c8463e3..1bec0033 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py @@ -2,16 +2,10 @@ from typing import Any, Dict, List, Optional from numpy.lib.stride_tricks import sliding_window_view -# model and preprocessing -from sklearn.linear_model import Ridge -from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor -from sklearn.preprocessing import StandardScaler, MinMaxScaler -from sklearn.metrics import mean_absolute_error - # Surfaces library base class and data 
from .._base_forecasting import BaseForecasting from ..datasets import DATASETS -from surfaces.modifiers import BaseModifier + def apply_time_series_features( y: np.ndarray, @@ -90,22 +84,42 @@ class TimeSeriesPipelineForecasterFunction(BaseForecasting): _name_ = "time_series_pipeline_forecaster" _dependencies = {"ml": ["sklearn"]} - search_space_default = { - # Feature engineering - "n_lags": [3, 5, 7, 10, 14, 21], - "rolling_window": [0, 3, 7, 14], - "differencing": [0, 1, 2], - # Preprocessing - "scaler": ["none", "standard", "minmax"], - # Model selection - "model": ["ridge", "rf", "gb"], - # Model-specific regularization - # Ridge -> alpha (larger = stronger regularisation) - # RF -> max_depth (cast to int; larger = more complex) - # GB -> learning_rate (smaller = more conservative) - "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0], - } - + para_names = [ + "n_lags", + "rolling_window", + "differencing", + "scaler", + "model", + "model__regularization" + ] + + n_lags_default = [3, 5, 7, 10, 14, 21] + rolling_window_default = [0, 3, 7, 14] + differencing_default = [0, 1, 2] + scaler_default = ["none", "standard", "minmax"] + model_default = ["ridge", "rf", "gb"] + model__regularization_default = [0.001, 0.01, 0.1, 1.0, 10.0] + + def _default_search_space(self) -> Dict[str, List]: + """Define the default hyperparameter search space for this function.""" + + return { + "n_lags": [3, 5, 7, 10, 14, 21], + "rolling_window": [0, 3, 7, 14], + "differencing": [0, 1, 2], + "scaler": ["none", "standard", "minmax"], + "model": ["ridge", "rf", "gb"], + "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0], + } + + def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]: + """Include fixed parameters for surrogate model support.""" + return { + **params, + "dataset": self.dataset, + "train_size": self.train_size + } + def __init__( self, dataset: str = "airline", @@ -139,9 +153,9 @@ def __init__( **kwargs, ) - # 
------------------------------------------------------------------ + # Data loading - # ------------------------------------------------------------------ + def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]: """ @@ -152,9 +166,9 @@ def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]: self._cached_data = self._dataset_loader() return self._cached_data - # ------------------------------------------------------------------ + # Scaler factory - # ------------------------------------------------------------------ + @staticmethod def _build_scaler(scaler_type: str): @@ -167,9 +181,9 @@ def _build_scaler(scaler_type: str): return None raise ValueError(f"Unknown scaler type: {scaler_type!r}") - # ------------------------------------------------------------------ + # Model factory - # ------------------------------------------------------------------ + @staticmethod def _build_model(model_type: str, reg: float): @@ -181,6 +195,9 @@ def _build_model(model_type: str, reg: float): rf -> max_depth (int cast of reg, clipped to >= 1) gb -> learning_rate (float, e.g. 0.001 – 1.0) """ + from sklearn.linear_model import Ridge + from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor + if model_type == "ridge": return Ridge(alpha=reg) @@ -200,9 +217,9 @@ def _build_model(model_type: str, reg: float): raise ValueError(f"Unknown model type: {model_type!r}") - # ------------------------------------------------------------------ + # Objective - # ------------------------------------------------------------------ + def _ml_objective(self, params: Dict[str, Any]) -> float: """ @@ -221,6 +238,10 @@ def _ml_objective(self, params: Dict[str, Any]) -> float: float Negative MAE — higher is better, compatible with maximisation. """ + # model and preprocessing + from sklearn.preprocessing import StandardScaler, MinMaxScaler + from sklearn.metrics import mean_absolute_error + # 1. 
Raw data _, y_raw = self._get_training_data() @@ -259,9 +280,9 @@ def _ml_objective(self, params: Dict[str, Any]) -> float: return -mae - # ------------------------------------------------------------------ + # Dunder helpers - # ------------------------------------------------------------------ + def __repr__(self) -> str: return ( diff --git a/tests/full/suites/test_ml.py b/tests/full/suites/test_ml.py index 4d81de33..d4d67cf9 100644 --- a/tests/full/suites/test_ml.py +++ b/tests/full/suites/test_ml.py @@ -413,3 +413,20 @@ def test_polynomial_feature_transformation(self, quick_ml_params): assert isinstance(result, (int, float)) assert np.isfinite(result) + +@pytest.mark.ml +class TestTimeSeriesFunctions: + """Test time-series ML functions.""" + + def test_time_series_pipeline_forecaster(self): + """TimeSeriesPipelineForecasterFunction evaluates correctly.""" + + from surfaces.test_functions.machine_learning.hyperparameter_optimization.timeseries.forecasting.test_functions.time_series_pipeline_forecaster import TimeSeriesPipelineForecasterFunction + + func = TimeSeriesPipelineForecasterFunction() + params = get_sample_params(func) + + result = func(params) + + assert isinstance(result, (int, float)) + assert np.isfinite(result) From fa7151ef2e85b2df04d56deba0f546709bf8d56c Mon Sep 17 00:00:00 2001 From: Himel Das Date: Sat, 25 Apr 2026 22:40:22 +0600 Subject: [PATCH 3/4] Cleanup: removed unused imports and verified with full test suite --- .../test_functions/time_series_pipeline_forecaster.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py index 1bec0033..de259cbf 100644 --- 
a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py @@ -239,7 +239,6 @@ def _ml_objective(self, params: Dict[str, Any]) -> float: Negative MAE — higher is better, compatible with maximisation. """ # model and preprocessing - from sklearn.preprocessing import StandardScaler, MinMaxScaler from sklearn.metrics import mean_absolute_error # 1. Raw data From e72427dea72f835f7109023f65f478e7c6109a77 Mon Sep 17 00:00:00 2001 From: Himel Das Date: Sat, 25 Apr 2026 23:26:05 +0600 Subject: [PATCH 4/4] Final fix: resolved code quality and lazy import issues --- .../time_series_pipeline_forecaster.py | 116 +++--------------- 1 file changed, 20 insertions(+), 96 deletions(-) diff --git a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py index de259cbf..ab2996b7 100644 --- a/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py +++ b/src/surfaces/test_functions/machine_learning/hyperparameter_optimization/timeseries/forecasting/test_functions/time_series_pipeline_forecaster.py @@ -47,8 +47,6 @@ def apply_time_series_features( # Vectorised rolling statistics if rolling_window > 0: windows = sliding_window_view(y, window_shape=rolling_window) - # windows has shape (n_samples - rolling_window + 1, rolling_window) - # align to the same offset used by lag features start = offset - rolling_window - features.append(windows[start:].mean(axis=1)) - features.append(windows[start:].std(axis=1, ddof=1)) + features.append(windows[start:-1].mean(axis=1))  # last window has no target + features.append(windows[start:-1].std(axis=1, ddof=1)) @@ -58,27 +56,12 @@
def apply_time_series_features( return X, y_target + class TimeSeriesPipelineForecasterFunction(BaseForecasting): """ - A hyperparameter-searchable time series forecasting pipeline that combines: - - Lag features and rolling statistics for feature engineering - - Optional differencing for stationarity - - Choice of scaler (none / standard / minmax) - - Choice of model (Ridge / RandomForest / GradientBoosting) - - Model-specific regularization parameters - - The objective function returns negative MAE (higher = better), - compatible with a maximising optimiser. + A hyperparameter-searchable time series forecasting pipeline. - Parameters - ---------- - dataset : Name of the dataset to load (must be a key in DATASETS). - objective : Optimisation direction, default "maximize". - modifiers : Optional list of BaseModifier instances. - memory : Whether to enable caching in the base class. - collect_data : Whether to collect evaluation data in the base class. - train_size : Fraction of data used for training (default 0.8). - **kwargs : Passed through to BaseForecasting. + The objective function returns negative MAE (higher = better). 
""" _name_ = "time_series_pipeline_forecaster" @@ -101,8 +84,7 @@ class TimeSeriesPipelineForecasterFunction(BaseForecasting): model__regularization_default = [0.001, 0.01, 0.1, 1.0, 10.0] def _default_search_space(self) -> Dict[str, List]: - """Define the default hyperparameter search space for this function.""" - + """Define the default hyperparameter search space.""" return { "n_lags": [3, 5, 7, 10, 14, 21], "rolling_window": [0, 3, 7, 14], @@ -111,7 +93,7 @@ def _default_search_space(self) -> Dict[str, List]: "model": ["ridge", "rf", "gb"], "model__regularization": [0.001, 0.01, 0.1, 1.0, 10.0], } - + def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]: """Include fixed parameters for surrogate model support.""" return { @@ -119,7 +101,7 @@ def _get_surrogate_params(self, params: Dict[str, Any]) -> Dict[str, Any]: "dataset": self.dataset, "train_size": self.train_size } - + def __init__( self, dataset: str = "airline", @@ -131,14 +113,9 @@ def __init__( **kwargs: Any, ) -> None: if dataset not in DATASETS: - raise ValueError( - f"Unknown dataset '{dataset}'. " - f"Available datasets: {list(DATASETS.keys())}" - ) + raise ValueError(f"Unknown dataset '{dataset}'.") if not 0.0 < train_size < 1.0: - raise ValueError( - f"train_size must be between 0 and 1 exclusive, got {train_size}." - ) + raise ValueError("train_size must be between 0 and 1 exclusive.") self.dataset = dataset self.train_size = train_size @@ -153,26 +130,16 @@ def __init__( **kwargs, ) - - # Data loading - - def _get_training_data(self) -> tuple[np.ndarray, np.ndarray]: - """ - Load and cache the dataset. Returns (X_raw, y_raw) where - y_raw is the univariate target series used for feature engineering. 
- """ + """Load and cache the dataset.""" if self._cached_data is None: self._cached_data = self._dataset_loader() return self._cached_data - - # Scaler factory - - @staticmethod def _build_scaler(scaler_type: str): - """Return a fitted-ready scaler instance, or None for 'none'.""" + """Return a fitted-ready scaler instance with Lazy Import.""" + from sklearn.preprocessing import StandardScaler, MinMaxScaler if scaler_type == "standard": return StandardScaler() if scaler_type == "minmax": @@ -181,70 +148,37 @@ def _build_scaler(scaler_type: str): return None raise ValueError(f"Unknown scaler type: {scaler_type!r}") - - # Model factory - - @staticmethod def _build_model(model_type: str, reg: float): - """ - Construct a scikit-learn regressor from the model type and the - shared regularization parameter, mapped per-model as follows: - - ridge -> alpha (float, e.g. 0.001 – 10.0) - rf -> max_depth (int cast of reg, clipped to >= 1) - gb -> learning_rate (float, e.g. 0.001 – 1.0) - """ + """Construct a scikit-learn regressor.""" from sklearn.linear_model import Ridge - from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor + from sklearn.ensemble import ( + RandomForestRegressor, + GradientBoostingRegressor + ) if model_type == "ridge": return Ridge(alpha=reg) - if model_type == "rf": return RandomForestRegressor( n_estimators=100, max_depth=max(1, int(reg)), random_state=42, ) - if model_type == "gb": return GradientBoostingRegressor( n_estimators=100, learning_rate=float(np.clip(reg, 1e-4, 1.0)), random_state=42, ) - raise ValueError(f"Unknown model type: {model_type!r}") - - # Objective - - def _ml_objective(self, params: Dict[str, Any]) -> float: - """ - Evaluate a single hyperparameter configuration. - - Steps - ----- - 1. Load (cached) raw series. - 2. Apply differencing, lag features, and rolling statistics. - 3. Chronological train/test split. - 4. Optionally scale features. - 5. Fit the chosen model and return negative MAE. 
- - Returns - ------- - float - Negative MAE — higher is better, compatible with maximisation. - """ - # model and preprocessing + """Evaluate a single hyperparameter configuration.""" from sklearn.metrics import mean_absolute_error - # 1. Raw data _, y_raw = self._get_training_data() - # 2. Feature engineering try: X, y = apply_time_series_features( y_raw, @@ -252,37 +186,27 @@ def _ml_objective(self, params: Dict[str, Any]) -> float: rolling_window=params["rolling_window"], differencing=params["differencing"], ) - except ValueError as exc: - # Config produced an unusable feature matrix (e.g. series too short) - # Return a very poor score so the optimiser discards this config. + except ValueError: return -float("inf") - # 3. Chronological split split_idx = int(len(X) * self.train_size) if split_idx == 0 or split_idx == len(X): - # Degenerate split — not enough data for this param combination return -float("inf") X_train, X_test = X[:split_idx], X[split_idx:] y_train, y_test = y[:split_idx], y[split_idx:] - # 4. Scaling scaler = self._build_scaler(params["scaler"]) if scaler is not None: X_train = scaler.fit_transform(X_train) X_test = scaler.transform(X_test) - # 5. Model training and evaluation model = self._build_model(params["model"], params["model__regularization"]) model.fit(X_train, y_train) mae = mean_absolute_error(y_test, model.predict(X_test)) return -mae - - # Dunder helpers - - def __repr__(self) -> str: return ( f"{self.__class__.__name__}("