From 9d19f997e4b88fbcff4769a7876bd6b39026abff Mon Sep 17 00:00:00 2001 From: Vahid Tavakkoli Date: Wed, 11 Mar 2026 12:38:03 +0100 Subject: [PATCH] Enforce chronological splits and visualize train/val/test timeline --- generate_data.py | 14 +- oran_sim/config.py | 20 +- oran_sim/feature_selection.py | 61 +++++ oran_sim/model/__init__.py | 4 +- oran_sim/model/registry.py | 81 ++++--- oran_sim/sequence_data.py | 25 ++ oran_sim/splitting.py | 79 +++++++ oran_sim/temporal.py | 77 ++++++ scripts/aggregate_report.py | 189 ++++++++++++--- scripts/predict.py | 69 +++++- scripts/run_scenario.py | 19 ++ scripts/train.py | 302 +++++++++++++++++------- tests/test_aggregate_report_features.py | 71 ++++++ tests/test_feature_selection.py | 67 ++++++ tests/test_metrics_stability.py | 14 ++ tests/test_scenario_model_registry.py | 17 +- tests/test_sequence_data.py | 20 ++ tests/test_splitting.py | 26 ++ tests/test_temporal_smoke.py | 69 ++++++ tests/test_train_feature_count.py | 2 +- 20 files changed, 1041 insertions(+), 185 deletions(-) create mode 100644 oran_sim/feature_selection.py create mode 100644 oran_sim/sequence_data.py create mode 100644 oran_sim/splitting.py create mode 100644 oran_sim/temporal.py create mode 100644 tests/test_aggregate_report_features.py create mode 100644 tests/test_feature_selection.py create mode 100644 tests/test_metrics_stability.py create mode 100644 tests/test_sequence_data.py create mode 100644 tests/test_splitting.py create mode 100644 tests/test_temporal_smoke.py diff --git a/generate_data.py b/generate_data.py index a18a082..1566536 100644 --- a/generate_data.py +++ b/generate_data.py @@ -10,6 +10,7 @@ from oran_sim.config import FEATURE_ORDER from oran_sim.data import load_timeseries_from_kpm, write_json +from oran_sim.splitting import chronological_split def _build_target(df: pd.DataFrame, target: str, horizon_steps: int) -> pd.DataFrame: @@ -52,16 +53,6 @@ def _expand_to_exact_rows(df: pd.DataFrame, steps: int, seed: int) -> tuple[pd.D return expanded.iloc[:steps].copy(), logs -def _det_split(df: pd.DataFrame, seed: int) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]: - shuffled = df.sample(frac=1.0, random_state=seed).reset_index(drop=True) - n = len(shuffled) - n_train = int(n * 0.6) - n_val = int(n * 0.3) - n_test = n - n_train - n_val - train = shuffled.iloc[:n_train].copy() - val = shuffled.iloc[n_train : n_train + n_val].copy() - test = shuffled.iloc[n_train + n_val : n_train + n_val + n_test].copy() - return train, val, test def main() -> None: @@ -85,7 +76,7 @@ def main() -> None: base = base[keep_cols] exact_df, sampling_logs = _expand_to_exact_rows(base, args.steps, args.seed) - train_df, val_df, test_df = _det_split(exact_df, args.seed) + train_df, val_df, test_df, split_meta = chronological_split(exact_df, train_ratio=0.6, val_ratio=0.3, test_ratio=0.1) output = Path(args.output) output.parent.mkdir(parents=True, exist_ok=True) @@ -110,6 +101,7 @@ def main() -> None: "features_present": [c for c in FEATURE_ORDER if c in exact_df.columns], "target_definition": {"name": args.target, "horizon_steps": int(args.horizon_steps)}, "sampling_logs": sampling_logs, + "split_metadata": split_meta, } summary_path = Path("results/data/traffic_data_summary.json") write_json(summary_path, summary) diff --git a/oran_sim/config.py b/oran_sim/config.py index 6c503b0..eb4e82c 100644 --- a/oran_sim/config.py +++ b/oran_sim/config.py @@ -30,18 +30,20 @@ class Scenario: name: str kind: str features: int + seq_len: int | None = None + architecture: str | None = None SCENARIOS: Dict[str, Scenario] = { - "lightweight-32": Scenario("lightweight-32", "ridge", 10), - "lightweight-64": Scenario("lightweight-64", "ridge", 12), - "balanced-small": Scenario("balanced-small", "hgb", 14), - "balanced-medium": Scenario("balanced-medium", "hgb", 16), - "deep-performance": Scenario("deep-performance", "hgb", 17), - "ultra-performance": Scenario("ultra-performance", "hgb", 18), - "attention-baseline": Scenario("attention-baseline", "ridge", 15), - "liquid-baseline": Scenario("liquid-baseline", "ridge", 11), - "xlstm-baseline": Scenario("xlstm-baseline", "ridge", 13), + "lightweight-32": Scenario("lightweight-32", "tabular", 10), + "lightweight-64": Scenario("lightweight-64", "tabular", 12), + "balanced-small": Scenario("balanced-small", "tabular", 14), + "balanced-medium": Scenario("balanced-medium", "tabular", 16), + "deep-performance": Scenario("deep-performance", "temporal", 17, seq_len=16, architecture="lstm"), + "ultra-performance": Scenario("ultra-performance", "temporal", 18, seq_len=24, architecture="lstm"), + "attention-baseline": Scenario("attention-baseline", "temporal", 15, seq_len=16, architecture="attention"), + "liquid-baseline": Scenario("liquid-baseline", "temporal", 11, seq_len=12, architecture="liquid"), + "xlstm-baseline": Scenario("xlstm-baseline", "temporal", 13, seq_len=24, architecture="lstm"), } diff --git a/oran_sim/feature_selection.py b/oran_sim/feature_selection.py new file mode 100644 index 0000000..53ca040 --- /dev/null +++ b/oran_sim/feature_selection.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +from pathlib import Path +import json + +import pandas as pd +from sklearn.ensemble import RandomForestRegressor + + +def rank_features_by_importance( + train_df: pd.DataFrame, + candidate_features: list[str], + target_col: str = "target", + *, + random_state: int = 42, + n_estimators: int = 200, + n_jobs: int = -1, +) -> pd.DataFrame: + """Rank features by RandomForestRegressor importance using train split only.""" + features = [c for c in candidate_features if c in train_df.columns] + if not features: + raise ValueError("No candidate features found in training dataframe") + + x = train_df[features].copy() + for col in x.columns: + if not pd.api.types.is_numeric_dtype(x[col]): + x[col] = x[col].astype("category").cat.codes + x = x.fillna(0.0) + y = train_df[target_col].to_numpy() + + rf = RandomForestRegressor(random_state=random_state, n_estimators=n_estimators, n_jobs=n_jobs) + rf.fit(x, y) + + importance_df = pd.DataFrame({"feature": features, "importance": rf.feature_importances_}) + importance_df = importance_df.sort_values("importance", ascending=False).reset_index(drop=True) + return importance_df + + +def select_top_k_features(importance_df: pd.DataFrame, k: int) -> list[str]: + if k <= 0: + raise ValueError("k must be > 0") + return importance_df["feature"].head(k).tolist() + + +def write_feature_importance_artifacts( + importance_df: pd.DataFrame, + *, + json_path: Path, + csv_path: Path | None = None, +) -> None: + json_path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "feature_importance": [ + {"rank": int(i + 1), "feature": row[0], "importance": float(row[1])} + for i, row in enumerate(importance_df.itertuples(index=False, name=None)) + ] + } + json_path.write_text(json.dumps(payload, indent=2), encoding="utf-8") + if csv_path is not None: + csv_path.parent.mkdir(parents=True, exist_ok=True) + importance_df.to_csv(csv_path, index=False) diff --git a/oran_sim/model/__init__.py b/oran_sim/model/__init__.py index bffd54a..2a0c46e 100644 --- a/oran_sim/model/__init__.py +++ b/oran_sim/model/__init__.py @@ -1,3 +1,3 @@ -from .registry import SCENARIO_MODEL_SPECS, build_model +from .registry import SCENARIO_MODEL_SPECS, build_model, get_model_metadata -__all__ = ["SCENARIO_MODEL_SPECS", "build_model"] +__all__ = ["SCENARIO_MODEL_SPECS", "build_model", "get_model_metadata"] diff --git a/oran_sim/model/registry.py b/oran_sim/model/registry.py index 49e889b..df0db07 100644 --- a/oran_sim/model/registry.py +++ b/oran_sim/model/registry.py @@ -3,40 +3,65 @@ from sklearn.ensemble import HistGradientBoostingRegressor from sklearn.linear_model import Ridge -from .attention_baseline import AttentionBaselineModel -from .balanced_medium import BalancedMediumModel -from .balanced_small import BalancedSmallModel -from .base import ScenarioModelSpec -from .deep_performance import DeepPerformanceModel -from .lightweight_32 import Lightweight32Model -from .lightweight_64 import Lightweight64Model -from .liquid_baseline import LiquidBaselineModel -from .ultra_performance import UltraPerformanceModel -from .xlstm_baseline import XLSTMBaselineModel - -SCENARIO_MODEL_SPECS: dict[str, ScenarioModelSpec] = { - s.name: s - for s in [ - Lightweight32Model(), - Lightweight64Model(), - BalancedSmallModel(), - BalancedMediumModel(), - DeepPerformanceModel(), - UltraPerformanceModel(), - AttentionBaselineModel(), - LiquidBaselineModel(), - XLSTMBaselineModel(), - ] -} +from oran_sim.config import SCENARIOS def build_model(model_name: str, seed: int): - # Backward-compatible aliases. if model_name == "ridge": return Ridge(random_state=seed) if model_name == "hgb": return HistGradientBoostingRegressor(random_state=seed) - if model_name not in SCENARIO_MODEL_SPECS: + if model_name not in SCENARIOS: raise ValueError(f"Unsupported model: {model_name}") - return SCENARIO_MODEL_SPECS[model_name].build(seed) + + scenario = SCENARIOS[model_name] + if scenario.kind != "tabular": + raise ValueError(f"Scenario '{model_name}' is temporal and must be trained with the temporal pipeline") + + if model_name in {"lightweight-32", "lightweight-64"}: + return Ridge(random_state=seed) + return HistGradientBoostingRegressor(random_state=seed) + + +SCENARIO_MODEL_SPECS: dict[str, str] = {name: SCENARIOS[name].kind for name in SCENARIOS} + + +def get_model_metadata(model_name: str) -> dict[str, str]: + if model_name == "ridge": + return { + "backend": "Ridge", + "logical_profile": "tabular_baseline", + "profile_note": "Direct sklearn ridge baseline.", + } + if model_name == "hgb": + return { + "backend": "HistGradientBoostingRegressor", + "logical_profile": "tree_boosting_profile", + "profile_note": "Direct sklearn gradient-boosting baseline.", + } + + if model_name not in SCENARIOS: + raise ValueError(f"Unsupported model: {model_name}") + + scenario = SCENARIOS[model_name] + if scenario.kind == "tabular": + backend = "Ridge" if model_name in {"lightweight-32", "lightweight-64"} else "HistGradientBoostingRegressor" + profile = "tabular_baseline" if backend == "Ridge" else "tree_boosting_profile" + return { + "backend": backend, + "logical_profile": profile, + "profile_note": "Real sklearn tabular model.", + } + + backend_map = { + "attention": "TorchAttentionRegressor", + "liquid": "TorchLiquidRegressor", + "lstm": "TorchLSTMRegressor", + } + backend = backend_map.get(scenario.architecture or "", "TorchSequenceModel") + return { + "backend": backend, + "logical_profile": "temporal_sequence_model", + "profile_note": "Real temporal model trained on rolling windows.", + } diff --git a/oran_sim/sequence_data.py b/oran_sim/sequence_data.py new file mode 100644 index 0000000..b1d516e --- /dev/null +++ b/oran_sim/sequence_data.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd + + +def sort_by_time(df: pd.DataFrame) -> pd.DataFrame: + if "time_ms" in df.columns: + return df.sort_values("time_ms").reset_index(drop=True) + return df.reset_index(drop=True) + + +def make_sequences(x: np.ndarray, y: np.ndarray, seq_len: int) -> tuple[np.ndarray, np.ndarray]: + if seq_len <= 0: + raise ValueError("seq_len must be > 0") + if len(x) < seq_len: + raise ValueError(f"Not enough rows to build sequences: rows={len(x)} seq_len={seq_len}") + + xs = [] + ys = [] + for end_idx in range(seq_len - 1, len(x)): + start = end_idx - seq_len + 1 + xs.append(x[start : end_idx + 1]) + ys.append(y[end_idx]) + return np.asarray(xs, dtype=np.float32), np.asarray(ys, dtype=np.float32) diff --git a/oran_sim/splitting.py b/oran_sim/splitting.py new file mode 100644 index 0000000..3fdbadd --- /dev/null +++ b/oran_sim/splitting.py @@ -0,0 +1,79 @@ +from __future__ import annotations + +import pandas as pd + + +def _sorted(df: pd.DataFrame, time_col: str = "time_ms") -> pd.DataFrame: + if time_col in df.columns: + return df.sort_values(time_col).reset_index(drop=True) + return df.reset_index(drop=True) + + +def build_split_metadata( + train_df: pd.DataFrame, + val_df: pd.DataFrame, + test_df: pd.DataFrame, + *, + time_col: str = "time_ms", +) -> dict: + n_train, n_val, n_test = len(train_df), len(val_df), len(test_df) + total = max(n_train + n_val + n_test, 1) + + def _time_bounds(df: pd.DataFrame) -> tuple[float | None, float | None]: + if time_col not in df.columns or df.empty: + return None, None + return float(df[time_col].iloc[0]), float(df[time_col].iloc[-1]) + + tr_s, tr_e = _time_bounds(train_df) + va_s, va_e = _time_bounds(val_df) + te_s, te_e = _time_bounds(test_df) + + return { + "split_type": "chronological", + "train_start_index": 0, + "train_end_index": max(n_train - 1, -1), + "val_start_index": n_train, + "val_end_index": n_train + n_val - 1, + "test_start_index": n_train + n_val, + "test_end_index": n_train + n_val + n_test - 1, + "train_start": tr_s, + "train_end": tr_e, + "val_start": va_s, + "val_end": va_e, + "test_start": te_s, + "test_end": te_e, + "train_rows": n_train, + "val_rows": n_val, + "test_rows": n_test, + "train_pct": n_train / total, + "val_pct": n_val / total, + "test_pct": n_test / total, + } + + +def chronological_split( + df: pd.DataFrame, + train_ratio: float = 0.6, + val_ratio: float = 0.2, + test_ratio: float = 0.2, + *, + time_col: str = "time_ms", +) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict]: + if abs((train_ratio + val_ratio + test_ratio) - 1.0) > 1e-9: + raise ValueError("train_ratio + val_ratio + test_ratio must equal 1") + + ordered = _sorted(df, time_col=time_col) + n = len(ordered) + n_train = int(n * train_ratio) + n_val = int(n * val_ratio) + n_test = n - n_train - n_val + + train = ordered.iloc[:n_train].copy() + val = ordered.iloc[n_train : n_train + n_val].copy() + test = ordered.iloc[n_train + n_val : n_train + n_val + n_test].copy() + metadata = build_split_metadata(train, val, test, time_col=time_col) + return train, val, test, metadata + + +def sort_split(df: pd.DataFrame, time_col: str = "time_ms") -> pd.DataFrame: + return _sorted(df, time_col=time_col) diff --git a/oran_sim/temporal.py b/oran_sim/temporal.py new file mode 100644 index 0000000..7e83afd --- /dev/null +++ b/oran_sim/temporal.py @@ -0,0 +1,77 @@ +from __future__ import annotations + +from dataclasses import dataclass + +import numpy as np +import torch +from torch import nn +from torch.utils.data import DataLoader, TensorDataset + +from oran_sim.models import AttentionRegressor, LSTMRegressor, LiquidRegressor + + +@dataclass(frozen=True) +class TemporalSpec: + architecture: str + seq_len: int + hidden_sizes: list[int] | None = None + d_model: int | None = None + nhead: int | None = None + num_layers: int | None = None + dim_feedforward: int | None = None + dropout: float | None = None + hidden_size: int | None = None + dt: float | None = None + + +def build_temporal_model(input_dim: int, spec: TemporalSpec) -> nn.Module: + if spec.architecture == "lstm": + return LSTMRegressor(input_dim=input_dim, hidden_sizes=spec.hidden_sizes or [64, 32]) + if spec.architecture == "attention": + return AttentionRegressor( + input_dim=input_dim, + d_model=spec.d_model or 64, + nhead=spec.nhead or 4, + num_layers=spec.num_layers or 2, + dim_feedforward=spec.dim_feedforward or 128, + dropout=spec.dropout if spec.dropout is not None else 0.1, + ) + if spec.architecture == "liquid": + return LiquidRegressor(input_dim=input_dim, hidden_size=spec.hidden_size or 64, dt=spec.dt or 0.1) + raise ValueError(f"Unsupported temporal architecture: {spec.architecture}") + + +def train_temporal_model( + model: nn.Module, + x_train: np.ndarray, + y_train: np.ndarray, + *, + epochs: int, + lr: float = 1e-3, + batch_size: int = 64, +) -> nn.Module: + device = torch.device("cpu") + model.to(device) + ds = TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train)) + loader = DataLoader(ds, batch_size=min(batch_size, len(ds)), shuffle=True) + optimizer = torch.optim.Adam(model.parameters(), lr=lr) + loss_fn = nn.MSELoss() + + model.train() + for _ in range(max(1, epochs)): + for xb, yb in loader: + xb = xb.to(device) + yb = yb.to(device) + optimizer.zero_grad() + pred = model(xb) + loss = loss_fn(pred, yb) + loss.backward() + optimizer.step() + model.eval() + return model + + +def predict_temporal_model(model: nn.Module, x: np.ndarray) -> np.ndarray: + with torch.no_grad(): + out = model(torch.from_numpy(x).to(torch.device("cpu"))) + return out.detach().cpu().numpy().astype(float) diff --git a/scripts/aggregate_report.py b/scripts/aggregate_report.py index 379614e..219844c 100644 --- a/scripts/aggregate_report.py +++ b/scripts/aggregate_report.py @@ -44,50 +44,117 @@ def _prepare_preds_for_plot(preds: pd.DataFrame) -> pd.DataFrame: return preds.sort_values("time_ms").reset_index(drop=True) return preds.reset_index(drop=True) -def _build_timeseries_chart(preds: pd.DataFrame, scenario: str) -> str: +def _build_timeseries_chart(preds: pd.DataFrame, scenario: str, split_meta: dict | None = None) -> str: sorted_preds = _prepare_preds_for_plot(preds) fig, ax = plt.subplots(figsize=(9, 3)) - x = sorted_preds["time_ms"] if "time_ms" in sorted_preds.columns else sorted_preds.index - ax.plot(x.values, sorted_preds["y_true"].values, label="y_true", linewidth=1.6) - ax.plot(x.values, sorted_preds["y_pred"].values, label="y_pred", linewidth=1.2) - ax.set_title(f"{scenario}: y_true/y_pred vs timestamp") - ax.set_xlabel("timestamp") - ax.legend(loc="best") + + if split_meta: + tr_s = split_meta.get("train_start_index", 0) + tr_e = split_meta.get("train_end_index", -1) + va_s = split_meta.get("val_start_index", 0) + va_e = split_meta.get("val_end_index", -1) + te_s = split_meta.get("test_start_index", 0) + te_e = split_meta.get("test_end_index", len(sorted_preds) - 1) + + full_x = np.arange(int(max(te_e + 1, len(sorted_preds)))) + y_true_full = np.full(full_x.shape, np.nan, dtype=float) + y_pred_full = np.full(full_x.shape, np.nan, dtype=float) + test_slice = slice(int(te_s), int(min(te_s + len(sorted_preds), len(full_x)))) + n_fill = test_slice.stop - test_slice.start + y_true_full[test_slice] = sorted_preds["y_true"].values[:n_fill] + y_pred_full[test_slice] = sorted_preds["y_pred"].values[:n_fill] + + ax.axvspan(tr_s, tr_e, alpha=0.15, color="#2ca02c", label="Train region") + ax.axvspan(va_s, va_e, alpha=0.15, color="#ff7f0e", label="Validation region") + ax.axvspan(te_s, te_e, alpha=0.15, color="#1f77b4", label="Test region") + ax.axvline(tr_e, color="#2ca02c", linestyle="--", linewidth=1.2) + ax.axvline(va_e, color="#ff7f0e", linestyle="--", linewidth=1.2) + + ax.plot(full_x, y_true_full, label="y_true (test window)", linewidth=1.4, color="black") + ax.plot(full_x, y_pred_full, label="y_pred (test window)", linewidth=1.1, color="red") + ax.set_xlabel("global row index") + else: + x = sorted_preds["time_ms"] if "time_ms" in sorted_preds.columns else sorted_preds.index + ax.plot(x.values, sorted_preds["y_true"].values, label="y_true", linewidth=1.6) + ax.plot(x.values, sorted_preds["y_pred"].values, label="y_pred", linewidth=1.2) + ax.set_xlabel("timestamp") + + ax.set_title(f"{scenario}: predictions with chronological split boundaries") + ax.legend(loc="best", fontsize=8) img = _fig_to_base64(fig) plt.close(fig) return img +def _build_split_timeline(split_meta: dict, out_path: Path) -> str: + fig, ax = plt.subplots(figsize=(9, 1.8)) + tr = int(split_meta.get("train_rows", 0)) + va = int(split_meta.get("val_rows", 0)) + te = int(split_meta.get("test_rows", 0)) + total = max(tr + va + te, 1) + + left = 0 + ax.barh([0], [tr], left=left, color="#2ca02c", label="Train") + left += tr + ax.barh([0], [va], left=left, color="#ff7f0e", label="Validation") + left += va + ax.barh([0], [te], left=left, color="#1f77b4", label="Test") + + ax.set_xlim(0, total) + ax.set_yticks([]) + ax.set_xlabel("Row index timeline") + ax.set_title("| Train | Validation | Test | (chronological split)") + ax.legend(loc="upper center", ncol=3) + out_path.parent.mkdir(parents=True, exist_ok=True) + fig.savefig(out_path, dpi=140, bbox_inches="tight") + img = _fig_to_base64(fig) + plt.close(fig) + return img + + +def _split_table(split_meta: dict) -> pd.DataFrame: + rows = [] + for split_name, pfx in [("Train", "train"), ("Validation", "val"), ("Test", "test")]: + rows.append( + { + "Split": split_name, + "Start index": split_meta.get(f"{pfx}_start_index"), + "End index": split_meta.get(f"{pfx}_end_index"), + "Start time": split_meta.get(f"{pfx}_start"), + "End time": split_meta.get(f"{pfx}_end"), + "Rows": split_meta.get(f"{pfx}_rows"), + "Percentage": f"{100.0 * float(split_meta.get(f'{pfx}_pct', 0.0)):.1f}%", + } + ) + return pd.DataFrame(rows) + + def _build_benchmark_table(df: pd.DataFrame) -> pd.DataFrame: if df.empty: return pd.DataFrame() work = df.copy() - for c in ["R2_test", "RMSE_test", "MAE_test", "MAPE_test"]: + metric_cols = [c for c in ["R2_test", "RMSE_test", "MAE_test", "sMAPE_test", "wMAPE_test"] if c in work.columns] + for c in metric_cols: + work[c] = pd.to_numeric(work[c], errors="coerce") + + score_parts = [] + eps = 1e-12 + if "R2_test" in work.columns: + r2 = work["R2_test"] + score_parts.append((r2 - r2.min()) / max((r2.max() - r2.min()), eps)) + for c in ["RMSE_test", "MAE_test", "sMAPE_test", "wMAPE_test"]: if c in work.columns: - work[c] = pd.to_numeric(work[c], errors="coerce") - - work["benchmark_score"] = np.nan - valid = work[[c for c in ["R2_test", "RMSE_test", "MAE_test", "MAPE_test"] if c in work.columns]].dropna() - if not valid.empty: - r2_min = valid["R2_test"].min() if "R2_test" in valid.columns else np.nan - r2_max = valid["R2_test"].max() if "R2_test" in valid.columns else np.nan - rmse_min = valid["RMSE_test"].min() if "RMSE_test" in valid.columns else np.nan - rmse_max = valid["RMSE_test"].max() if "RMSE_test" in valid.columns else np.nan - mae_min = valid["MAE_test"].min() if "MAE_test" in valid.columns else np.nan - mae_max = valid["MAE_test"].max() if "MAE_test" in valid.columns else np.nan - mape_min = valid["MAPE_test"].min() if "MAPE_test" in valid.columns else np.nan - mape_max = valid["MAPE_test"].max() if "MAPE_test" in valid.columns else np.nan - - eps = 1e-12 - work["benchmark_score"] = ( - ((work.get("R2_test") - r2_min) / max((r2_max - r2_min), eps)) - + (1.0 - ((work.get("RMSE_test") - rmse_min) / max((rmse_max - rmse_min), eps))) - + (1.0 - ((work.get("MAE_test") - mae_min) / max((mae_max - mae_min), eps))) - + (1.0 - ((work.get("MAPE_test") - mape_min) / max((mape_max - mape_min), eps))) - ) / 4.0 - - rank_cols = [c for c in ["scenario", "model_type", "R2_test", "RMSE_test", "MAE_test", "MAPE_test", "benchmark_score"] if c in work.columns] + series = work[c] + score_parts.append(1.0 - (series - series.min()) / max((series.max() - series.min()), eps)) + + if score_parts: + stacked = np.vstack([part.to_numpy(dtype=float) for part in score_parts]) + work["benchmark_score"] = np.nanmean(stacked, axis=0) + else: + work["benchmark_score"] = np.nan + + rank_cols = [c for c in ["scenario", "model_type", "model_backend", "R2_test", "RMSE_test", "MAE_test", "MAPE_test", "sMAPE_test", "wMAPE_test", "benchmark_score"] if c in work.columns] ranked = work[rank_cols].sort_values("benchmark_score", ascending=False, na_position="last").reset_index(drop=True) if not ranked.empty: ranked.insert(0, "benchmark_rank", np.arange(1, len(ranked) + 1)) @@ -170,6 +237,12 @@ def main() -> None: "preds_path": status.get("preds_path", str(sdir / "preds.csv")), "dataset_path": status.get("dataset_path", ""), "epochs": status.get("epochs"), + "model_backend": status.get("model_backend"), + "logical_profile": status.get("logical_profile"), + "profile_note": status.get("profile_note"), + "selected_features": ", ".join(status.get("selected_features", [])), + "seq_len": status.get("seq_len"), + "split_metadata": status.get("split_metadata"), } metrics_path = Path(row["metrics_path"]) @@ -180,8 +253,14 @@ def main() -> None: if cfg_path.exists(): cfg = json.loads(cfg_path.read_text(encoding="utf-8")) row["model_type"] = cfg.get("model_type") + row["model_backend"] = cfg.get("model_backend", row.get("model_backend")) + row["logical_profile"] = cfg.get("logical_profile", row.get("logical_profile")) + row["profile_note"] = cfg.get("profile_note", row.get("profile_note")) + row["selected_features"] = ", ".join(cfg.get("features", status.get("selected_features", []))) row["num_features"] = len(cfg.get("features", [])) row["epochs"] = cfg.get("epochs", row.get("epochs")) + row["seq_len"] = cfg.get("seq_len", row.get("seq_len")) + row["split_metadata"] = cfg.get("split_metadata", row.get("split_metadata")) else: row["model_type"] = None @@ -194,7 +273,7 @@ def main() -> None: metrics = json.loads(metrics_path.read_text(encoding="utf-8")) test_metrics = metrics.get("test", {}) val_metrics = metrics.get("val", {}) - for key in ["MAE", "RMSE", "MAPE", "R2"]: + for key in ["MAE", "RMSE", "MAPE", "sMAPE", "wMAPE", "R2"]: row[f"{key}_test"] = test_metrics.get(key) row[f"{key}_val"] = val_metrics.get(key) @@ -204,7 +283,7 @@ def main() -> None: row["mean_abs_error"] = float(preds["abs_error"].mean()) row["mean_abs_pct_error"] = float(preds["pct_error"].abs().mean()) - model_chart = _build_timeseries_chart(preds, scenario) + model_chart = _build_timeseries_chart(preds, scenario, row.get("split_metadata")) model_chart_sections.append(f"

{scenario}

") dpath_str = str(row.get("dataset_path", "")).strip() @@ -217,7 +296,6 @@ def main() -> None: rows.append(row) comp_df = pd.DataFrame(rows) - comp_df.to_csv(out_dir / "scenario_status.csv", index=False) best_scenario = None best_metric_value = None @@ -233,6 +311,9 @@ def main() -> None: "scenario", "success", "model_type", + "model_backend", + "logical_profile", + "seq_len", "epochs", "epochs_logged", "rows", @@ -240,9 +321,13 @@ def main() -> None: "MAE_test", "RMSE_test", "MAPE_test", + "sMAPE_test", + "wMAPE_test", "R2_test", "mean_abs_error", "mean_abs_pct_error", + "selected_features", + "profile_note", "error", ] present_cols = [c for c in table_cols if c in comp_df.columns] @@ -258,17 +343,51 @@ def main() -> None: ) benchmark_df = _build_benchmark_table(comp_df) + if not benchmark_df.empty and "scenario" in benchmark_df.columns and "benchmark_score" in benchmark_df.columns: + comp_df = comp_df.merge(benchmark_df[["scenario", "benchmark_score"]], on="scenario", how="left") + comp_df.to_csv(out_dir / "scenario_status.csv", index=False) + + split_meta = None + for row in rows: + if row.get("split_metadata"): + split_meta = row["split_metadata"] + break + split_table_html = "

No split metadata available.

" + split_timeline_html = "

No split timeline available.

" + if split_meta: + split_table_html = _split_table(split_meta).to_html(index=False) + split_timeline_html = f"" + + feature_importance_path = Path("results/feature_importance.json") + feature_importance_html = "

Feature importance artifact not found.

" + if not feature_importance_path.exists(): + candidates = sorted(Path("results/scenarios").glob("*/model/feature_importance.json")) + if candidates: + feature_importance_path = candidates[0] + if feature_importance_path.exists(): + feature_importance_payload = json.loads(feature_importance_path.read_text(encoding="utf-8")) + feature_importance_df = pd.DataFrame(feature_importance_payload.get("feature_importance", [])) + if not feature_importance_df.empty: + feature_importance_html = feature_importance_df.to_html(index=False) html = f"""

KPM Final Report

Scientific Summary

-

This report compares all successful scenarios under a unified benchmark protocol using test-set R2 (higher is better), RMSE/MAE/MAPE (lower is better), and a composite benchmark score derived from min-max normalization.

+

This report compares all successful scenarios under a unified benchmark protocol using test-set R2 (higher is better), RMSE/MAE/MAPE/sMAPE/wMAPE (lower is better), and a composite benchmark score derived from min-max normalization.

+

Note: MAPE can be unstable when targets approach zero; sMAPE and wMAPE are included as more stable alternatives.

Scenario Comparison

{best_text}

{table_df.to_html(index=False)}

Benchmark Leaderboard

{benchmark_df.to_html(index=False) if not benchmark_df.empty else '

No benchmark-ready metrics available.

'} +

Chronological Time-Series Split

+

All experiments use contiguous chronological blocks only: | Train | Validation | Test |. No random shuffling is used in the official benchmark path.

+ {split_table_html} + {split_timeline_html} +

Temporal windows are built from past values only; validation/test windows do not use future labels. Feature importance is computed on the training split only.

+

Global Feature Importance (train-only Random Forest)

+ {feature_importance_html}

Model Predictions vs Ground Truth (timestamp axis)

{''.join(model_chart_sections) if model_chart_sections else '

No model charts available.

'}

Dataset/Feature Statistics

diff --git a/scripts/predict.py b/scripts/predict.py index d1f7b64..fa375d8 100644 --- a/scripts/predict.py +++ b/scripts/predict.py @@ -7,14 +7,13 @@ import joblib import numpy as np import pandas as pd +import torch +from oran_sim.models import AttentionRegressor, LSTMRegressor, LiquidRegressor +from oran_sim.sequence_data import make_sequences, sort_by_time -def compute_pct_error(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: - """Return signed percentage error using absolute true-value denominator. - This keeps sign from (y_true - y_pred) while avoiding sign flips from - negative targets in the denominator. - """ +def compute_pct_error(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: y_true = np.asarray(y_true, dtype=float) y_pred = np.asarray(y_pred, dtype=float) err = y_true - y_pred @@ -24,6 +23,46 @@ def compute_pct_error(y_true: np.ndarray, y_pred: np.ndarray) -> np.ndarray: return pct +def _build_temporal_model(spec: dict, input_dim: int): + arch = spec.get("architecture", "lstm") + if arch == "attention": + return AttentionRegressor( + input_dim=input_dim, + d_model=spec.get("d_model") or 64, + nhead=spec.get("nhead") or 4, + num_layers=spec.get("num_layers") or 2, + dim_feedforward=spec.get("dim_feedforward") or 128, + dropout=spec.get("dropout") if spec.get("dropout") is not None else 0.1, + ) + if arch == "liquid": + return LiquidRegressor(input_dim=input_dim, hidden_size=spec.get("hidden_size") or 64, dt=spec.get("dt") or 0.1) + return LSTMRegressor(input_dim=input_dim, hidden_sizes=spec.get("hidden_sizes") or [64, 32]) + + +def _predict_temporal(model_dir: Path, df: pd.DataFrame, features: list[str]) -> tuple[np.ndarray, np.ndarray, np.ndarray]: + spec = json.loads((model_dir / "temporal_spec.json").read_text(encoding="utf-8")) + seq_len = int(spec.get("seq_len", 16)) + xdf = df[features].copy() + for c in xdf.columns: + if not pd.api.types.is_numeric_dtype(xdf[c]): + xdf[c] = xdf[c].astype("category").cat.codes + xdf = xdf.fillna(0.0) + scaler = joblib.load(model_dir / "temporal_scaler.joblib") + x = scaler.transform(xdf.to_numpy(dtype=float)) + y = df["target"].to_numpy(dtype=float) + x_seq, y_seq = make_sequences(x, y, seq_len) + + model = _build_temporal_model(spec, input_dim=x_seq.shape[-1]) + model.load_state_dict(torch.load(model_dir / "temporal_model.pt", map_location="cpu")) + model.eval() + with torch.no_grad(): + y_pred = model(torch.from_numpy(x_seq.astype(np.float32))).detach().cpu().numpy() + + start_idx = seq_len - 1 + time_vals = df.get("time_ms", pd.Series(np.arange(len(df)))).to_numpy()[start_idx:] + return y_seq, y_pred, time_vals + + def main() -> None: p = argparse.ArgumentParser(description="Predict using trained model artifacts") p.add_argument("--model_dir", required=True) @@ -32,20 +71,28 @@ def main() -> None: args = p.parse_args() model_dir = Path(args.model_dir) - model = joblib.load(model_dir / "model.joblib") features = json.loads((model_dir / "features.json").read_text(encoding="utf-8")) + cfg = json.loads((model_dir / "config.json").read_text(encoding="utf-8")) df = pd.read_csv(args.csv) - x = df[features] - y_true = df["target"].to_numpy() - y_pred = model.predict(x) + + if cfg.get("temporal", False): + df = sort_by_time(df) + y_true, y_pred, time_vals = _predict_temporal(model_dir, df, features) + else: + model = joblib.load(model_dir / "model.joblib") + x = df[features] + y_true = df["target"].to_numpy() + y_pred = model.predict(x) + time_vals = df.get("time_ms", pd.Series(np.arange(len(df)))).to_numpy() + err = y_true - y_pred pct = compute_pct_error(y_true, y_pred) out_df = pd.DataFrame( { - "index": np.arange(len(df)), - "time_ms": df.get("time_ms", pd.Series(np.arange(len(df)))), + "index": np.arange(len(y_true)), + "time_ms": time_vals, "y_true": y_true, "y_pred": y_pred, "error": err, diff --git a/scripts/run_scenario.py b/scripts/run_scenario.py index df7fbd7..b4ec7d2 100644 --- a/scripts/run_scenario.py +++ b/scripts/run_scenario.py @@ -62,6 +62,9 @@ def main() -> None: "epoch_metrics_path": str(sdir / "model" / "epoch_metrics.csv"), "dataset_path": None, "epochs": epochs, + "selected_features": [], + "feature_importance_path": str(sdir / "model" / "feature_importance.json"), + "split_metadata_path": str(sdir / "model" / "split_metadata.json"), } try: @@ -93,6 +96,22 @@ def main() -> None: ) print(f"[{scenario}] training done", flush=True) + features_path = sdir / "model" / "features.json" + if features_path.exists(): + status["selected_features"] = json.loads(features_path.read_text(encoding="utf-8")) + + split_meta_path = sdir / "model" / "split_metadata.json" + if split_meta_path.exists(): + status["split_metadata"] = json.loads(split_meta_path.read_text(encoding="utf-8")) + + cfg_path = sdir / "model" / "config.json" + if cfg_path.exists(): + cfg = json.loads(cfg_path.read_text(encoding="utf-8")) + status["model_backend"] = cfg.get("model_backend") + status["logical_profile"] = cfg.get("logical_profile") + status["profile_note"] = cfg.get("profile_note") + status["seq_len"] = cfg.get("seq_len") + print(f"[{scenario}] prediction started", flush=True) run( [ diff --git a/scripts/train.py b/scripts/train.py index cd0989a..0988160 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -7,36 +7,36 @@ import joblib import numpy as np import pandas as pd +import torch from sklearn.compose import ColumnTransformer from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score from sklearn.pipeline import Pipeline from sklearn.preprocessing import OneHotEncoder, StandardScaler -from oran_sim.config import FEATURE_ORDER, get_feature_columns -from oran_sim.model import build_model +from oran_sim.config import FEATURE_ORDER, SCENARIOS +from oran_sim.feature_selection import rank_features_by_importance, select_top_k_features, write_feature_importance_artifacts +from oran_sim.model import build_model, get_model_metadata +from oran_sim.sequence_data import make_sequences +from oran_sim.temporal import TemporalSpec, build_temporal_model, predict_temporal_model, train_temporal_model +from oran_sim.splitting import build_split_metadata, chronological_split, sort_split def _metrics(y_true: np.ndarray, y_pred: np.ndarray) -> dict: mape_denom = np.clip(np.abs(y_true), 1e-6, None) + smape_denom = np.clip(np.abs(y_true) + np.abs(y_pred), 1e-6, None) + wmape_denom = np.clip(np.sum(np.abs(y_true)), 1e-6, None) + abs_err = np.abs(y_true - y_pred) return { "MAE": float(mean_absolute_error(y_true, y_pred)), "RMSE": float(np.sqrt(mean_squared_error(y_true, y_pred))), "MAPE": float(np.mean(np.abs((y_true - y_pred) / mape_denom)) * 100.0), + "sMAPE": float(np.mean(2.0 * abs_err / smape_denom) * 100.0), + "wMAPE": float(np.sum(abs_err) / wmape_denom * 100.0), "R2": float(r2_score(y_true, y_pred)), } -def main() -> None: - p = argparse.ArgumentParser(description="Train CPU-only baseline model") - p.add_argument("--csv", required=True) - p.add_argument("--out_dir", required=True) - p.add_argument("--seed", type=int, default=42) - p.add_argument("--model", default="hgb") - p.add_argument("--epochs", type=int, default=5) - p.add_argument("--feature_count", type=int, default=None) - args = p.parse_args() - - csv = Path(args.csv) +def _load_splits(csv: Path) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict]: root = csv.parent stem = csv.stem train_path = root / f"{stem}_train.csv" @@ -44,28 +44,24 @@ def main() -> None: test_path = root / f"{stem}_test.csv" if train_path.exists() and val_path.exists() and test_path.exists(): - train_df = pd.read_csv(train_path) - val_df = pd.read_csv(val_path) - test_df = pd.read_csv(test_path) + train_df = sort_split(pd.read_csv(train_path)) + val_df = sort_split(pd.read_csv(val_path)) + test_df = sort_split(pd.read_csv(test_path)) + split_meta = build_split_metadata(train_df, val_df, test_df) else: full = pd.read_csv(csv) - full = full.sample(frac=1.0, random_state=args.seed).reset_index(drop=True) - n = len(full) - a, b = int(0.6 * n), int(0.9 * n) - train_df, val_df, test_df = full.iloc[:a].copy(), full.iloc[a:b].copy(), full.iloc[b:].copy() - - if args.feature_count is not None: - desired = get_feature_columns(args.feature_count) - features = [c for c in desired if c in train_df.columns] - else: - features = [c for c in FEATURE_ORDER if c in train_df.columns] + train_df, val_df, test_df, split_meta = chronological_split(full, train_ratio=0.6, val_ratio=0.3, test_ratio=0.1) + train_df.to_csv(train_path, index=False) + val_df.to_csv(val_path, index=False) + test_df.to_csv(test_path, index=False) - if "scheduling_policy" not in features and "scheduling_policy" in train_df.columns and args.feature_count is None: - features.append("scheduling_policy") + return train_df, val_df, test_df, split_meta + + +def _prepare_tabular_preprocessor(features: list[str]) -> ColumnTransformer: num_features = [c for c in features if c != "scheduling_policy"] cat_features = [c for c in features if c == "scheduling_policy"] - - pre = ColumnTransformer( + return ColumnTransformer( transformers=[ ("num", StandardScaler(), num_features), ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_features), @@ -73,69 +69,203 @@ def main() -> None: remainder="drop", ) - model = build_model(args.model, args.seed) - pipe = Pipeline([("pre", pre), ("model", model)]) - x_train, y_train = train_df[features], train_df["target"].to_numpy() - x_val, y_val = val_df[features], val_df["target"].to_numpy() - x_test, y_test = test_df[features], test_df["target"].to_numpy() +def main() -> None: + p = argparse.ArgumentParser(description="Train scenario model") + p.add_argument("--csv", required=True) + p.add_argument("--out_dir", required=True) + p.add_argument("--seed", type=int, default=42) + p.add_argument("--model", default="hgb") + p.add_argument("--epochs", type=int, default=5) + p.add_argument("--feature_count", type=int, default=None) + args = p.parse_args() + + scenario_cfg = SCENARIOS.get(args.model) + is_temporal = bool(scenario_cfg and scenario_cfg.kind == "temporal") + + csv = Path(args.csv) + train_df, val_df, test_df, split_meta = _load_splits(csv) + + candidate_features = [c for c in FEATURE_ORDER if c in train_df.columns] + importance_df = rank_features_by_importance(train_df, candidate_features, random_state=args.seed) + feature_count = args.feature_count if args.feature_count is not None else len(candidate_features) + features = select_top_k_features(importance_df, min(feature_count, len(candidate_features))) + + model_meta = get_model_metadata(args.model) epochs = int(max(1, args.epochs)) epoch_rows: list[dict] = [] - final_metrics = None - for epoch in range(1, epochs + 1): - pipe.fit(x_train, y_train) - train_pred = pipe.predict(x_train) - val_pred = pipe.predict(x_val) - test_pred = pipe.predict(x_test) - - train_m = _metrics(y_train, train_pred) - val_m = _metrics(y_val, val_pred) - test_m = _metrics(y_test, test_pred) - final_metrics = {"train": train_m, "val": val_m, "test": test_m} - - row = { - "epoch": epoch, - "train_MAE": train_m["MAE"], - "train_RMSE": train_m["RMSE"], - "train_R2": train_m["R2"], - "val_MAE": val_m["MAE"], - "val_RMSE": val_m["RMSE"], - "val_R2": val_m["R2"], - "test_MAE": test_m["MAE"], - "test_RMSE": test_m["RMSE"], - "test_R2": test_m["R2"], - } - epoch_rows.append(row) - print( - f"epoch={epoch}/{epochs} val_MAE={val_m['MAE']:.6f} " - f"val_RMSE={val_m['RMSE']:.6f} val_R2={val_m['R2']:.6f}", - flush=True, + + if is_temporal: + requested_seq_len = scenario_cfg.seq_len or 16 + seq_len = max(2, min(requested_seq_len, len(train_df), len(val_df), len(test_df))) + + def encode(df: pd.DataFrame) -> pd.DataFrame: + x = df[features].copy() + for c in x.columns: + if not pd.api.types.is_numeric_dtype(x[c]): + x[c] = x[c].astype("category").cat.codes + return x.fillna(0.0) + + x_train_df = encode(train_df) + x_val_df = encode(val_df) + x_test_df = encode(test_df) + + scaler = StandardScaler() + x_train_np = scaler.fit_transform(x_train_df.to_numpy(dtype=float)) + x_val_np = scaler.transform(x_val_df.to_numpy(dtype=float)) + x_test_np = scaler.transform(x_test_df.to_numpy(dtype=float)) + + y_train = train_df["target"].to_numpy(dtype=float) + y_val = val_df["target"].to_numpy(dtype=float) + y_test = test_df["target"].to_numpy(dtype=float) + + x_train_seq, y_train_seq = make_sequences(x_train_np, y_train, seq_len) + x_val_seq, y_val_seq = make_sequences(x_val_np, y_val, seq_len) + x_test_seq, y_test_seq = make_sequences(x_test_np, y_test, seq_len) + + spec = TemporalSpec(architecture=scenario_cfg.architecture or "lstm", seq_len=seq_len) + if args.model == "deep-performance": + spec = TemporalSpec(architecture="lstm", seq_len=seq_len, hidden_sizes=[64, 32]) + elif args.model == "ultra-performance": + spec = TemporalSpec(architecture="lstm", seq_len=seq_len, hidden_sizes=[96, 64, 32]) + elif args.model == "xlstm-baseline": + spec = TemporalSpec(architecture="lstm", seq_len=seq_len, hidden_sizes=[64, 64]) + elif args.model == "attention-baseline": + spec = TemporalSpec(architecture="attention", seq_len=seq_len, d_model=64, nhead=4, num_layers=2, dim_feedforward=128, dropout=0.1) + elif args.model == "liquid-baseline": + spec = TemporalSpec(architecture="liquid", seq_len=seq_len, hidden_size=64, dt=0.1) + + model = build_temporal_model(input_dim=x_train_seq.shape[-1], spec=spec) + final_metrics = None + for epoch in range(1, epochs + 1): + model = train_temporal_model(model, x_train_seq, y_train_seq, epochs=1) + train_pred = predict_temporal_model(model, x_train_seq) + val_pred = predict_temporal_model(model, x_val_seq) + test_pred = predict_temporal_model(model, x_test_seq) + train_m = _metrics(y_train_seq, train_pred) + val_m = _metrics(y_val_seq, val_pred) + test_m = _metrics(y_test_seq, test_pred) + final_metrics = {"train": train_m, "val": val_m, "test": test_m} + epoch_rows.append( + { + "epoch": epoch, + "train_MAE": train_m["MAE"], + "train_RMSE": train_m["RMSE"], + "train_R2": train_m["R2"], + "val_MAE": val_m["MAE"], + "val_RMSE": val_m["RMSE"], + "val_R2": val_m["R2"], + "test_MAE": test_m["MAE"], + "test_RMSE": test_m["RMSE"], + "test_R2": test_m["R2"], + } + ) + print(f"epoch={epoch}/{epochs} val_MAE={val_m['MAE']:.6f} val_RMSE={val_m['RMSE']:.6f} val_R2={val_m['R2']:.6f}", flush=True) + + out_dir = Path(args.out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + write_feature_importance_artifacts(importance_df, json_path=Path("results") / "feature_importance.json", csv_path=Path("results") / "feature_importance.csv") + write_feature_importance_artifacts(importance_df, json_path=out_dir / "feature_importance.json", csv_path=out_dir / "feature_importance.csv") + + (out_dir / "split_metadata.json").write_text(json.dumps(split_meta, indent=2), encoding="utf-8") + torch.save(model.state_dict(), out_dir / "temporal_model.pt") + joblib.dump(scaler, out_dir / "temporal_scaler.joblib") + (out_dir / "temporal_spec.json").write_text(json.dumps(spec.__dict__, indent=2), encoding="utf-8") + (out_dir / "model.joblib").write_text("temporal-model", encoding="utf-8") + + (out_dir / "features.json").write_text(json.dumps(features, indent=2), encoding="utf-8") + (out_dir / "config.json").write_text( + json.dumps( + { + "seed": args.seed, + "model_type": args.model, + "model_backend": model_meta["backend"], + "logical_profile": model_meta["logical_profile"], + "profile_note": model_meta["profile_note"], + "target": "target", + "horizon": 1, + "features": features, + "feature_selection": "random_forest_importance_top_k", + "epochs": epochs, + "seq_len": seq_len, + "requested_seq_len": requested_seq_len, + "temporal": True, + "input_shape": [int(seq_len), int(len(features))], + "split_metadata": split_meta, + }, + indent=2, + ), + encoding="utf-8", ) - out_dir = Path(args.out_dir) - out_dir.mkdir(parents=True, exist_ok=True) - joblib.dump(pipe, out_dir / "model.joblib") - (out_dir / "features.json").write_text(json.dumps(features, indent=2), encoding="utf-8") - (out_dir / "config.json").write_text( - json.dumps( - { - "seed": args.seed, - "model_type": args.model, - "target": "target", - "horizon": 1, - "features": features, - "epochs": epochs, - }, - indent=2, - ), - encoding="utf-8", - ) - (out_dir / "metrics.json").write_text(json.dumps(final_metrics, indent=2), encoding="utf-8") - pd.DataFrame(epoch_rows).to_csv(out_dir / "epoch_metrics.csv", index=False) + (out_dir / "metrics.json").write_text(json.dumps(final_metrics, indent=2), encoding="utf-8") + pd.DataFrame(epoch_rows).to_csv(out_dir / "epoch_metrics.csv", index=False) + else: + pre = _prepare_tabular_preprocessor(features) + model = build_model(args.model, args.seed) + pipe = Pipeline([("pre", pre), ("model", model)]) + x_train, y_train = train_df[features], train_df["target"].to_numpy() + x_val, y_val = val_df[features], val_df["target"].to_numpy() + x_test, y_test = test_df[features], test_df["target"].to_numpy() + + final_metrics = None + for epoch in range(1, epochs + 1): + pipe.fit(x_train, y_train) + train_pred = pipe.predict(x_train) + val_pred = pipe.predict(x_val) + test_pred = pipe.predict(x_test) + train_m = _metrics(y_train, train_pred) + val_m = _metrics(y_val, val_pred) + test_m = _metrics(y_test, test_pred) + final_metrics = {"train": train_m, "val": val_m, "test": test_m} + epoch_rows.append( + { + "epoch": epoch, + "train_MAE": train_m["MAE"], + "train_RMSE": train_m["RMSE"], + "train_R2": train_m["R2"], + "val_MAE": val_m["MAE"], + "val_RMSE": val_m["RMSE"], + "val_R2": val_m["R2"], + "test_MAE": test_m["MAE"], + "test_RMSE": test_m["RMSE"], + "test_R2": test_m["R2"], + } + ) + print(f"epoch={epoch}/{epochs} val_MAE={val_m['MAE']:.6f} val_RMSE={val_m['RMSE']:.6f} val_R2={val_m['R2']:.6f}", flush=True) + + out_dir = Path(args.out_dir) + out_dir.mkdir(parents=True, exist_ok=True) + write_feature_importance_artifacts(importance_df, json_path=Path("results") / "feature_importance.json", csv_path=Path("results") / "feature_importance.csv") + write_feature_importance_artifacts(importance_df, json_path=out_dir / "feature_importance.json", csv_path=out_dir / "feature_importance.csv") + (out_dir / "split_metadata.json").write_text(json.dumps(split_meta, indent=2), encoding="utf-8") + joblib.dump(pipe, out_dir / "model.joblib") + (out_dir / "features.json").write_text(json.dumps(features, indent=2), encoding="utf-8") + (out_dir / "config.json").write_text( + json.dumps( + { + "seed": args.seed, + "model_type": args.model, + "model_backend": model_meta["backend"], + "logical_profile": model_meta["logical_profile"], + "profile_note": model_meta["profile_note"], + "target": "target", + "horizon": 1, + "features": features, + "feature_selection": "random_forest_importance_top_k", + "epochs": epochs, + "temporal": False, + "split_metadata": split_meta, + }, + indent=2, + ), + encoding="utf-8", + ) + (out_dir / "metrics.json").write_text(json.dumps(final_metrics, indent=2), encoding="utf-8") + pd.DataFrame(epoch_rows).to_csv(out_dir / "epoch_metrics.csv", index=False) print(f"train_rows={len(train_df)} val_rows={len(val_df)} test_rows={len(test_df)}") - print(f"metrics.val={final_metrics['val']}") - print(f"metrics.test={final_metrics['test']}") + print(f"split_meta={split_meta}") if __name__ == "__main__": diff --git a/tests/test_aggregate_report_features.py b/tests/test_aggregate_report_features.py new file mode 100644 index 0000000..c4cc769 --- /dev/null +++ b/tests/test_aggregate_report_features.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +import json +from pathlib import Path + +import pandas as pd + +import scripts.aggregate_report as aggregate_report + + +def test_aggregate_report_handles_feature_and_new_metrics(tmp_path, monkeypatch) -> None: + monkeypatch.chdir(tmp_path) + scenario = "lightweight-32" + sdir = Path("results/scenarios") / scenario + model_dir = sdir / "model" + model_dir.mkdir(parents=True, exist_ok=True) + + (sdir / "status.json").write_text( + json.dumps( + { + "scenario_name": scenario, + "success": True, + "metrics_path": str(model_dir / "metrics.json"), + "preds_path": str(sdir / "preds.csv"), + "dataset_path": "", + "split_metadata": {"train_start_index": 0, "train_end_index": 5, "val_start_index": 6, "val_end_index": 7, "test_start_index": 8, "test_end_index": 9, "train_rows": 6, "val_rows": 2, "test_rows": 2, "train_pct": 0.6, "val_pct": 0.2, "test_pct": 0.2}, + } + ), + encoding="utf-8", + ) + (model_dir / "config.json").write_text( + json.dumps( + { + "model_type": "lightweight-32", + "model_backend": "Ridge", + "logical_profile": "tabular_baseline", + "profile_note": "note", + "features": ["dl_cqi", "ul_sinr"], + "epochs": 1, + "split_metadata": {"train_start_index": 0, "train_end_index": 5, "val_start_index": 6, "val_end_index": 7, "test_start_index": 8, "test_end_index": 9, "train_rows": 6, "val_rows": 2, "test_rows": 2, "train_pct": 0.6, "val_pct": 0.2, "test_pct": 0.2}, + } + ), + encoding="utf-8", + ) + (model_dir / "metrics.json").write_text( + json.dumps({"test": {"MAE": 1, "RMSE": 1, "MAPE": 1, "sMAPE": 1, "wMAPE": 1, "R2": 0.5}, "val": {}}), + encoding="utf-8", + ) + pd.DataFrame({"epoch": [1]}).to_csv(model_dir / "epoch_metrics.csv", index=False) + pd.DataFrame( + {"time_ms": [1, 2], "y_true": [1.0, 2.0], "y_pred": [1.1, 1.9], "abs_error": [0.1, 0.1], "pct_error": [10.0, -5.0]} + ).to_csv(sdir / "preds.csv", index=False) + + Path("results").mkdir(exist_ok=True) + Path("results/feature_importance.json").write_text( + json.dumps({"feature_importance": [{"rank": 1, "feature": "dl_cqi", "importance": 0.9}]}), + encoding="utf-8", + ) + + monkeypatch.setattr(aggregate_report, "supported_scenarios", lambda: [scenario]) + aggregate_report.main() + + out = Path("results/final/report.html").read_text(encoding="utf-8") + assert "Global Feature Importance" in out + assert "sMAPE" in out + assert "wMAPE" in out + assert "Chronological Time-Series Split" in out + + assert Path("results/final/split_timeline.png").exists() + status_df = pd.read_csv("results/final/scenario_status.csv") + assert pd.notna(status_df.loc[0, "benchmark_score"]) if "benchmark_score" in status_df.columns else True diff --git a/tests/test_feature_selection.py b/tests/test_feature_selection.py new file mode 100644 index 0000000..bafa4c4 --- /dev/null +++ b/tests/test_feature_selection.py @@ -0,0 +1,67 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + +import numpy as np +import pandas as pd + +from oran_sim.config import FEATURE_ORDER +from oran_sim.feature_selection import rank_features_by_importance, select_top_k_features + + +def _toy_df(n: int = 120) -> pd.DataFrame: + rng = np.random.default_rng(42) + x1 = rng.normal(size=n) + x2 = rng.normal(size=n) + noise = rng.normal(scale=0.05, size=n) + y = 10.0 * x1 + 0.1 * x2 + noise + + data = {c: rng.normal(size=n) for c in FEATURE_ORDER if c != "scheduling_policy"} + data["dl_cqi"] = x1 + data["ul_sinr"] = x2 + data["scheduling_policy"] = ["rr", "pf"] * (n // 2) + (["rr"] if n % 2 else []) + data["target"] = y + return pd.DataFrame(data) + + +def test_top_k_feature_selection_uses_importance_ranking() -> None: + df = _toy_df() + ranked = rank_features_by_importance(df, FEATURE_ORDER, random_state=42) + top2 = select_top_k_features(ranked, 2) + assert "dl_cqi" in top2 + + +def test_feature_importance_file_generation_and_selected_features_in_status(tmp_path) -> None: + csv_path = tmp_path / "traffic_data_lightweight-32.csv" + df = _toy_df(90) + df.to_csv(csv_path, index=False) + df.iloc[:54].to_csv(tmp_path / "traffic_data_lightweight-32_train.csv", index=False) + df.iloc[54:81].to_csv(tmp_path / "traffic_data_lightweight-32_val.csv", index=False) + df.iloc[81:].to_csv(tmp_path / "traffic_data_lightweight-32_test.csv", index=False) + + subprocess.run( + [ + sys.executable, + "-m", + "scripts.run_scenario", + "--scenario", + "lightweight-32", + "--dataset", + str(csv_path), + ], + check=True, + ) + + sdir = Path("results/scenarios/lightweight-32") + status = json.loads((sdir / "status.json").read_text(encoding="utf-8")) + assert status["success"] is True + assert len(status.get("selected_features", [])) == 10 + assert status.get("model_backend") == "Ridge" + assert status.get("logical_profile") == "tabular_baseline" + assert (sdir / "model" / "feature_importance.json").exists() + assert (sdir / "model" / "split_metadata.json").exists() + assert "split_metadata" in status + assert Path("results/feature_importance.json").exists() diff --git a/tests/test_metrics_stability.py b/tests/test_metrics_stability.py new file mode 100644 index 0000000..ebc735b --- /dev/null +++ b/tests/test_metrics_stability.py @@ -0,0 +1,14 @@ +from __future__ import annotations + +import numpy as np + +from scripts.train import _metrics + + +def test_metrics_include_smape_wmape() -> None: + y_true = np.array([0.01, 0.02, 1.0, 2.0]) + y_pred = np.array([0.02, 0.01, 1.1, 1.8]) + m = _metrics(y_true, y_pred) + assert "sMAPE" in m and "wMAPE" in m + assert np.isfinite(m["sMAPE"]) + assert np.isfinite(m["wMAPE"]) diff --git a/tests/test_scenario_model_registry.py b/tests/test_scenario_model_registry.py index f5f34a1..df926c2 100644 --- a/tests/test_scenario_model_registry.py +++ b/tests/test_scenario_model_registry.py @@ -1,15 +1,28 @@ from __future__ import annotations +import pytest + from oran_sim.config import supported_scenarios -from oran_sim.model import SCENARIO_MODEL_SPECS, build_model +from oran_sim.model import SCENARIO_MODEL_SPECS, build_model, get_model_metadata def test_registry_has_entry_for_each_scenario() -> None: assert set(supported_scenarios()) == set(SCENARIO_MODEL_SPECS.keys()) -def test_build_model_supports_scenario_and_legacy_aliases() -> None: +def test_build_model_supports_tabular_and_legacy_aliases() -> None: assert build_model("lightweight-32", 42).__class__.__name__ == "Ridge" assert build_model("balanced-small", 42).__class__.__name__ == "HistGradientBoostingRegressor" assert build_model("ridge", 42).__class__.__name__ == "Ridge" assert build_model("hgb", 42).__class__.__name__ == "HistGradientBoostingRegressor" + + +def test_temporal_scenario_not_built_by_tabular_factory() -> None: + with pytest.raises(ValueError): + build_model("attention-baseline", 42) + + +def test_metadata_exposes_real_backend_profile() -> None: + meta = get_model_metadata("attention-baseline") + assert meta["backend"].startswith("Torch") + assert meta["logical_profile"] == "temporal_sequence_model" diff --git a/tests/test_sequence_data.py b/tests/test_sequence_data.py new file mode 100644 index 0000000..31f749f --- /dev/null +++ b/tests/test_sequence_data.py @@ -0,0 +1,20 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd + +from oran_sim.sequence_data import make_sequences, sort_by_time + + +def test_make_sequences_and_alignment() -> None: + x = np.arange(20, dtype=float).reshape(10, 2) + y = np.arange(10, dtype=float) + xs, ys = make_sequences(x, y, seq_len=4) + assert xs.shape == (7, 4, 2) + assert ys.tolist() == [3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0] + + +def test_temporal_sort_preserves_time_order() -> None: + df = pd.DataFrame({"time_ms": [30, 10, 20], "target": [3, 1, 2]}) + out = sort_by_time(df) + assert out["time_ms"].tolist() == [10, 20, 30] diff --git a/tests/test_splitting.py b/tests/test_splitting.py new file mode 100644 index 0000000..807fe97 --- /dev/null +++ b/tests/test_splitting.py @@ -0,0 +1,26 @@ +from __future__ import annotations + +import pandas as pd + +from oran_sim.splitting import chronological_split + + +def test_chronological_split_order_and_counts() -> None: + df = pd.DataFrame({"time_ms": [5, 1, 4, 2, 3], "target": [50, 10, 40, 20, 30]}) + train, val, test, meta = chronological_split(df, train_ratio=0.6, val_ratio=0.2, test_ratio=0.2) + + assert train["time_ms"].tolist() == [1, 2, 3] + assert val["time_ms"].tolist() == [4] + assert test["time_ms"].tolist() == [5] + assert meta["train_rows"] == 3 + assert meta["val_rows"] == 1 + assert meta["test_rows"] == 1 + + +def test_chronological_split_no_overlap() -> None: + df = pd.DataFrame({"time_ms": list(range(10)), "target": list(range(10))}) + train, val, test, meta = chronological_split(df) + assert train["time_ms"].max() < val["time_ms"].min() + assert val["time_ms"].max() < test["time_ms"].min() + assert meta["train_end_index"] < meta["val_start_index"] + assert meta["val_end_index"] < meta["test_start_index"] diff --git a/tests/test_temporal_smoke.py b/tests/test_temporal_smoke.py new file mode 100644 index 0000000..da5216c --- /dev/null +++ b/tests/test_temporal_smoke.py @@ -0,0 +1,69 @@ +from __future__ import annotations + +import json +import subprocess +import sys + +import numpy as np +import pandas as pd + +from oran_sim.config import FEATURE_ORDER + + +def test_temporal_scenario_train_predict_smoke(tmp_path) -> None: + n = 120 + rng = np.random.default_rng(123) + data = {c: rng.normal(size=n) for c in FEATURE_ORDER if c != "scheduling_policy"} + data["scheduling_policy"] = rng.choice(["rr", "pf"], size=n) + data["time_ms"] = np.arange(n) + data["target"] = 0.7 * data["dl_cqi"] + 0.2 * data["ul_sinr"] + rng.normal(scale=0.05, size=n) + df = pd.DataFrame(data) + + csv = tmp_path / "toy.csv" + df.to_csv(csv, index=False) + df.iloc[:72].to_csv(tmp_path / "toy_train.csv", index=False) + df.iloc[72:108].to_csv(tmp_path / "toy_val.csv", index=False) + df.iloc[108:].to_csv(tmp_path / "toy_test.csv", index=False) + + out_dir = tmp_path / "model" + subprocess.run( + [ + sys.executable, + "-m", + "scripts.train", + "--csv", + str(csv), + "--out_dir", + str(out_dir), + "--model", + "attention-baseline", + "--epochs", + "1", + "--feature_count", + "8", + ], + check=True, + ) + + cfg = json.loads((out_dir / "config.json").read_text(encoding="utf-8")) + assert cfg["temporal"] is True + assert cfg["seq_len"] <= 16 + assert cfg["seq_len"] >= 2 + + pred_path = tmp_path / "preds.csv" + subprocess.run( + [ + sys.executable, + "-m", + "scripts.predict", + "--model_dir", + str(out_dir), + "--csv", + str(tmp_path / "toy_test.csv"), + "--output", + str(pred_path), + ], + check=True, + ) + preds = pd.read_csv(pred_path) + assert not preds.empty diff --git a/tests/test_train_feature_count.py b/tests/test_train_feature_count.py index 8c60fff..914a657 100644 --- a/tests/test_train_feature_count.py +++ b/tests/test_train_feature_count.py @@ -45,6 +45,6 @@ def test_train_respects_feature_count(tmp_path) -> None: epoch_df = pd.read_csv(out_dir / "epoch_metrics.csv") assert len(features) == 10 - assert features == FEATURE_ORDER[:10] + assert set(features).issubset(set(FEATURE_ORDER)) assert cfg["epochs"] == 20 assert len(epoch_df) == 20