vtavakkoli · vtavakkoli · Mar 11, 2026 · Mar 11, 2026 · Mar 11, 2026
diff --git a/generate_data.py b/generate_data.py
@@ -10,6 +10,7 @@
 
 from oran_sim.config import FEATURE_ORDER
 from oran_sim.data import load_timeseries_from_kpm, write_json
+from oran_sim.splitting import chronological_split
 
 
 def _build_target(df: pd.DataFrame, target: str, horizon_steps: int) -> pd.DataFrame:
@@ -52,16 +53,6 @@ def _expand_to_exact_rows(df: pd.DataFrame, steps: int, seed: int) -> tuple[pd.D
     return expanded.iloc[:steps].copy(), logs
 
 
-def _det_split(df: pd.DataFrame, seed: int) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
-    shuffled = df.sample(frac=1.0, random_state=seed).reset_index(drop=True)
-    n = len(shuffled)
-    n_train = int(n * 0.6)
-    n_val = int(n * 0.3)
-    n_test = n - n_train - n_val
-    train = shuffled.iloc[:n_train].copy()
-    val = shuffled.iloc[n_train : n_train + n_val].copy()
-    test = shuffled.iloc[n_train + n_val : n_train + n_val + n_test].copy()
-    return train, val, test
 
 
 def main() -> None:
@@ -85,7 +76,7 @@ def main() -> None:
     base = base[keep_cols]
 
     exact_df, sampling_logs = _expand_to_exact_rows(base, args.steps, args.seed)
-    train_df, val_df, test_df = _det_split(exact_df, args.seed)
+    train_df, val_df, test_df, split_meta = chronological_split(exact_df, train_ratio=0.6, val_ratio=0.3, test_ratio=0.1)
 
     output = Path(args.output)
     output.parent.mkdir(parents=True, exist_ok=True)
@@ -110,6 +101,7 @@ def main() -> None:
         "features_present": [c for c in FEATURE_ORDER if c in exact_df.columns],
         "target_definition": {"name": args.target, "horizon_steps": int(args.horizon_steps)},
         "sampling_logs": sampling_logs,
+        "split_metadata": split_meta,
     }
     summary_path = Path("results/data/traffic_data_summary.json")
     write_json(summary_path, summary)

diff --git a/oran_sim/splitting.py b/oran_sim/splitting.py
@@ -0,0 +1,79 @@
+from __future__ import annotations
+
+import pandas as pd
+
+
+def _sorted(df: pd.DataFrame, time_col: str = "time_ms") -> pd.DataFrame:
+    if time_col in df.columns:
+        return df.sort_values(time_col).reset_index(drop=True)
+    return df.reset_index(drop=True)
+
+
+def build_split_metadata(
+    train_df: pd.DataFrame,
+    val_df: pd.DataFrame,
+    test_df: pd.DataFrame,
+    *,
+    time_col: str = "time_ms",
+) -> dict:
+    n_train, n_val, n_test = len(train_df), len(val_df), len(test_df)
+    total = max(n_train + n_val + n_test, 1)
+
+    def _time_bounds(df: pd.DataFrame) -> tuple[float | None, float | None]:
+        if time_col not in df.columns or df.empty:
+            return None, None
+        return float(df[time_col].iloc[0]), float(df[time_col].iloc[-1])
+
+    tr_s, tr_e = _time_bounds(train_df)
+    va_s, va_e = _time_bounds(val_df)
+    te_s, te_e = _time_bounds(test_df)
+
+    return {
+        "split_type": "chronological",
+        "train_start_index": 0,
+        "train_end_index": max(n_train - 1, -1),
+        "val_start_index": n_train,
+        "val_end_index": n_train + n_val - 1,
+        "test_start_index": n_train + n_val,
+        "test_end_index": n_train + n_val + n_test - 1,
+        "train_start": tr_s,
+        "train_end": tr_e,
+        "val_start": va_s,
+        "val_end": va_e,
+        "test_start": te_s,
+        "test_end": te_e,
+        "train_rows": n_train,
+        "val_rows": n_val,
+        "test_rows": n_test,
+        "train_pct": n_train / total,
+        "val_pct": n_val / total,
+        "test_pct": n_test / total,
+    }
+
+
+def chronological_split(
+    df: pd.DataFrame,
+    train_ratio: float = 0.6,
+    val_ratio: float = 0.2,
+    test_ratio: float = 0.2,
+    *,
+    time_col: str = "time_ms",
+) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, dict]:
+    if abs((train_ratio + val_ratio + test_ratio) - 1.0) > 1e-9:
+        raise ValueError("train_ratio + val_ratio + test_ratio must equal 1")
+
+    ordered = _sorted(df, time_col=time_col)
+    n = len(ordered)
+    n_train = int(n * train_ratio)
+    n_val = int(n * val_ratio)
+    n_test = n - n_train - n_val
+
+    train = ordered.iloc[:n_train].copy()
+    val = ordered.iloc[n_train : n_train + n_val].copy()
+    test = ordered.iloc[n_train + n_val : n_train + n_val + n_test].copy()
+    metadata = build_split_metadata(train, val, test, time_col=time_col)
+    return train, val, test, metadata
+
+
+def sort_split(df: pd.DataFrame, time_col: str = "time_ms") -> pd.DataFrame:
+    return _sorted(df, time_col=time_col)
diff --git a/scripts/aggregate_report.py b/scripts/aggregate_report.py
@@ -44,20 +44,91 @@ def _prepare_preds_for_plot(preds: pd.DataFrame) -> pd.DataFrame:
         return preds.sort_values("time_ms").reset_index(drop=True)
     return preds.reset_index(drop=True)
 
-def _build_timeseries_chart(preds: pd.DataFrame, scenario: str) -> str:
+def _build_timeseries_chart(preds: pd.DataFrame, scenario: str, split_meta: dict | None = None) -> str:
     sorted_preds = _prepare_preds_for_plot(preds)
     fig, ax = plt.subplots(figsize=(9, 3))
-    x = sorted_preds["time_ms"] if "time_ms" in sorted_preds.columns else sorted_preds.index
-    ax.plot(x.values, sorted_preds["y_true"].values, label="y_true", linewidth=1.6)
-    ax.plot(x.values, sorted_preds["y_pred"].values, label="y_pred", linewidth=1.2)
-    ax.set_title(f"{scenario}: y_true/y_pred vs timestamp")
-    ax.set_xlabel("timestamp")
-    ax.legend(loc="best")
+
+    if split_meta:
+        tr_s = split_meta.get("train_start_index", 0)
+        tr_e = split_meta.get("train_end_index", -1)
+        va_s = split_meta.get("val_start_index", 0)
+        va_e = split_meta.get("val_end_index", -1)
+        te_s = split_meta.get("test_start_index", 0)
+        te_e = split_meta.get("test_end_index", len(sorted_preds) - 1)
+
+        full_x = np.arange(int(max(te_e + 1, len(sorted_preds))))
+        y_true_full = np.full(full_x.shape, np.nan, dtype=float)
+        y_pred_full = np.full(full_x.shape, np.nan, dtype=float)
+        test_slice = slice(int(te_s), int(min(te_s + len(sorted_preds), len(full_x))))
+        n_fill = test_slice.stop - test_slice.start
+        y_true_full[test_slice] = sorted_preds["y_true"].values[:n_fill]
+        y_pred_full[test_slice] = sorted_preds["y_pred"].values[:n_fill]
+
+        ax.axvspan(tr_s, tr_e, alpha=0.15, color="#2ca02c", label="Train region")
+        ax.axvspan(va_s, va_e, alpha=0.15, color="#ff7f0e", label="Validation region")
+        ax.axvspan(te_s, te_e, alpha=0.15, color="#1f77b4", label="Test region")
+        ax.axvline(tr_e, color="#2ca02c", linestyle="--", linewidth=1.2)
+        ax.axvline(va_e, color="#ff7f0e", linestyle="--", linewidth=1.2)
+
+        ax.plot(full_x, y_true_full, label="y_true (test window)", linewidth=1.4, color="black")
+        ax.plot(full_x, y_pred_full, label="y_pred (test window)", linewidth=1.1, color="red")
+        ax.set_xlabel("global row index")
+    else:
+        x = sorted_preds["time_ms"] if "time_ms" in sorted_preds.columns else sorted_preds.index
+        ax.plot(x.values, sorted_preds["y_true"].values, label="y_true", linewidth=1.6)
+        ax.plot(x.values, sorted_preds["y_pred"].values, label="y_pred", linewidth=1.2)
+        ax.set_xlabel("timestamp")
+
+    ax.set_title(f"{scenario}: predictions with chronological split boundaries")
+    ax.legend(loc="best", fontsize=8)
+    img = _fig_to_base64(fig)
+    plt.close(fig)
+    return img
+
+
+def _build_split_timeline(split_meta: dict, out_path: Path) -> str:
+    fig, ax = plt.subplots(figsize=(9, 1.8))
+    tr = int(split_meta.get("train_rows", 0))
+    va = int(split_meta.get("val_rows", 0))
+    te = int(split_meta.get("test_rows", 0))
+    total = max(tr + va + te, 1)
+
+    left = 0
+    ax.barh([0], [tr], left=left, color="#2ca02c", label="Train")
+    left += tr
+    ax.barh([0], [va], left=left, color="#ff7f0e", label="Validation")
+    left += va
+    ax.barh([0], [te], left=left, color="#1f77b4", label="Test")
+
+    ax.set_xlim(0, total)
+    ax.set_yticks([])
+    ax.set_xlabel("Row index timeline")
+    ax.set_title("| Train | Validation | Test | (chronological split)")
+    ax.legend(loc="upper center", ncol=3)
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    fig.savefig(out_path, dpi=140, bbox_inches="tight")
     img = _fig_to_base64(fig)
     plt.close(fig)
     return img
 
 
+def _split_table(split_meta: dict) -> pd.DataFrame:
+    rows = []
+    for split_name, pfx in [("Train", "train"), ("Validation", "val"), ("Test", "test")]:
+        rows.append(
+            {
+                "Split": split_name,
+                "Start index": split_meta.get(f"{pfx}_start_index"),
+                "End index": split_meta.get(f"{pfx}_end_index"),
+                "Start time": split_meta.get(f"{pfx}_start"),
+                "End time": split_meta.get(f"{pfx}_end"),
+                "Rows": split_meta.get(f"{pfx}_rows"),
+                "Percentage": f"{100.0 * float(split_meta.get(f'{pfx}_pct', 0.0)):.1f}%",
+            }
+        )
+    return pd.DataFrame(rows)
+
+
 def _build_benchmark_table(df: pd.DataFrame) -> pd.DataFrame:
     if df.empty:
         return pd.DataFrame()
@@ -171,6 +242,7 @@ def main() -> None:
             "profile_note": status.get("profile_note"),
             "selected_features": ", ".join(status.get("selected_features", [])),
             "seq_len": status.get("seq_len"),
+            "split_metadata": status.get("split_metadata"),
         }
 
         metrics_path = Path(row["metrics_path"])
@@ -188,6 +260,7 @@ def main() -> None:
             row["num_features"] = len(cfg.get("features", []))
             row["epochs"] = cfg.get("epochs", row.get("epochs"))
             row["seq_len"] = cfg.get("seq_len", row.get("seq_len"))
+            row["split_metadata"] = cfg.get("split_metadata", row.get("split_metadata"))
         else:
             row["model_type"] = None
 
@@ -210,7 +283,7 @@ def main() -> None:
             row["mean_abs_error"] = float(preds["abs_error"].mean())
             row["mean_abs_pct_error"] = float(preds["pct_error"].abs().mean())
 
-            model_chart = _build_timeseries_chart(preds, scenario)
+            model_chart = _build_timeseries_chart(preds, scenario, row.get("split_metadata"))
             model_chart_sections.append(f"<h3>{scenario}</h3><img src='data:image/png;base64,{model_chart}'/>")
 
         dpath_str = str(row.get("dataset_path", "")).strip()
@@ -274,6 +347,17 @@ def main() -> None:
         comp_df = comp_df.merge(benchmark_df[["scenario", "benchmark_score"]], on="scenario", how="left")
     comp_df.to_csv(out_dir / "scenario_status.csv", index=False)
 
+    split_meta = None
+    for row in rows:
+        if row.get("split_metadata"):
+            split_meta = row["split_metadata"]
+            break
+    split_table_html = "<p>No split metadata available.</p>"
+    split_timeline_html = "<p>No split timeline available.</p>"
+    if split_meta:
+        split_table_html = _split_table(split_meta).to_html(index=False)
+        split_timeline_html = f"<img src='data:image/png;base64,{_build_split_timeline(split_meta, out_dir / 'split_timeline.png')}'/>"
+
     feature_importance_path = Path("results/feature_importance.json")
     feature_importance_html = "<p>Feature importance artifact not found.</p>"
     if not feature_importance_path.exists():
@@ -286,15 +370,6 @@ def main() -> None:
         if not feature_importance_df.empty:
             feature_importance_html = feature_importance_df.to_html(index=False)
 
-
-    feature_importance_path = Path("results/feature_importance.json")
-    feature_importance_html = "<p>Feature importance artifact not found.</p>"
-    if feature_importance_path.exists():
-        feature_importance_payload = json.loads(feature_importance_path.read_text(encoding="utf-8"))
-        feature_importance_df = pd.DataFrame(feature_importance_payload.get("feature_importance", []))
-        if not feature_importance_df.empty:
-            feature_importance_html = feature_importance_df.to_html(index=False)
-
     html = f"""
     <html><body>
     <h1>KPM Final Report</h1>
@@ -306,6 +381,11 @@ def main() -> None:
     {table_df.to_html(index=False)}
     <h2>Benchmark Leaderboard</h2>
     {benchmark_df.to_html(index=False) if not benchmark_df.empty else '<p>No benchmark-ready metrics available.</p>'}
+    <h2>Chronological Time-Series Split</h2>
+    <p>All experiments use contiguous chronological blocks only: | Train | Validation | Test |. No random shuffling is used in the official benchmark path.</p>
+    {split_table_html}
+    {split_timeline_html}
+    <p>Temporal windows are built from past values only; validation/test windows do not use future labels. Feature importance is computed on the training split only.</p>
     <h2>Global Feature Importance (train-only Random Forest)</h2>
     {feature_importance_html}
     <h2>Model Predictions vs Ground Truth (timestamp axis)</h2>

diff --git a/scripts/run_scenario.py b/scripts/run_scenario.py
@@ -64,6 +64,7 @@ def main() -> None:
         "epochs": epochs,
         "selected_features": [],
         "feature_importance_path": str(sdir / "model" / "feature_importance.json"),
+        "split_metadata_path": str(sdir / "model" / "split_metadata.json"),
     }
 
     try:
@@ -99,6 +100,10 @@ def main() -> None:
         if features_path.exists():
             status["selected_features"] = json.loads(features_path.read_text(encoding="utf-8"))
 
+        split_meta_path = sdir / "model" / "split_metadata.json"
+        if split_meta_path.exists():
+            status["split_metadata"] = json.loads(split_meta_path.read_text(encoding="utf-8"))
+
         cfg_path = sdir / "model" / "config.json"
         if cfg_path.exists():
             cfg = json.loads(cfg_path.read_text(encoding="utf-8"))