From 306c94c9f4958eceec4d733ed7db4b22be618b7b Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sun, 31 May 2026 13:21:13 +0200 Subject: [PATCH 1/2] Fix E9: move default XGBoost hyperparameters into bundled JSON files Replace hard-coded Python dicts XGB_REGRESSION_HYPERPARAMETERS and XGB_CLASSIFICATION_HYPERPARAMETERS with versioned JSON files under src/eventdisplay_ml/configs/. The hyper_parameters module now always loads from a file (bundled default or user --hyperparameter_config), making the full training configuration auditable and reproducible. Also add missing docs/changes fragments for the unit-test suite and this fix. Closes #66 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- docs/changes/66.feature.md | 5 ++ docs/changes/unit-tests.maintenance.md | 3 + pyproject.toml | 3 + ...efault_hyperparameters_classification.json | 18 ++++++ .../default_hyperparameters_stereo.json | 19 ++++++ src/eventdisplay_ml/hyper_parameters.py | 62 +++++-------------- tests/test_hyper_parameters.py | 7 ++- 7 files changed, 66 insertions(+), 51 deletions(-) create mode 100644 docs/changes/66.feature.md create mode 100644 docs/changes/unit-tests.maintenance.md create mode 100644 src/eventdisplay_ml/configs/default_hyperparameters_classification.json create mode 100644 src/eventdisplay_ml/configs/default_hyperparameters_stereo.json diff --git a/docs/changes/66.feature.md b/docs/changes/66.feature.md new file mode 100644 index 0000000..63519c4 --- /dev/null +++ b/docs/changes/66.feature.md @@ -0,0 +1,5 @@ +Move default XGBoost hyperparameters from hard-coded Python dicts into versioned JSON files +(`src/eventdisplay_ml/configs/default_hyperparameters_stereo.json` and +`default_hyperparameters_classification.json`). The `hyper_parameters` module now always loads +from a file — either the bundled default or a user-supplied `--hyperparameter_config` path — +so the full training configuration is captured in a single auditable artifact. diff --git a/docs/changes/unit-tests.maintenance.md b/docs/changes/unit-tests.maintenance.md new file mode 100644 index 0000000..44c91be --- /dev/null +++ b/docs/changes/unit-tests.maintenance.md @@ -0,0 +1,3 @@ +Add comprehensive unit-test suite covering all production modules (`config`, `data_processing`, +`diagnostic_utils`, `evaluate`, `features`, `geomag`, `hyper_parameters`, `models`, `optimize_classification`, +`utils`). Overall line coverage exceeds 90 %. diff --git a/pyproject.toml b/pyproject.toml index de4ca5b..ddbfa9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -81,6 +81,9 @@ package-dir = { "" = "src" } where = [ "src" ] exclude = [ "eventdisplay_ml._dev_version.*" ] +[tool.setuptools.package-data] +"eventdisplay_ml" = ["configs/*.json"] + [tool.setuptools_scm] write_to = "src/eventdisplay_ml/_version.py" diff --git a/src/eventdisplay_ml/configs/default_hyperparameters_classification.json b/src/eventdisplay_ml/configs/default_hyperparameters_classification.json new file mode 100644 index 0000000..75c934c --- /dev/null +++ b/src/eventdisplay_ml/configs/default_hyperparameters_classification.json @@ -0,0 +1,18 @@ +{ + "xgboost": { + "model": null, + "hyper_parameters": { + "objective": "binary:logistic", + "eval_metric": ["logloss", "auc"], + "n_estimators": 5000, + "early_stopping_rounds": 100, + "max_depth": 4, + "learning_rate": 0.02, + "gamma": 0.2, + "subsample": 0.8, + "colsample_bytree": 0.6, + "random_state": null, + "n_jobs": 96 + } + } +} diff --git a/src/eventdisplay_ml/configs/default_hyperparameters_stereo.json b/src/eventdisplay_ml/configs/default_hyperparameters_stereo.json new file mode 100644 index 0000000..3876217 --- /dev/null +++ b/src/eventdisplay_ml/configs/default_hyperparameters_stereo.json @@ -0,0 +1,19 @@ +{ + "xgboost": { + "model": null, + "hyper_parameters": { + "n_estimators": 10000, + "early_stopping_rounds": 50, + "eval_metric": ["rmse"], + "learning_rate": 0.02, + "max_depth": 7, + "min_child_weight": 10.0, + "objective": "reg:squarederror", + "n_jobs": 8, + "random_state": null, + "tree_method": "hist", + "subsample": 0.7, + "colsample_bytree": 0.7 + } + } +} diff --git a/src/eventdisplay_ml/hyper_parameters.py b/src/eventdisplay_ml/hyper_parameters.py index dc52ffe..e03a577 100644 --- a/src/eventdisplay_ml/hyper_parameters.py +++ b/src/eventdisplay_ml/hyper_parameters.py @@ -2,48 +2,12 @@ import json import logging +from importlib.resources import files +from pathlib import Path _logger = logging.getLogger(__name__) - -XGB_REGRESSION_HYPERPARAMETERS = { - "xgboost": { - "model": None, - "hyper_parameters": { - "n_estimators": 10000, - "early_stopping_rounds": 50, - "eval_metric": ["rmse"], - "learning_rate": 0.02, # Shrinkage - "max_depth": 7, - "min_child_weight": 10.0, # Equivalent to MinNodeSize=1.0% for XGBoost - "objective": "reg:squarederror", - "n_jobs": 8, - "random_state": None, - "tree_method": "hist", - "subsample": 0.7, # Default sensible value - "colsample_bytree": 0.7, # Default sensible value - }, - } -} - -XGB_CLASSIFICATION_HYPERPARAMETERS = { - "xgboost": { - "model": None, - "hyper_parameters": { - "objective": "binary:logistic", - "eval_metric": ["logloss", "auc"], - "n_estimators": 5000, - "early_stopping_rounds": 100, - "max_depth": 4, - "learning_rate": 0.02, - "gamma": 0.2, - "subsample": 0.8, - "colsample_bytree": 0.6, - "random_state": None, - "n_jobs": 96, - }, - } -} +_CONFIGS_DIR = files("eventdisplay_ml") / "configs" PRE_CUTS_REGRESSION = [] @@ -69,23 +33,25 @@ def hyper_parameters(analysis_type, config_file=None): def regression_hyper_parameters(config_file=None): """Get hyperparameters for XGBoost regression model.""" - if config_file: - return _load_hyper_parameters_from_file(config_file) - _logger.info(f"Default hyperparameters: {XGB_REGRESSION_HYPERPARAMETERS}") - return XGB_REGRESSION_HYPERPARAMETERS + path = ( + Path(config_file) if config_file else _CONFIGS_DIR / "default_hyperparameters_stereo.json" + ) + return _load_hyper_parameters_from_file(path) def classification_hyper_parameters(config_file=None): """Get hyperparameters for XGBoost classification model.""" - if config_file: - return _load_hyper_parameters_from_file(config_file) - _logger.info(f"Default hyperparameters: {XGB_CLASSIFICATION_HYPERPARAMETERS}") - return XGB_CLASSIFICATION_HYPERPARAMETERS + path = ( + Path(config_file) + if config_file + else _CONFIGS_DIR / "default_hyperparameters_classification.json" + ) + return _load_hyper_parameters_from_file(path) def _load_hyper_parameters_from_file(config_file): """Load hyperparameters from a JSON file.""" - with open(config_file) as f: + with config_file.open() as f: hyperparameters = json.load(f) _logger.info(f"Loaded hyperparameters from {config_file}: {hyperparameters}") return hyperparameters diff --git a/tests/test_hyper_parameters.py b/tests/test_hyper_parameters.py index 967d223..fa0348a 100644 --- a/tests/test_hyper_parameters.py +++ b/tests/test_hyper_parameters.py @@ -60,11 +60,12 @@ def test_classification_hyper_parameters_has_expected_keys(): assert key in hp -def test_regression_hyper_parameters_returns_same_default_dict_object(): - """regression_hyper_parameters returns the module-level default dict (no defensive copy).""" +def test_regression_hyper_parameters_default_loads_bundled_json(): + """Default call loads the bundled JSON file and returns a fresh dict each time.""" r1 = regression_hyper_parameters() r2 = regression_hyper_parameters() - assert r1 is r2 + assert r1 == r2 + assert r1 is not r2 # --------------------------------------------------------------------------- From aa8168aecdbc260470f5a72bb5f7ece866ba0d27 Mon Sep 17 00:00:00 2001 From: Gernot Maier Date: Sun, 31 May 2026 13:26:10 +0200 Subject: [PATCH 2/2] correct changelog naming --- docs/changes/{352.bugfix.md => 63.bugfix.md} | 0 docs/changes/{unit-tests.maintenance.md => 63.maintenance.md} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename docs/changes/{352.bugfix.md => 63.bugfix.md} (100%) rename docs/changes/{unit-tests.maintenance.md => 63.maintenance.md} (100%) diff --git a/docs/changes/352.bugfix.md b/docs/changes/63.bugfix.md similarity index 100% rename from docs/changes/352.bugfix.md rename to docs/changes/63.bugfix.md diff --git a/docs/changes/unit-tests.maintenance.md b/docs/changes/63.maintenance.md similarity index 100% rename from docs/changes/unit-tests.maintenance.md rename to docs/changes/63.maintenance.md