Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
3 changes: 3 additions & 0 deletions docs/changes/63.maintenance.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add a comprehensive unit-test suite covering all production modules (`config`, `data_processing`,
`diagnostic_utils`, `evaluate`, `features`, `geomag`, `hyper_parameters`, `models`, `optimize_classification`,
`utils`). Overall line coverage exceeds 90%.
5 changes: 5 additions & 0 deletions docs/changes/66.feature.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Move default XGBoost hyperparameters from hard-coded Python dicts into versioned JSON files
(`src/eventdisplay_ml/configs/default_hyperparameters_stereo.json` and
`default_hyperparameters_classification.json`). The `hyper_parameters` module now always loads
from a file — either the bundled default or a user-supplied `--hyperparameter_config` path —
so the full training configuration is captured in a single auditable artifact.
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ package-dir = { "" = "src" }
where = [ "src" ]
exclude = [ "eventdisplay_ml._dev_version.*" ]

[tool.setuptools.package-data]
"eventdisplay_ml" = ["configs/*.json"]

[tool.setuptools_scm]
write_to = "src/eventdisplay_ml/_version.py"

Expand Down
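18 changes: 18 additions & 0 deletions src/eventdisplay_ml/configs/default_hyperparameters_classification.json
Original file line number Diff line number Diff line change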
@@ -0,0 +1,18 @@
{
"xgboost": {
"model": null,
"hyper_parameters": {
"objective": "binary:logistic",
"eval_metric": ["logloss", "auc"],
"n_estimators": 5000,
"early_stopping_rounds": 100,
"max_depth": 4,
"learning_rate": 0.02,
"gamma": 0.2,
"subsample": 0.8,
"colsample_bytree": 0.6,
"random_state": null,
"n_jobs": 96
}
}
}
19 changes: 19 additions & 0 deletions src/eventdisplay_ml/configs/default_hyperparameters_stereo.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"xgboost": {
"model": null,
"hyper_parameters": {
"n_estimators": 10000,
"early_stopping_rounds": 50,
"eval_metric": ["rmse"],
"learning_rate": 0.02,
"max_depth": 7,
"min_child_weight": 10.0,
"objective": "reg:squarederror",
"n_jobs": 8,
"random_state": null,
"tree_method": "hist",
"subsample": 0.7,
"colsample_bytree": 0.7
}
}
}
62 changes: 14 additions & 48 deletions src/eventdisplay_ml/hyper_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,48 +2,12 @@

import json
import logging
from importlib.resources import files
from pathlib import Path

_logger = logging.getLogger(__name__)


XGB_REGRESSION_HYPERPARAMETERS = {
"xgboost": {
"model": None,
"hyper_parameters": {
"n_estimators": 10000,
"early_stopping_rounds": 50,
"eval_metric": ["rmse"],
"learning_rate": 0.02, # Shrinkage
"max_depth": 7,
"min_child_weight": 10.0, # Equivalent to MinNodeSize=1.0% for XGBoost
"objective": "reg:squarederror",
"n_jobs": 8,
"random_state": None,
"tree_method": "hist",
"subsample": 0.7, # Default sensible value
"colsample_bytree": 0.7, # Default sensible value
},
}
}

XGB_CLASSIFICATION_HYPERPARAMETERS = {
"xgboost": {
"model": None,
"hyper_parameters": {
"objective": "binary:logistic",
"eval_metric": ["logloss", "auc"],
"n_estimators": 5000,
"early_stopping_rounds": 100,
"max_depth": 4,
"learning_rate": 0.02,
"gamma": 0.2,
"subsample": 0.8,
"colsample_bytree": 0.6,
"random_state": None,
"n_jobs": 96,
},
}
}
_CONFIGS_DIR = files("eventdisplay_ml") / "configs"

PRE_CUTS_REGRESSION = []

Expand All @@ -69,23 +33,25 @@ def hyper_parameters(analysis_type, config_file=None):

def regression_hyper_parameters(config_file=None):
"""Get hyperparameters for XGBoost regression model."""
if config_file:
return _load_hyper_parameters_from_file(config_file)
_logger.info(f"Default hyperparameters: {XGB_REGRESSION_HYPERPARAMETERS}")
return XGB_REGRESSION_HYPERPARAMETERS
path = (
Path(config_file) if config_file else _CONFIGS_DIR / "default_hyperparameters_stereo.json"
)
return _load_hyper_parameters_from_file(path)


def classification_hyper_parameters(config_file=None):
"""Get hyperparameters for XGBoost classification model."""
if config_file:
return _load_hyper_parameters_from_file(config_file)
_logger.info(f"Default hyperparameters: {XGB_CLASSIFICATION_HYPERPARAMETERS}")
return XGB_CLASSIFICATION_HYPERPARAMETERS
path = (
Path(config_file)
if config_file
else _CONFIGS_DIR / "default_hyperparameters_classification.json"
)
return _load_hyper_parameters_from_file(path)


def _load_hyper_parameters_from_file(config_file):
"""Load hyperparameters from a JSON file."""
with open(config_file) as f:
with config_file.open() as f:
hyperparameters = json.load(f)
_logger.info(f"Loaded hyperparameters from {config_file}: {hyperparameters}")
return hyperparameters
Expand Down
7 changes: 4 additions & 3 deletions tests/test_hyper_parameters.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,12 @@ def test_classification_hyper_parameters_has_expected_keys():
assert key in hp


def test_regression_hyper_parameters_returns_same_default_dict_object():
"""regression_hyper_parameters returns the module-level default dict (no defensive copy)."""
def test_regression_hyper_parameters_default_loads_bundled_json():
"""Default call loads the bundled JSON file and returns a fresh dict each time."""
r1 = regression_hyper_parameters()
r2 = regression_hyper_parameters()
assert r1 is r2
assert r1 == r2
assert r1 is not r2


# ---------------------------------------------------------------------------
Expand Down
Loading