Python Dynamic Maximum Entropy — a PyTorch implementation of Edwin (entropy-driven compression with interpretable nonlinear model discovery), named in honor of E. T. Jaynes.
Edwin simultaneously performs dimensionality reduction via the Dynamic Maximum Entropy (DME) principle and discovers sparse symbolic models governing latent dynamics and feature structure, using the SLIC sparse regression algorithm.
Edwin decomposes a non-negative data matrix X (shape n_features × n_timepoints)
into interpretable latent factors:
q = softmax_col( -Y @ z.T )
minimising the column-normalised KL divergence D_KL(X ‖ q). Here z ∈ ℝ^(T×K) are time-domain latents and Y ∈ ℝ^(N×K) are feature-domain latents, with K ≪ min(T, N).
Four model classes are provided:
| Class | Description |
|---|---|
Edwin |
Core DME compression: learn z and Y |
EdwinWithDynamics |
Adds sparse dynamics DW @ z ≈ W @ Θ(z) @ ξz |
EdwinWithFeatureModel |
Also constrains Y ≈ θy @ ξy via a feature library |
EdwinWithStableDynamics |
Enforces Lyapunov stability: ż = -M∇V(z) |
Sparse models (ξz, ξy) are discovered automatically via EnAdSR — an ensemble sparse regression method with SLIC / AICc / BIC model selection.
Backward-compatible aliases TMI, TMIWithDynamics, TMIWithFeatureModel
are exported for existing code.
pip install -e ".[dev]"Requires Python ≥ 3.10, PyTorch ≥ 2.0, NumPy ≥ 1.24.
Optional: SciPy (for scipy-based ODE integration in forecast),
scikit-learn (for KDE in traj_to_dist).
import numpy as np
from pyedwin import Edwin
X = np.random.exponential(1.0, (50, 100)) # (n_features, n_timepoints)
model = Edwin(K=5, normalize=True, lr=1e-3, max_iters=10_000)
model.fit(X)
z = model.z_ # (n_timepoints, K) — temporal latents
Y = model.Y_ # (n_features, K) — feature-domain latents
q = model.q_ # (n_features, n_timepoints) — reconstruction
kld = model.score(X) # mean per-column KL divergence (nats)from pyedwin import EdwinWithDynamics
from pyedwin.galerkin import build_weight_matrix, build_poly_degree_matrix
import numpy as np
t = np.linspace(0.2, 3.0, 100)
W = build_weight_matrix(t, window=11, d=0)
DW = build_weight_matrix(t, window=11, d=1)
N = build_poly_degree_matrix(K=2, order=3, include_constant=False)
model = EdwinWithDynamics(
K=2, W=W, DW=DW, N=N,
lambda_z=10.0,
sparse_iter=5_000,
lr=1e-3, max_iters=50_000,
)
model.fit(X)
xi_z = model.xi_z_ # (L, K) sparse dynamics coefficientsfrom pyedwin import EdwinWithFeatureModel
x = np.linspace(-6, 6, 50)
theta_y = np.column_stack([x, x**2, x**3]) # (n_features, P)
model = EdwinWithFeatureModel(
K=2, W=W, DW=DW, N=N, theta_y=theta_y,
lambda_z=10.0, lambda_y=10.0,
lr=1e-3, max_iters=50_000,
)
model.fit(X)
xi_y = model.xi_y_ # (P, K) sparse feature coefficientsfrom pyedwin import print_equations, equations_str
# Unicode (default) — readable in terminals and Jupyter
print_equations(model)
# ─────────────────────────────────────────────
# Discovered equations (EdwinWithDynamics)
# ─────────────────────────────────────────────
# dz₀/dt = -0.4982 z₀²
# LaTeX strings (no $ delimiters)
for eq in equations_str(model, fmt="latex"):
print(eq)
# \dot{z}_{0} = -0.4982 z_{0}^{2}
# Plain ASCII
equations_str(model, fmt="plain")
# ['dz0/dt = -0.4982 z0**2']
# Attach human-readable names for the feature library columns
model.theta_y_names_ = ["x", "x²", "x³"]
print_equations(model) # Y₀ = 0.51 x²from pyedwin import forecast, adapt_and_forecast
import numpy as np
t_fc = np.linspace(1.0, 5.0, 100)
z0 = model.z_[0].cpu()
# Integrate dz/dt = Θ(z) @ ξz forward from z0
z_forecast = forecast(model, z0, t_fc, method="rk4") # (100, K)
z_fc, q_fc = forecast(model, z0, t_fc, return_data=True) # also get data recon
# Adapt ξz from a short observed prefix, then forecast
X_prefix = X[:, :30]
z_adapt, q_adapt = adapt_and_forecast(
model, X_prefix, t_fc,
prefix_iters=300, lr=5e-3, return_data=True,
)from pyedwin import traj_to_dist, traj_to_dist_grid
import numpy as np
z_np = model.z_.cpu().numpy().squeeze() # (T,)
x_grid = np.linspace(-6, 6, 60)
# KDE: produces a normalised distribution at each time point
dist = traj_to_dist(z_np, x_grid, bandwidth="scott") # (60, T)
# Histogram: faster, no extra dependencies
dist_hist, bin_centres = traj_to_dist_grid(z_np, bins=40) # (40, T)from pyedwin import EdwinWithStableDynamics
model_stable = EdwinWithStableDynamics(
K=2, W=W, DW=DW, N=N,
lambda_z=10.0, lambda_V0=1.0,
lr=1e-3, max_iters=50_000,
)
model_stable.fit(X)
# V(z) = Θ(z) @ ξV is a Lyapunov function; V̇ ≤ 0 by construction
print_equations(model_stable, lyapunov=True)
V_coeffs = model_stable.xi_V_ # (L, 1)
M = model_stable.M_ # (K, K) PSD matrixX3 = np.random.exponential(1.0, (50, 100, 20)) # 20 trials
model = Edwin(K=3).fit(X3)
# model.z_.shape == (n_timepoints, K, n_trials)
# model.Y_.shape == (n_features, K)from pyedwin.models import scan_hyperparams, score_models
results = scan_hyperparams(
X, EdwinWithFeatureModel,
param_grid={"lambda_z": [1., 10., 100.], "lambda_y": [1., 10.]},
K=3, W=W, DW=DW, N=N, theta_y=theta_y,
max_iters=20_000, verbose=False,
)
scores = score_models(results, N=N, W=W, DW=DW, theta_y=theta_y)
best = results[scores.index(min(scores))]All model classes accept GPU tensors natively:
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
X_gpu = torch.tensor(X, dtype=torch.float64, device=device)
model = EdwinWithDynamics(K=2, W=W, DW=DW, N=N, lambda_z=10.0)
model.fit(X_gpu)
# model.z_, model.Y_, model.xi_z_ are all on the same devicefrom pyedwin import AdSR, EnAdSR
import torch
theta = torch.randn(200, 10)
y = torch.randn(200, 3)
reg = AdSR(ic="slic", max_iter=10).fit(theta, y)
ereg = EnAdSR(ic="slic", num_batches=20, tol=0.7).fit(theta, y)
print(ereg.coef_) # (10, 3) sparse
print(ereg.inclusion_probs_) # (10, 3) fraction of ensemble runs each term was active# Run all tests (unit + integration)
pytest
# Unit tests only (fast, ~2 min)
pytest pyedwin/tests/ -v
# Integration tests — full physics recovery checks (~6 min on CPU)
pytest test_integration.py -v
# GPU tests (automatically skipped if CUDA unavailable)
pytest pyedwin/tests/test_gpu.py -v
# Force CPU-only device propagation tests
PYEDWIN_TEST_DEVICE=cpu pytest pyedwin/tests/test_gpu.py -vOn an HPC cluster with Slurm, use the scripts in cluster/ — see
cluster/README_cluster.md for details.
- Shared training loop: all Edwin variants share a single
_fit_loopin the base class; subclasses customise behaviour via_init_state,_extra_loss, and_post_stephooks. - Galerkin projection: rather than differentiating z directly,
build_weight_matrixcomputes integration-by-parts weight matrices that avoid numerical differentiation, improving noise robustness. - Sparse regression via EnAdSR: ξz and ξy are updated by ensemble sparse regression (not autograd), providing automatic model-order selection via SLIC.
- scikit-learn API: hyperparameters in
__init__,fit()returnsself, fitted attributes end in_. - GPU-native: all tensor operations respect the device of the input
data; no explicit
.to(device)calls are needed inside model code.