|
| 1 | +{ |
| 2 | + "title": "EITT Boundary Condition and Discovery Roadmap (Concise JSON for Claude)", |
| 3 | + "version": "1.1", |
| 4 | + "author": "Copilot (for Claude coworker)", |
| 5 | + "purpose": "Make the temporal-autocorrelation boundary explicit, state the open mathematical problem clearly, and provide a concrete, reproducible discovery plan (empirical, simulation, analytic) suitable for inclusion in a presentation or handed to collaborators.", |
| 6 | + "clear_boundary_condition": { |
| 7 | + "plain_statement": "EITT (empirical near-invariance of Shannon entropy under geometric-mean temporal decimation) is observed only when the compositional time series exhibits nontrivial temporal persistence at the decimation scale. Without temporal persistence the effect disappears or is not reliably upward.", |
| 8 | + "operational_tests": [ |
| 9 | + { |
| 10 | + "name": "CLR lag-1 ACF test", |
| 11 | + "description": "Compute CLR transform of the composition, compute per-component lag-1 autocorrelations, and report the mean and distribution. Require mean(lag1_acf) > tau_1 (suggested tau_1 = 0.1) as a minimal persistence indicator." |
| 12 | + }, |
| 13 | + { |
| 14 | + "name": "Block variance reduction test", |
| 15 | + "description": "For block size M, compute nonoverlapping block means and their empirical covariance Cov_emp. Build an iid null by permuting the time order (or resampling) and recomputing the block means; compute block_variance_ratio = Var_emp(block_means) / Var_iid(block_means). Positive autocorrelation inflates the variance of block means relative to the iid null, so require ratio > R (suggested R = 1.5) to indicate meaningful persistence." |
| 16 | + }, |
| 17 | + { |
| 18 | + "name": "Direction consistency test", |
| 19 | + "description": "Compute pct_change_H across many independent windows; require sign consistency (majority upward) and nonzero mean with bootstrap CI not crossing zero." |
| 20 | + } |
| 21 | + ], |
| 22 | + "recommended_thresholds": { |
| 23 | + "lag1_acf_mean": 0.10, |
| 24 | + "block_variance_ratio": 1.5, |
| 25 | + "pct_change_H_tolerance": 0.001 |
| 26 | + }, |
| 27 | + "diagnostic_output_fields": [ |
| 28 | + "clr_lag1_acf_mean", |
| 29 | + "block_variance_ratio", |
| 30 | + "observed_pct_change_H_mean", |
| 31 | + "observed_pct_change_H_CI", |
| 32 | + "eitt_flag (true/false)" |
| 33 | + ] |
| 34 | + }, |
| 35 | + "open_mathematical_problem": { |
| 36 | + "concise_statement": "Derive a closed-form expression or tight asymptotic bound for the expected entropy change delta_H under M-block geometric decimation for a stationary compositional stochastic process, expressed in terms of the process's autocovariance structure and the Frechet/CLR center.", |
| 37 | + "target_formula_template": "delta_H(M) = E[H(geom_mean_M(X))] - H(x_bar) = LeadingTerm(Sigma(h), M, x_star) + Remainder(Sigma(h), M), with LeadingTerm = (1/2) tr(|Hess_H(x_star)| * Cov(x_bar_M)) under mild regularity.", |
| 38 | + "desired_properties_of_solution": [ |
| 39 | + "Explicit dependence on lag-h covariances Sigma(h) or an effective autocorrelation scalar rho_eff.", |
| 40 | + "Asymptotic correctness as M -> infinity and small-Cov expansion validity conditions.", |
| 41 | + "A bound on the remainder term with clear assumptions (mixing rates, moment bounds).", |
| 42 | + "Practical computability from estimated covariances." |
| 43 | + ], |
| 44 | + "why_this_matters": "A closed form or tight bound would (a) explain the small magnitude of observed delta_H, (b) convert the qualitative boundary into quantitative thresholds, and (c) connect EITT to information-geometric or spectral process properties." |
| 45 | + }, |
| 46 | + "conjectures_and_hypotheses": { |
| 47 | + "conjecture_1": { |
| 48 | + "text": "Second-order Hessian approximation suffices: for stationary, weakly dependent compositional processes with bounded moments, the entropy change under geometric decimation satisfies delta_H = (1/2) tr( |Hess_H(x_star)| * Cov(x_bar_M) ) + o(||Cov(x_bar_M)||), i.e. the Hessian trace is the leading term.", |
| 49 | + "implication": "If true, one can predict delta_H from estimated Cov(x_bar_M) and x_star without running decimation." |
| 50 | + }, |
| 51 | + "conjecture_2": { |
| 52 | + "text": "There exists a threshold function f(M, epsilon) such that if rho_eff >= f(M, epsilon) then EITT holds within tolerance epsilon (|delta_H| <= epsilon).", |
| 53 | + "implication": "This would convert the qualitative boundary condition into a quantitative threshold for autocorrelation required at each M." |
| 54 | + }, |
| 55 | + "conjecture_3": { |
| 56 | + "text": "Information-geometric interpretation: delta_H can be expressed as a quadratic form in the tangent space at x_star under the Fisher-Rao metric (or an Aitchison-compatible metric), linking Hessian footprint to curvature of the simplex.", |
| 57 | + "implication": "A geometric derivation could produce tighter, interpretable bounds and connect to rate-distortion or optimal compression." |
| 58 | + } |
| 59 | + }, |
| 60 | + "discovery_direction_and_plan": { |
| 61 | + "overview": "Three parallel tracks: empirical mapping, controlled simulation, and analytic derivation. Each track produces artifacts that cross-validate the others.", |
| 62 | + "empirical_track": { |
| 63 | + "goal": "Map where EITT holds across real datasets and quantify predictive power of the Hessian approximation.", |
| 64 | + "steps": [ |
| 65 | + "For each dataset and M in {1,7,30,91,365}: compute CLR, estimate x_star, compute Cov(x_bar_M), compute Hessian prediction delta_H_hess = 0.5 * tr(|Hess_H(x_star)| * Cov(x_bar_M)), compute observed delta_H and bootstrap CI, compute diagnostics (lag1_acf_mean, block_variance_ratio).", |
| 66 | + "Produce a per-dataset table: {dataset, M, lag1_acf_mean, block_variance_ratio, observed delta_H (pct, CI), predicted delta_H_hess (pct), bootstrap percentiles, eitt_flag}.", |
| 67 | + "Flag datasets where Hessian prediction explains > 80% of observed delta_H." |
| 68 | + ] |
| 69 | + }, |
| 70 | + "simulation_track": { |
| 71 | + "goal": "Control process parameters to map delta_H as a function of autocorrelation and noise scale; validate analytic approximations.", |
| 72 | + "designs": [ |
| 73 | + { |
| 74 | + "name": "CLR-Gaussian VAR(1)", |
| 75 | + "spec": "Simulate Z_t in CLR space as VAR(1), vary spectral radius rho in [0,0.99], vary innovation covariance scale, map back to simplex via inverse CLR, compute delta_H(M)." |
| 76 | + }, |
| 77 | + { |
| 78 | + "name": "Dirichlet autoregressive", |
| 79 | + "spec": "Simulate compositional autoregressive processes to test non-Gaussian behavior." |
| 80 | + }, |
| 81 | + { |
| 82 | + "name": "IID null and permutation", |
| 83 | + "spec": "Resample marginals iid and confirm delta_H approximately 0 or no consistent upward drift." |
| 84 | + } |
| 85 | + ], |
| 86 | + "sweeps": { |
| 87 | + "rho_values": [0.0, 0.2, 0.4, 0.6, 0.8, 0.95], |
| 88 | + "M_values": [1, 7, 30, 91, 365], |
| 89 | + "noise_scales": ["small", "medium", "large"], |
| 90 | + "n_repeats": 200 |
| 91 | + } |
| 92 | + }, |
| 93 | + "analytic_track": { |
| 94 | + "goal": "Prove the leading term and bound the remainder under clear assumptions.", |
| 95 | + "first_steps": [ |
| 96 | + "Write Taylor expansion of H around x_star for geometric mean of M samples; express geometric mean in terms of log-space averages and propagate via multivariate Taylor expansion.", |
| 97 | + "Express Cov(x_bar_M) in terms of Sigma(h): Cov(x_bar_M) = (1/M^2) sum_{i,j=0}^{M-1} Sigma(i-j) and substitute into the Hessian term.", |
| 98 | + "Use mixing conditions or spectral density to bound remainder terms; derive conditions under which remainder = o(||Cov||)." |
| 99 | + ], |
| 100 | + "mathematical_tools": [ |
| 101 | + "Matrix calculus for Hessian and trace manipulations", |
| 102 | + "Lyapunov sums for linear VAR covariance aggregation", |
| 103 | + "Spectral density / Wiener-Khinchin theorem for stationary processes", |
| 104 | + "Cumulant/Edgeworth expansions for dependent data" |
| 105 | + ], |
| 106 | + "concrete_first_proofs_to_try": [ |
| 107 | + "Prove that for a CLR-Gaussian VAR(1) with spectral radius rho<1 and small innovation covariance, E[H(geom_mean_M(X))] - H(x_bar) = 0.5 tr(|Hess_H(x_star)| * Cov(x_bar_M)) + o(||Cov(x_bar_M)||), i.e. the Hessian trace is the leading term.", |
| 108 | + "Show that Cov(x_bar_M) = (1/M^2) sum_{h=-(M-1)}^{M-1} (M-|h|) Sigma(h) and substitute into the Hessian formula to get an explicit dependence on Sigma(h)." |
| 109 | + ] |
| 110 | + } |
| 111 | + }, |
| 112 | + "presentation_snippets_for_slides": { |
| 113 | + "slide_1": { |
| 114 | + "title": "EITT - Statement and Boundary", |
| 115 | + "bullets": [ |
| 116 | + "Empirical near-invariance: Shannon entropy changes very little under geometric-mean decimation for autocorrelated compositional time series.", |
| 117 | + "Boundary condition: requires temporal persistence at the decimation scale (diagnostic thresholds provided)." |
| 118 | + ] |
| 119 | + }, |
| 120 | + "slide_2": { |
| 121 | + "title": "Mechanism (Hessian Footprint)", |
| 122 | + "bullets": [ |
| 123 | + "Geometric mean reduces variance; entropy is concave.", |
| 124 | + "Leading correction (to second order): delta_H ~ 0.5 * tr(|Hess_H(x_star)| * Cov(x_bar_M)), where x_bar_M is the M-block mean.", |
| 125 | + "This explains systematic upward drift and its small magnitude." |
| 126 | + ] |
| 127 | + }, |
| 128 | + "slide_3": { |
| 129 | + "title": "Open Problem", |
| 130 | + "bullets": [ |
| 131 | + "Derive closed form or tight bound for delta_H(M) in terms of Sigma(h) and x_star.", |
| 132 | + "Desired: asymptotic correctness, computability from estimated covariances, and explicit remainder bounds." |
| 133 | + ] |
| 134 | + }, |
| 135 | + "slide_4": { |
| 136 | + "title": "Discovery Roadmap (Actionable)", |
| 137 | + "bullets": [ |
| 138 | + "Empirical sweep across datasets (produce master CSV).", |
| 139 | + "Simulation grid (rho, M, noise) and plots delta_H vs rho.", |
| 140 | + "Analytic derivation of leading term and remainder bound.", |
| 141 | + "Deliverables: master CSV, simulation plots, technical note." |
| 142 | + ] |
| 143 | + }, |
| 144 | + "slide_5": { |
| 145 | + "title": "Invitation", |
| 146 | + "bullets": [ |
| 147 | + "We provide data and scripts. Can you derive the closed form or tighten the bound?", |
| 148 | + "Contact: repository and scripts included in the package." |
| 149 | + ] |
| 150 | + } |
| 151 | + }, |
| 152 | + "expected_timeline_and_priorities": { |
| 153 | + "priority_order": [ |
| 154 | + "Empirical sweep (fast, high value) - 1-3 days", |
| 155 | + "Simulation grid (medium effort) - 3-7 days", |
| 156 | + "Analytic derivation (research effort) - open ended" |
| 157 | + ], |
| 158 | + "quick_wins": [ |
| 159 | + "Run empirical sweep and produce master CSV and a one-page summary.", |
| 160 | + "Run CLR VAR(1) simulation for a small rho grid and produce delta_H vs rho plots for M=365 and M=30." |
| 161 | + ] |
| 162 | + }, |
| 163 | + "governance": { |
| 164 | + "state": "CGS-2 (n=3)", |
| 165 | + "gdof": 264, |
| 166 | + "session": "S016" |
| 167 | + } |
| 168 | +} |
0 commit comments