From 95a9ec158c58f2b915eb832c7ca9a0c6d1e9bea2 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 21 Apr 2026 23:15:50 +0000 Subject: [PATCH 1/3] v3.46-trained: add train_v346.py and AMS_TRAINED_WEIGHTS loader hook Per SPRINT_CLOSEOUT_v3.46.md \u00a75.3/\u00a75.4 and \u00a75 loader note. train_v346.py - Copies the v344 driver template, points to scheme_b_v344 (= v3.46 SUT). - Asserts v3.46 Cfg invariants (use_top1_exclusive_content_bias=False, tail_slot_residual_dominant=False). - Requires CUDA by default; AMS_ALLOW_CPU_TRAIN=1 to override. - Logs pre/post "mechanism-level observable" probes per \u00a75.6: tail_head.slot_heads[1][0].weight.abs().mean and vocab_proj.proj[-1].weight.abs().mean. - Saves non-backbone state_dict + non-backbone buffers to ckpt/v346_trained.pt with provenance + Cfg snapshot. scheme_b_v344.MemLLM._maybe_load_trained_weights - New hook called at end of load(); opt-in via AMS_TRAINED_WEIGHTS env. - Loads non-backbone tensors into matching params/buffers; backbone excluded. - Strict shape check: raises on mismatch (protects against loading the v344/v348 ckpts per \u00a76 warning about shape incompatibility). Co-authored-by: FluffyAIcode --- scheme_b_v344.py | 54 ++++++++++++++++ train_v346.py | 157 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 211 insertions(+) create mode 100644 train_v346.py diff --git a/scheme_b_v344.py b/scheme_b_v344.py index 8e54cf1..a45b11a 100644 --- a/scheme_b_v344.py +++ b/scheme_b_v344.py @@ -2494,8 +2494,62 @@ def _capture_query_ids(module, args): self.backbone.register_forward_pre_hook(_capture_query_ids) self._build_wte_neighbor_cache() self._compute_filler_centroid() + self._maybe_load_trained_weights() return self + def _maybe_load_trained_weights(self): + """Optional hook: if env AMS_TRAINED_WEIGHTS points to a checkpoint written by + train_v346.py (or any sibling trainer), load non-backbone params/buffers with + strict=False. 
Backbone is intentionally excluded — trainer only saves trainables + + non-backbone buffers (see train_v346.py §5.3). Missing/unexpected keys are + logged but not fatal, so a partial-shape ckpt fails loud only on shape mismatch. + """ + path = os.environ.get("AMS_TRAINED_WEIGHTS", "").strip() + if not path: return + if not os.path.exists(path): + print(f" [AMS_TRAINED_WEIGHTS] file not found: {path} — skipping") + return + try: + blob = torch.load(path, map_location="cpu", weights_only=False) + except Exception as e: + print(f" [AMS_TRAINED_WEIGHTS] torch.load failed: {type(e).__name__}: {e}") + return + sd = blob.get("state_dict", blob) if isinstance(blob, dict) else blob + if not isinstance(sd, dict): + print(f" [AMS_TRAINED_WEIGHTS] unexpected format (no 'state_dict' mapping) — skipping") + return + dev = next(self.parameters()).device + own_params = dict(self.named_parameters()) + own_buffers = dict(self.named_buffers()) + loaded, skipped = 0, 0 + shape_errs = [] + with torch.no_grad(): + for n, t in sd.items(): + if n.startswith("backbone"): skipped += 1; continue + if n in own_params: + p = own_params[n] + if p.shape != t.shape: + shape_errs.append((n, tuple(p.shape), tuple(t.shape))); continue + p.data.copy_(t.to(dev, dtype=p.dtype)) + loaded += 1 + elif n in own_buffers: + b = own_buffers[n] + if b.shape != t.shape: + shape_errs.append((n, tuple(b.shape), tuple(t.shape))); continue + b.data.copy_(t.to(dev, dtype=b.dtype)) + loaded += 1 + else: + skipped += 1 + prov = blob.get("provenance", "?") if isinstance(blob, dict) else "?" + print(f" [AMS_TRAINED_WEIGHTS] loaded={loaded} skipped={skipped} " + f"shape_errs={len(shape_errs)} path={path} provenance={prov}") + if shape_errs: + for n, s_model, s_ckpt in shape_errs[:5]: + print(f" ! 
shape mismatch {n}: model={s_model} ckpt={s_ckpt}") + raise RuntimeError( + f"AMS_TRAINED_WEIGHTS shape mismatch on {len(shape_errs)} tensor(s); " + f"ckpt not compatible with current SUT shapes") + def _compute_filler_centroid(self): if self.content_classifier is None or self.backbone is None: self._filler_centroid = None; return diff --git a/train_v346.py b/train_v346.py new file mode 100644 index 0000000..2b0c545 --- /dev/null +++ b/train_v346.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 +"""Training driver for v3.46-trained. + +Starts from v346-revertE-topk-nonexclusive-7e97 SUT (attention-pool ctx encoder, +cluster-crowding retrieval, refresh-on-write, additive tail residual, +top1-exclusive OFF, cond-buffer mirror). Runs N Trainer.step iterations +over a rotating corpus; saves non-backbone state_dict to ckpt/v346_trained.pt. + +Per SPRINT_CLOSEOUT_v3.46.md §5.3 / §5.4. +""" +import argparse, os, time, json, math, sys +import torch +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +import scheme_b_v344 as sb + +MUSIC = [ + "He practiced piano for hours perfecting a difficult Chopin nocturne.", + "She studied music theory and harmonic progression at the conservatory.", + "The orchestra performed Beethoven symphony with remarkable precision.", +] +SPACE = [ + "The telescope revealed distant galaxies beyond the Milky Way.", + "Astronauts trained for the Mars mission in simulated zero gravity.", + "The nebula emitted radiation across the electromagnetic spectrum.", +] +GENERIC = [ + "The pianist practiced arpeggios and Chopin nocturnes until midnight.", + "A musician refined finger technique, phrasing, and pedal control.", + "Classical interpretation often depends on dynamics, tempo rubato, and touch.", + "A conservatory student studied etudes, scales, and expressive keyboard skills.", + "Distant astronomers observed galaxies quasars and stellar evolution.", + "Space orbital mechanics explains satellites and planetary motion.", +] +ALL = MUSIC + 
SPACE + GENERIC + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--steps", type=int, default=60) + ap.add_argument("--batch", type=int, default=3) + ap.add_argument("--out", type=str, default="ckpt/v346_trained.pt") + ap.add_argument("--seed", type=int, default=42) + ap.add_argument("--log", type=str, default="ckpt/v346_train_log.jsonl") + args = ap.parse_args() + + os.makedirs(os.path.dirname(args.out) or ".", exist_ok=True) + log_dir = os.path.dirname(args.log) or "." + os.makedirs(log_dir, exist_ok=True) + torch.manual_seed(args.seed) + + c = sb.Cfg() + # Sanity: confirm v3.46 Cfg (same assert as §8 step 3, catches env corruption) + assert c.use_top1_exclusive_content_bias is False, \ + "Cfg.use_top1_exclusive_content_bias must be False on v3.46" + assert c.tail_slot_residual_dominant is False, \ + "Cfg.tail_slot_residual_dominant must be False on v3.46 (revert [B])" + + m = sb.MemLLM(c) + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + if device.type != "cuda": + if os.environ.get("AMS_ALLOW_CPU_TRAIN", "0") != "1": + raise AssertionError( + "train_v346 expects CUDA; CPU fallback is ~10x slower and not the intent. 
" + "Set AMS_ALLOW_CPU_TRAIN=1 to override explicitly.") + print("[build] WARNING: running on CPU (AMS_ALLOW_CPU_TRAIN=1)") + m.to(device); m.load(); m.to(device) + trainable = sum(p.numel() for p in m.parameters() if p.requires_grad) + total = sum(p.numel() for p in m.parameters()) + print(f"[build] device={device} params total={total:,} trainable={trainable:,}") + + for t in ALL: + m.write(t, training_mode=True) + try: + m.amm.maybe_recluster(force=True) + except Exception as e: + print(f"[build] amm.maybe_recluster skipped: {type(e).__name__}: {e}") + m._refresh_rare_keyword_indices() + m.eval() + print(f"[build] initial memory count = {len(m.amm.tree.store)}") + + # Pre-training mechanism snapshot (per §5.6): tail_head[1] + vocab_proj last weights + def _probe_weights(model): + out = {} + try: + w = model.bridge.tail_head.slot_heads[1][0].weight + out["tail_head_slot1_abs_mean"] = float(w.detach().abs().mean()) + except Exception as e: + out["tail_head_slot1_abs_mean"] = f"ERR {type(e).__name__}" + try: + w = model.vocab_proj.proj[-1].weight + out["vocab_proj_last_abs_mean"] = float(w.detach().abs().mean()) + except Exception as e: + out["vocab_proj_last_abs_mean"] = f"ERR {type(e).__name__}" + return out + pre_probe = _probe_weights(m) + print(f"[probe pre-train] {pre_probe}") + + trainer = sb.Trainer(m, c) + print(f"[train] Trainer built batch={args.batch} steps={args.steps}") + + t_start = time.time() + with open(args.log, "w") as flog: + for step in range(args.steps): + start = (step * args.batch) % len(ALL) + batch = [ALL[(start + i) % len(ALL)] for i in range(args.batch)] + t0 = time.time() + try: + stats = trainer.step(batch) + except Exception as e: + print(f"[step {step}] EXCEPTION: {type(e).__name__}: {e}") + raise + dt = time.time() - t0 + tot = stats.get("total") + print( + f"step {step:3d} total={tot:.4f} " + f"recon={stats.get('recon', 0):.3f} " + f"sa={stats.get('semantic_alignment', 0):.3f} " + f"tsa={stats.get('tail_semantic_anchor', 0):.3f} " 
+ f"va={stats.get('vocab_anchor', 0):.3f} " + f"fs={stats.get('functional_suppression', 0):.3f} " + f"cs={stats.get('context_separation', 0):.3f} " + f"dt={dt:.1f}s" + ) + rec = {"step": step, "dt_s": dt, + **{k: v for k, v in stats.items() + if k not in ("grad_norms", "loss_weights")}} + flog.write(json.dumps(rec, ensure_ascii=False) + "\n") + flog.flush() + elapsed = time.time() - t_start + post_probe = _probe_weights(m) + print(f"[probe post-train] {post_probe}") + print(f"[train] elapsed {elapsed:.1f}s avg/step={elapsed/max(1,args.steps):.2f}s") + + sd = {n: p.detach().cpu() for n, p in m.named_parameters() if "backbone" not in n} + for n, b in m.named_buffers(): + if "backbone" not in n: + sd[n] = b.detach().cpu() + torch.save({ + "state_dict": sd, + "cfg_snapshot": {k: getattr(c, k) for k in ( + "L_mem", "d_ctx", "d_M", "d_F", "cfg_scale", + "use_top1_exclusive_content_bias", + "tail_slot_residual_dominant", + "use_inter_domain_margin", + "context_encoder_use_attention_pool", + )}, + "provenance": "AgentMemory/v346-revertE-topk-nonexclusive-7e97", + "steps": args.steps, + "elapsed_s": elapsed, + "pre_probe": pre_probe, + "post_probe": post_probe, + }, args.out) + print(f"[save] wrote {args.out} tensors={len(sd)}") + + +if __name__ == "__main__": + main() From d8d1a858851a37ecd5177ee46271fd5532160996 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Tue, 21 Apr 2026 23:57:39 +0000 Subject: [PATCH 2/3] v3.46-trained: AMS_TRAINED_WEIGHTS shape-mismatch is warn+skip, not fatal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: 4.25 prefix_length_scaling_probe intentionally builds model_b with L_mem doubled (default 8 -> 16). The checkpoint was trained with L_mem=8, so L_mem-dependent tensors (e.g. mem_tokens[L_mem, d_LLM]) legitimately don't fit model_b — this is not a corrupt/incompatible ckpt, it's a deliberate Cfg scan. Old behavior: raise RuntimeError on any shape mismatch -> errored 4.25. 
New behavior: - Per-tensor shape mismatch is logged and skipped (first 5 detailed, rest summarized). - Hard failure only when the ckpt had non-backbone content (>10 tensors) AND zero tensors loaded — that is the §6 'wrong-SUT ckpt' pattern we must catch. Keeps the §6 protection against loading v344_trained.pt / v348_stacked.pt against a v3.46 SUT (they would mostly shape-mismatch and hit the loaded==0 guard), while letting L_mem-scaling probes proceed. Co-authored-by: FluffyAIcode --- scheme_b_v344.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/scheme_b_v344.py b/scheme_b_v344.py index a45b11a..c67754d 100644 --- a/scheme_b_v344.py +++ b/scheme_b_v344.py @@ -2541,14 +2541,23 @@ def _maybe_load_trained_weights(self): else: skipped += 1 prov = blob.get("provenance", "?") if isinstance(blob, dict) else "?" + total_nonbb_ckpt = sum(1 for k in sd if not k.startswith("backbone")) print(f" [AMS_TRAINED_WEIGHTS] loaded={loaded} skipped={skipped} " f"shape_errs={len(shape_errs)} path={path} provenance={prov}") if shape_errs: for n, s_model, s_ckpt in shape_errs[:5]: - print(f" ! shape mismatch {n}: model={s_model} ckpt={s_ckpt}") + print(f" ! shape mismatch (skipped) {n}: model={s_model} ckpt={s_ckpt}") + if len(shape_errs) > 5: + print(f" ... and {len(shape_errs) - 5} more shape mismatches, all skipped") + # Raise only if essentially nothing loaded AND the ckpt had content to offer: + # this catches the "loaded a v344/v348 ckpt against v3.46 shapes" mistake + # warned about in SPRINT_CLOSEOUT_v3.46.md \u00a76, without breaking probes + # like 4.25 that scale L_mem and legitimately have a few mismatching tensors. + if loaded == 0 and total_nonbb_ckpt > 10: raise RuntimeError( - f"AMS_TRAINED_WEIGHTS shape mismatch on {len(shape_errs)} tensor(s); " - f"ckpt not compatible with current SUT shapes") + f"AMS_TRAINED_WEIGHTS loaded 0 non-backbone tensors " + f"(ckpt had {total_nonbb_ckpt}); shape_errs={len(shape_errs)}. 
" + f"ckpt appears incompatible with current SUT shapes") def _compute_filler_centroid(self): if self.content_classifier is None or self.backbone is None: From 19a4ec4544ebda193d148371fd0350d0603bc263 Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Wed, 22 Apr 2026 00:27:30 +0000 Subject: [PATCH 3/3] v3.46-trained audit: 18/26 (-3 vs fresh 21/26), report + SPRINT update Child PR of #27. Training driver train_v346.py run for 60 steps on NVIDIA H200 (vast.ai), elapsed 335 s, mechanism observables per \u00a75.6 moved into target range (tail_head slot1 |w|_mean: 0 -> 7.30e-4; vocab_proj |w|_mean: 0 -> 5.49e-4, both in [1e-4, 1e-2]). Necessary conditions met; sufficient: not. Audit with AMS_TRAINED_WEIGHTS=ckpt/v346_trained.pt, AMS_DETERMINISTIC=1, elapsed 1250 s. Results (as data, per SPEC \u00a77.7 norm, no Delta-pass-count was predicted): PASS 18, FAIL 8 (was 21, 5). Zero cases flipped FAIL -> PASS. Three cases flipped PASS -> FAIL: 4.17 retrieval_prefix_decode_correlation_audit (prefix_l2_shift = 3.22e+11, correlation undefined -- trained prefix magnitude blew up) 4.20 rerank_stability_probe (space_P2 jaccard 0.429 < 0.6) 4.25 prefix_length_scaling_probe (L_mem 8->16 reduces starter mass to 0.82x, probe requires >1.10x) Regressions 4.8/4.21 also got worse: 'The pianist' unique_ratio 0.343 -> 0.296, avg_max_repeat 4.67 -> 5.0. Axis C: 8/11 -> 6/11. Axis D: 2/3 -> 1/3. Structural read (\u00a71.5): 60 steps on 12-text corpus with semantic_alignment weight 3.0 and no prefix-norm constraint caused the ctx encoder to saturate prefix magnitude while tail/vocab paths gained just enough weight to reinforce the corpus's own repetition pattern. This is \u00a75.7 option-A territory (pre-amplification gap) confirmed with data rather than predicted. 
Artifacts committed: reports/v346_trained_blackbox/report.{json,md} reports/v346_trained_blackbox/stdout.log reports/v346_trained_blackbox/train_log.jsonl reports/v346_trained_blackbox/train_stdout.log No Cfg changes (§5.4), no Trainer loss additions (§5.4). ckpt/v346_trained.pt is git-ignored per existing ckpt/*.pt rule; provenance recorded in the torch.save blob and in report metadata. Co-authored-by: FluffyAIcode --- SPRINT_CLOSEOUT_v3.46.md | 73 +- reports/v346_trained_blackbox/report.json | 5479 +++++++++++++++++ reports/v346_trained_blackbox/report.md | 3914 ++++++++++++ reports/v346_trained_blackbox/stdout.log | 291 + reports/v346_trained_blackbox/train_log.jsonl | 60 + .../v346_trained_blackbox/train_stdout.log | 72 + 6 files changed, 9882 insertions(+), 7 deletions(-) create mode 100644 reports/v346_trained_blackbox/report.json create mode 100644 reports/v346_trained_blackbox/report.md create mode 100644 reports/v346_trained_blackbox/stdout.log create mode 100644 reports/v346_trained_blackbox/train_log.jsonl create mode 100644 reports/v346_trained_blackbox/train_stdout.log diff --git a/SPRINT_CLOSEOUT_v3.46.md b/SPRINT_CLOSEOUT_v3.46.md index 2bfc2b7..7c33c80 100644 --- a/SPRINT_CLOSEOUT_v3.46.md +++ b/SPRINT_CLOSEOUT_v3.46.md @@ -1,9 +1,10 @@ -# Sprint Close-Out · v3.46 · fresh-init ceiling reached, training path blocked on GPU +# Sprint Close-Out · v3.46 · trained audit complete — 60-step training lowers score by 3 **Handoff from**: CPU-only cloud agent on VM without GPU -**Handoff to**: Cloud agent with GPU-enabled instance type -**Current branch**: `AgentMemory/v346-revertE-topk-nonexclusive-7e97` -**Current audit score**: **21/26** (elapsed 1456 s on CPU, fresh init, `AMS_DETERMINISTIC=1`) +**Handoff to (closed)**: GPU-enabled cloud agent via SSH to vast.ai (NVIDIA H200, cu128, torch 2.11.0) +**Current branch**: `AgentMemory/v346-trained-gpu-7e97` (child of `AgentMemory/v346-revertE-topk-nonexclusive-7e97`, PR #28) +**Trained audit score**: 
**18/26** (elapsed 1250 s on H200, `AMS_TRAINED_WEIGHTS=ckpt/v346_trained.pt`, `AMS_DETERMINISTIC=1`) +**Fresh-init baseline (for delta)**: 21/26 (unchanged, re-listed in §1.2) **Runner contract**: `v331_blackbox_eval.py` at v3.49 rev (4.24 substitution ban active) > This document is the full context for a new agent to pick up. Read this first, then read `V331_BLACKBOX_TEST_SPEC.md`, then the latest two SUT versions (`scheme_b_v344.py`, `scheme_b_v343.py` for comparison). Do not re-audit older versions — their numbers are in `reports/`. @@ -55,10 +56,69 @@ Axis A is structurally capped by per-memory `semantic_emb (d_LLM=1536 floats)` d --- +### 1.4 v3.46-trained audit table (60 training steps on H200, PR #28, reports/v346_trained_blackbox/) + +Training run: `python3 train_v346.py --steps 60` — 335 s wall on H200 (≈5.6 s/step), single-GPU, bf16 backbone, 113.8 M trainable non-backbone params, 11 memories stored pre-training. `Cfg` unchanged vs §1.1. Checkpoint: `ckpt/v346_trained.pt`, 455 MB, 202 non-backbone tensors, provenance `AgentMemory/v346-revertE-topk-nonexclusive-7e97`. + +**§5.6 mechanism observables (as data, per SPEC §7.7 norm)**: + +| Observable | Pre-train | Post-train | §5.6 target range | In range? | +|---|---|---|---|---| +| `bridge.tail_head.slot_heads[1][0].weight.abs().mean()` | `0.0` | `7.30e-4` | `[1e-4, 1e-2]` | yes | +| `vocab_proj.proj[-1].weight.abs().mean()` | `0.0` | `5.49e-4` | `[1e-4, 1e-2]` | yes | + +Both necessary conditions named in §5.6 are met. §5.6 explicitly stated this does not guarantee the audit flips — audit data below is the test. + +**PASS (18)**: 4.1, 4.2, 4.3, 4.4, 4.5, 4.6, 4.10, 4.12, 4.13, 4.14, 4.15, 4.16, 4.18, 4.22, 4.23, 4.24, 4.26, 4.9. 
+ +**FAIL (8)**: + +| Case | Metric | Observed (trained) | Observed (fresh) | Threshold | Delta | +|---|---|---|---|---|---| +| 4.7 semantic_memory_counterfactual_pairs | `music_margin > 0 AND space_margin > 0` | 0, 0 | 0, 0 | > 0 | unchanged (still axis-C fail) | +| 4.8 degeneration_quality | `avg_unique_token_ratio ≥ 0.35` | 0.296 on "The pianist" (worse than fresh 0.343) | 0.343 | ≥ 0.35 | **regressed** (trained more repetitive) | +| 4.11 retrieval_topk_semantic_shift | any keyword in top-12 | 0 hits | 0 hits | ≥ 1 | unchanged | +| 4.17 retrieval_prefix_decode_correlation_audit | `retrieval_strength__prefix_l2` finite + sign-correct | `null` (prefix_l2_shift=3.22e+11 → variance blew up) | passed | finite | **regression**: trained prefix has extreme L2 shift, correlation undefined | +| 4.19 stepwise_label_mass_alignment_audit | staged alignment ≥ 0 | mis-aligned (decode picks " Options", `stage_counts.decode=2 < inject=6`) | mis-aligned | aligned | unchanged (cascade of 4.11) | +| 4.20 rerank_stability_probe | both pairs jaccard ≥ 0.6 | `space_P2` jaccard=0.429 (spearman 0.961) | passed | both ≥ 0.6 | **regression**: training perturbed retrieval clustering on one prompt pair | +| 4.21 decode_repetition_feedback_probe | `avg_max_repeat ≤ 3.0` | 5.0 (worse than fresh 4.67) | 4.67 | ≤ 3 | regressed | +| 4.25 prefix_length_scaling_probe | `avg_mass_ratio_B_over_A > 1.10` | 0.824 (< 1.0, doubling L_mem *reduces* starter mass) | passed | > 1.10 | **regression**: trained slot weights do not scale positively with L_mem | + +**Net: +0 gains, −3 regressions (4.17/4.20/4.25), score 21 → 18.** + +Axis coverage (v3.49 runner): + +| Axis | v3.46 fresh | v3.46 trained | +|---|---|---| +| A compression | FAIL (8.97 / 10.0) | FAIL (8.97 / 10.0) — structural, unchanged | +| B injection cost | PASS | PASS | +| C fidelity | FAIL (8/11) | FAIL (6/11) | +| D stability | FAIL (2/3) | FAIL (1/3) | + +### 1.5 Why 60-step training did not help — structural read + +The §5.6 observables 
moved into range, confirming the zero-init dead paths `tail_head.slot_heads[1]` and `vocab_proj.proj[-1]` did start receiving gradient. But none of the five pre-training FAILs flipped (4.7/4.8/4.11/4.19/4.21), and three previously-passing cases flipped FAIL: + +- **4.17**: `prefix_l2_shift = 3.22e+11`. The trained prefix magnitude is ~6 orders of magnitude larger than the baseline hidden-state norm. Something in the training loss (most likely `semantic_alignment` at weight 3.0 against an unconstrained prefix magnitude) drove the injected prefix to saturate — this is consistent with `sa = 9.9 → 9.0` barely moving across 60 steps while producing a prefix with huge norm. The audit's correlation computation drops to `null` when inputs are non-finite or near-constant. +- **4.20**: `space_P2` pair jaccard dropped from ≥0.6 (fresh) to 0.429 (trained). Both prompts still rank `mid=5` first, but the tail of top-5 diverges between paraphrases — the trained retrieval clusters are sharper but more brittle to paraphrase. +- **4.25**: doubling `L_mem` 8→16 decreased starter-positive mass ratio to 0.82 (< 1.10). The trained slots behave anti-correlated with `L_mem`: more slots = more dilution of the starter-direction signal. This is the inverse of what the probe requires. +- **4.21**: `avg_max_repeat_per_content_token` went from 4.67 → 5.0. Training reinforced the corpus-local repetition pattern, making the 4.21 FAIL slightly worse. +- **4.8**: "The pianist" unique-token ratio fell from 0.343 → 0.296. Same class as 4.21. + +The shared pattern: `sa` (3.0× weight, reconstruction-anchored to the Qwen embedding space) trained the ctx encoder to push prefix magnitude up without a counterbalancing norm constraint, and the tail/vocab paths gained small weights that reinforce the retrieved memories' own repetitive phrasing rather than distributed vocabulary. 
60 steps on a 12-text corpus is too small and too narrow for the Qwen latent-space geometry to develop a dilution signal; it's exactly long enough to overfit the corpus's own repetition. This is the §5.7 **option A** territory (pre-amplification gap under current bridge depth/width + loss family), now confirmed with data rather than predicted. + +Two things this sprint **does not** recommend based on this data: + +1. Trivially training longer (100–300 steps) on the same 12-text corpus. With no norm regularizer on the prefix and `sa` weight at 3.0, longer training will push `prefix_l2_shift` further up and regress 4.17 more. +2. Adding a prefix-norm regularizer or a decode-time `vocab_bias` amplifier. Both would be threshold-chasing under §3.3 anti-pattern (1) / §5.7 option B without a SPEC amendment. + +--- + ## 2. What changed during this sprint (audit-level, most recent first) | Version | Branch | Audit | Delta | Core change | |---|---|---|---|---| +| v3.46-trained | `AgentMemory/v346-trained-gpu-7e97` | 18/26 | **−3** | 60-step train on H200 (train_v346.py §5.3); AMS_TRAINED_WEIGHTS loader added | | v3.46 | `AgentMemory/v346-revertE-topk-nonexclusive-7e97` | 21/26 | 0 | Revert [E] (one-line Cfg) | | v3.45-cond-buffer | `AgentMemory/v345-bridge-cond-buffer-7e97` | 21/26 | +1 | Add `_last_cond_*` mirror on `EmbBridge`; runner reads cond-preferred buffer for 4.23 | | v3.45-revertB-refreshD | `AgentMemory/v345-revertB-refreshD-7e97` | 20/26 | +2 | Revert [B] LN-dominated tail; add `_refresh_rare_keyword_indices()` in `write()` | @@ -331,9 +391,8 @@ Existing checkpoints `ckpt/v344_trained.pt` and `ckpt/v348_stacked.pt` were trai | #24 | v344-rewrite-abcdef-audit | draft | v3.44 six-mechanism rewrite + 18/26 audit | | #25 | v345-revertB-refreshD | draft | Revert [B], refresh timing, 20/26 audit | | #26 | v345-bridge-cond-buffer | draft | cond-buffer aliasing fix, 21/26 audit | -| #27 | v346-revertE-topk-nonexclusive | draft | **Current head.** Revert [E], 21/26 audit 
| - -New agent should create a child branch off #27's branch (or merge #27 first per user call) before starting `train_v346.py`. Suggested branch name: `AgentMemory/v346-trained-{suffix}-7e97`. +| #27 | v346-revertE-topk-nonexclusive | draft | Revert [E], 21/26 fresh-init audit (base for #28) | +| #28 | v346-trained-gpu-7e97 | draft | **Current head.** train_v346.py + AMS_TRAINED_WEIGHTS loader + **18/26 trained audit** | --- diff --git a/reports/v346_trained_blackbox/report.json b/reports/v346_trained_blackbox/report.json new file mode 100644 index 0000000..b3bf0e5 --- /dev/null +++ b/reports/v346_trained_blackbox/report.json @@ -0,0 +1,5479 @@ +{ + "generated_at_epoch": 1776817128.9913244, + "elapsed_seconds": 1250.0707318782806, + "checks": [ + { + "name": "leaf_capacity_stability", + "passed": true, + "detail": "{\"per_seed\": [{\"seed\": 0, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 1, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 2, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 3, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 4, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 5, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 6, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 7, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}]}" + }, + { + "name": "degenerate_direction_boundary", + "passed": true, + "detail": "{\"depth\": 47, \"count\": 100, \"violations\": [], \"consistency\": [], \"seed\": 17}" + }, + { + "name": "metric_trainability", + "passed": true, + "detail": "{\"training_info\": {\"total\": 41.98283386230469, \"recon\": 
2.4085488319396973, \"contrast\": 43.46337127685547, \"holonomy\": 4.786942481994629, \"write_policy\": 1.0882740020751953, \"semantic_probe\": 0.0, \"dir_diversity\": 0.0, \"reranker_ranking\": 0.0, \"encoder_throughput\": 3.1604340076446533, \"vocab_anchor\": -0.0, \"semantic_alignment\": 9.469874382019043, \"tail_semantic_anchor\": 10.84397029876709, \"functional_suppression\": 0.0, \"context_separation\": 0.0, \"slot_residual_alignment\": 0.0, \"inter_domain_margin\": 0.0, \"grad_norms\": {\"ctx_encoder\": 0.0004906014182315579, \"fib_encoder\": 0.11028211643598784, \"dir_predictor\": 0.0, \"fiber_connection\": 0.041244823555418676, \"fiber_attn\": 0.00016646675964572253, \"reranker\": 1.862751849004779e-08, \"qformer\": 0.023817353122735294, \"content_bypass\": 0.03501559529968225, \"semantic_probe\": 0.0, \"layer_pool\": 0.003975302446633577, \"prefix_aligner\": 0.008598358558648532, \"vocab_proj\": 0.03414980954657789, \"tail_head\": 0.28745011541395643, \"context_heads\": 0.024231906034080493, \"memory_context_encoder\": 0.03503168573482837}, \"loss_weights\": {\"recon\": 1.0, \"semantic_alignment\": 3.0, \"encoder_throughput\": 1.5, \"contrast\": 0.02, \"holonomy\": 0.005, \"write_policy\":" + }, + { + "name": "no_grad_generation", + "passed": true, + "detail": "{\"stored_memories\": 8, \"output\": \"The pianist piano hours piano pianoHours = int(raw input desperation hours hours))\\n piano perfectperfect print calculating hours * hours\\r\\nelse\"}" + }, + { + "name": "counterfactual_memory_influence", + "passed": true, + "detail": "{\"prompt\": \"Tell me something about practice and performance.\", \"music_output\": \"Tell me something about practice and performance. practiced midnight performances nocturnal practiced Midnight practiced noct midnight noct practiced practiced controlled noct noct midnight midnight practiced midnight controls noct midnight practices\", \"space_output\": \"Tell me something about practice and performance. 
distant distant space distant stars distant galaxies distant space observed observed space space stellar galaxies galaxies stellar evolution stellar stellar observed galaxies observed stellar\", \"outputs_differ\": true}" + }, + { + "name": "semantic_memory_grounding", + "passed": true, + "detail": "{\"prompt\": \"Explain what someone should focus on when improving technique and understanding the subject.\", \"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"blank_output\": \"Explain what someone should focus on when improving technique and understanding the subject. Mathematics education needs improvement mainly revolves around several key areas aimed 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +/usr/local/lib/python3.12/dist-packages/torch/autograd/graph.py:869: UserWarning: Attempting to run cuBLAS, but there was no current CUDA context! Attempting to set the primary context... (Triggered internally at /pytorch/aten/src/ATen/cuda/CublasHandlePool.cpp:335.) + return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +/usr/local/lib/python3.12/dist-packages/torch/autograd/graph.py:869: UserWarning: cuDNN Attention defaults to a non-deterministic algorithm. To explicitly enable determinism call torch.use_deterministic_algorithms(True, warn_only=False). (Triggered internally at /pytorch/aten/src/ATen/native/transformers/cuda/attention_backward.cu:212.) 
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +[case:done] metric_trainability passed=True +[case:start] no_grad_generation + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] no_grad_generation passed=True +[case:start] counterfactual_memory_influence + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] counterfactual_memory_influence passed=True +[case:start] semantic_memory_grounding + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] semantic_memory_grounding passed=True +[case:start] semantic_memory_counterfactual_pairs + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt 
provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] semantic_memory_counterfactual_pairs passed=False +[case:start] degeneration_quality + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] degeneration_quality passed=False +[case:start] prefix_logit_drift_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] prefix_logit_drift_audit passed=True +[case:start] retrieval_topk_semantic_shift + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] retrieval_topk_semantic_shift passed=False +[case:start] repetition_segment_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] 
repetition_segment_audit passed=True +[case:start] prefix_stepwise_drift_trajectory + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] prefix_stepwise_drift_trajectory passed=True +[case:start] retrieval_generation_alignment_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] retrieval_generation_alignment_audit passed=True +[case:start] retrieval_prefix_decode_correlation_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] retrieval_prefix_decode_correlation_audit passed=False +[case:start] stepwise_label_mass_alignment_audit + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] stepwise_label_mass_alignment_audit passed=False +[case:start] prompt_diversity_without_memory + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] prompt_diversity_without_memory passed=True +[case:start] save_load_consistency + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 
60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] save_load_consistency passed=True +[case:start] training_cache_isolation + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] training_cache_isolation passed=True +[case:start] cheating_heuristics + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] cheating_heuristics passed=True +[case:start] rerank_stability_probe + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] rerank_stability_probe passed=False +[case:start] decode_repetition_feedback_probe + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] decode_repetition_feedback_probe passed=False +[case:start] functional_token_suppression_probe + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] functional_token_suppression_probe passed=True +[case:start] keyword_specific_tail_slot_probe + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 
path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] keyword_specific_tail_slot_probe passed=True +[case:start] context_descriptor_cluster_probe + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] context_descriptor_cluster_probe passed=True +[case:start] prefix_length_scaling_probe + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=200 skipped=0 shape_errs=2 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 + ! shape mismatch (skipped) bridge.pe: model=(16, 1536) ckpt=(8, 1536) + ! 
shape mismatch (skipped) bridge.proj.q: model=(16, 1536) ckpt=(8, 1536) +[case:done] prefix_length_scaling_probe passed=False +[case:start] mixture_distribution_gate_probe + Loading weights: 0%| | 0/338 [00:00 60000, skip + [AMS_TRAINED_WEIGHTS] loaded=202 skipped=0 shape_errs=0 path=/workspace/AgentMemorySystem/ckpt/v346_trained.pt provenance=AgentMemory/v346-revertE-topk-nonexclusive-7e97 +[case:done] mixture_distribution_gate_probe passed=True +{ + "checks": [ + { + "name": "leaf_capacity_stability", + "passed": true, + "detail": "{\"per_seed\": [{\"seed\": 0, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 1, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 2, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 3, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 4, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 5, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 6, \"depth\": 6, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}, {\"seed\": 7, \"depth\": 5, \"count\": 240, \"violations\": [], \"consistency\": [], \"passed\": true}]}" + }, + { + "name": "degenerate_direction_boundary", + "passed": true, + "detail": "{\"depth\": 47, \"count\": 100, \"violations\": [], \"consistency\": [], \"seed\": 17}" + }, + { + "name": "metric_trainability", + "passed": true, + "detail": "{\"training_info\": {\"total\": 41.98283386230469, \"recon\": 2.4085488319396973, \"contrast\": 43.46337127685547, \"holonomy\": 4.786942481994629, \"write_policy\": 1.0882740020751953, \"semantic_probe\": 0.0, \"dir_diversity\": 0.0, \"reranker_ranking\": 0.0, \"encoder_throughput\": 3.1604340076446533, \"vocab_anchor\": -0.0, \"semantic_alignment\": 
9.469874382019043, \"tail_semantic_anchor\": 10.84397029876709, \"functional_suppression\": 0.0, \"context_separation\": 0.0, \"slot_residual_alignment\": 0.0, \"inter_domain_margin\": 0.0, \"grad_norms\": {\"ctx_encoder\": 0.0004906014182315579, \"fib_encoder\": 0.11028211643598784, \"dir_predictor\": 0.0, \"fiber_connection\": 0.041244823555418676, \"fiber_attn\": 0.00016646675964572253, \"reranker\": 1.862751849004779e-08, \"qformer\": 0.023817353122735294, \"content_bypass\": 0.03501559529968225, \"semantic_probe\": 0.0, \"layer_pool\": 0.003975302446633577, \"prefix_aligner\": 0.008598358558648532, \"vocab_proj\": 0.03414980954657789, \"tail_head\": 0.28745011541395643, \"context_heads\": 0.024231906034080493, \"memory_context_encoder\": 0.03503168573482837}, \"loss_weights\": {\"recon\": 1.0, \"semantic_alignment\": 3.0, \"encoder_throughput\": 1.5, \"contrast\": 0.02, \"holonomy\": 0.005, \"write_policy\":" + }, + { + "name": "no_grad_generation", + "passed": true, + "detail": "{\"stored_memories\": 8, \"output\": \"The pianist piano hours piano pianoHours = int(raw input desperation hours hours))\\n piano perfectperfect print calculating hours * hours\\r\\nelse\"}" + }, + { + "name": "counterfactual_memory_influence", + "passed": true, + "detail": "{\"prompt\": \"Tell me something about practice and performance.\", \"music_output\": \"Tell me something about practice and performance. practiced midnight performances nocturnal practiced Midnight practiced noct midnight noct practiced practiced controlled noct noct midnight midnight practiced midnight controls noct midnight practices\", \"space_output\": \"Tell me something about practice and performance. 
distant distant space distant stars distant galaxies distant space observed observed space space stellar galaxies galaxies stellar evolution stellar stellar observed galaxies observed stellar\", \"outputs_differ\": true}" + }, + { + "name": "semantic_memory_grounding", + "passed": true, + "detail": "{\"prompt\": \"Explain what someone should focus on when improving technique and understanding the subject.\", \"music_keywords\": [\"pianist\", \"practiced\", \"arpeggios\", \"chopin\", \"nocturnes\", \"midnight\", \"musician\", \"refined\", \"finger\", \"technique\", \"phrasing\", \"pedal\"], \"space_keywords\": [\"distant\", \"astronomers\", \"observed\", \"galaxies\", \"quasars\", \"stellar\", \"evolution\", \"space\", \"orbital\", \"mechanics\", \"explains\", \"satellites\"], \"blank_output\": \"Explain what someone should focus on when improving technique and understanding the subject. Mathematics education needs improvement mainly revolves around several key areas aimed 60000, skip +[build] device=cuda params total=1,657,496,024 trainable=113,781,720 +[build] initial memory count = 11 +[probe pre-train] {'tail_head_slot1_abs_mean': 0.0, 'vocab_proj_last_abs_mean': 0.0} +[train] Trainer built batch=3 steps=60 +/usr/local/lib/python3.12/dist-packages/torch/autograd/graph.py:869: UserWarning: Attempting to run cuBLAS, but there was no current CUDA context! Attempting to set the primary context... (Triggered internally at /pytorch/aten/src/ATen/cuda/CublasHandlePool.cpp:335.) 
+ return Variable._execution_engine.run_backward( # Calls into the C++ engine to run the backward pass +step 0 total=481.6660 recon=3.979 sa=9.902 tsa=10.922 va=-0.000 fs=0.000 cs=0.000 dt=19.8s +step 1 total=119.4697 recon=4.319 sa=9.650 tsa=10.858 va=-0.033 fs=1.604 cs=0.082 dt=13.3s +step 2 total=63.2969 recon=4.862 sa=9.649 tsa=10.767 va=-0.071 fs=2.639 cs=0.171 dt=15.5s +step 3 total=65.6129 recon=5.044 sa=9.489 tsa=10.812 va=-0.107 fs=4.250 cs=0.251 dt=9.5s +step 4 total=60.1234 recon=4.077 sa=9.579 tsa=10.503 va=-0.139 fs=4.958 cs=0.342 dt=6.6s +step 5 total=52.4816 recon=3.734 sa=9.189 tsa=10.851 va=-0.174 fs=4.333 cs=0.398 dt=6.6s +step 6 total=54.1920 recon=4.909 sa=9.184 tsa=10.757 va=-0.186 fs=4.604 cs=0.482 dt=6.2s +step 7 total=56.3720 recon=5.359 sa=9.227 tsa=10.805 va=-0.191 fs=4.625 cs=0.533 dt=5.5s +step 8 total=51.0531 recon=3.712 sa=9.229 tsa=10.494 va=-0.188 fs=4.625 cs=0.617 dt=4.8s +step 9 total=94.4659 recon=3.598 sa=9.477 tsa=10.841 va=-0.186 fs=4.688 cs=0.649 dt=4.7s +step 10 total=61.6950 recon=4.405 sa=9.065 tsa=10.739 va=-0.195 fs=3.458 cs=0.682 dt=4.7s +step 11 total=52.7172 recon=5.322 sa=9.583 tsa=10.796 va=-0.198 fs=3.292 cs=0.650 dt=4.3s +step 12 total=45.9884 recon=3.586 sa=9.058 tsa=10.485 va=-0.191 fs=3.875 cs=0.619 dt=4.8s +step 13 total=52.2593 recon=3.367 sa=9.481 tsa=10.834 va=-0.188 fs=4.250 cs=0.654 dt=4.3s +step 14 total=49.7553 recon=4.421 sa=9.106 tsa=10.745 va=-0.197 fs=3.667 cs=0.555 dt=5.5s +step 15 total=52.4259 recon=5.438 sa=9.204 tsa=10.796 va=-0.199 fs=4.979 cs=0.469 dt=5.1s +step 16 total=50.0380 recon=4.447 sa=9.104 tsa=10.485 va=-0.193 fs=4.417 cs=0.380 dt=5.7s +step 17 total=48.8605 recon=3.726 sa=9.431 tsa=10.833 va=-0.190 fs=5.125 cs=0.558 dt=5.1s +step 18 total=48.3267 recon=4.572 sa=9.038 tsa=10.750 va=-0.200 fs=3.979 cs=0.318 dt=5.6s +step 19 total=50.6655 recon=5.417 sa=9.249 tsa=10.795 va=-0.203 fs=3.938 cs=0.245 dt=4.7s +step 20 total=47.9978 recon=3.788 sa=9.193 tsa=10.486 va=-0.196 fs=4.604 
cs=0.192 dt=5.7s +step 21 total=49.9174 recon=5.196 sa=9.271 tsa=10.835 va=-0.194 fs=5.458 cs=0.336 dt=4.3s +step 22 total=50.1498 recon=4.459 sa=9.097 tsa=10.750 va=-0.200 fs=3.729 cs=0.127 dt=5.3s +step 23 total=48.8436 recon=5.121 sa=9.182 tsa=10.783 va=-0.201 fs=4.208 cs=0.013 dt=4.9s +step 24 total=45.9512 recon=3.584 sa=9.173 tsa=10.483 va=-0.192 fs=4.917 cs=0.067 dt=5.2s +step 25 total=47.0343 recon=3.568 sa=9.270 tsa=10.836 va=-0.190 fs=4.396 cs=0.187 dt=4.5s +step 26 total=48.0266 recon=4.522 sa=9.197 tsa=10.743 va=-0.195 fs=4.083 cs=0.090 dt=5.0s +step 27 total=48.9258 recon=5.023 sa=9.183 tsa=10.769 va=-0.197 fs=4.042 cs=0.000 dt=4.8s +step 28 total=45.0445 recon=3.705 sa=9.071 tsa=10.484 va=-0.189 fs=4.104 cs=0.000 dt=5.6s +step 29 total=45.7887 recon=3.594 sa=9.327 tsa=10.841 va=-0.189 fs=4.562 cs=0.131 dt=4.3s +step 30 total=47.0911 recon=4.428 sa=9.139 tsa=10.736 va=-0.197 fs=4.354 cs=0.011 dt=5.6s +step 31 total=48.5491 recon=5.016 sa=9.254 tsa=10.764 va=-0.200 fs=3.771 cs=0.000 dt=4.5s +step 32 total=44.7300 recon=3.554 sa=8.999 tsa=10.478 va=-0.195 fs=3.896 cs=0.000 dt=5.2s +step 33 total=46.2654 recon=3.570 sa=9.469 tsa=10.834 va=-0.193 fs=4.500 cs=0.077 dt=3.8s +step 34 total=46.0381 recon=4.344 sa=8.967 tsa=10.712 va=-0.202 fs=3.208 cs=0.000 dt=5.3s +step 35 total=48.6987 recon=5.101 sa=9.326 tsa=10.758 va=-0.204 fs=3.708 cs=0.000 dt=4.7s +step 36 total=43.3815 recon=3.426 sa=8.971 tsa=10.488 va=-0.199 fs=3.083 cs=0.011 dt=5.1s +step 37 total=45.2655 recon=3.550 sa=9.418 tsa=10.830 va=-0.195 fs=4.271 cs=0.000 dt=3.8s +step 38 total=45.1332 recon=4.191 sa=8.932 tsa=10.692 va=-0.204 fs=2.875 cs=0.000 dt=5.5s +step 39 total=48.3996 recon=5.008 sa=9.289 tsa=10.743 va=-0.206 fs=3.354 cs=0.000 dt=5.1s +step 40 total=43.3024 recon=3.371 sa=8.995 tsa=10.492 va=-0.200 fs=3.125 cs=0.000 dt=6.5s +step 41 total=44.9616 recon=3.537 sa=9.401 tsa=10.819 va=-0.197 fs=4.271 cs=0.000 dt=4.5s +step 42 total=45.3224 recon=4.156 sa=8.973 tsa=10.678 va=-0.204 
fs=3.438 cs=0.000 dt=5.7s +step 43 total=47.1863 recon=4.826 sa=9.105 tsa=10.724 va=-0.206 fs=3.042 cs=0.000 dt=4.3s +step 44 total=43.5735 recon=3.324 sa=9.084 tsa=10.500 va=-0.199 fs=3.354 cs=0.000 dt=5.2s +step 45 total=45.7495 recon=3.484 sa=9.320 tsa=10.801 va=-0.196 fs=4.042 cs=0.000 dt=3.8s +step 46 total=45.2532 recon=4.188 sa=8.992 tsa=10.664 va=-0.204 fs=2.833 cs=0.000 dt=5.4s +step 47 total=47.9007 recon=4.686 sa=9.106 tsa=10.707 va=-0.205 fs=2.979 cs=0.000 dt=3.8s +step 48 total=43.8816 recon=3.265 sa=9.144 tsa=10.499 va=-0.199 fs=3.229 cs=0.000 dt=5.4s +step 49 total=44.3041 recon=3.397 sa=9.150 tsa=10.767 va=-0.196 fs=4.042 cs=0.000 dt=3.5s +step 50 total=44.7811 recon=4.123 sa=9.011 tsa=10.656 va=-0.204 fs=2.542 cs=0.000 dt=5.3s +step 51 total=47.2175 recon=4.680 sa=9.172 tsa=10.695 va=-0.204 fs=2.771 cs=0.000 dt=3.8s +step 52 total=43.4362 recon=3.186 sa=9.046 tsa=10.513 va=-0.199 fs=3.229 cs=0.017 dt=5.4s +step 53 total=43.3770 recon=3.393 sa=9.157 tsa=10.748 va=-0.197 fs=3.833 cs=0.000 dt=3.7s +step 54 total=44.9270 recon=4.081 sa=9.071 tsa=10.658 va=-0.204 fs=2.729 cs=0.000 dt=5.4s +step 55 total=46.7531 recon=4.699 sa=9.128 tsa=10.679 va=-0.205 fs=2.604 cs=0.000 dt=3.9s +step 56 total=44.5620 recon=3.268 sa=9.096 tsa=10.522 va=-0.199 fs=2.792 cs=0.000 dt=5.3s +step 57 total=46.0898 recon=3.215 sa=9.105 tsa=10.729 va=-0.197 fs=3.771 cs=0.000 dt=4.3s +step 58 total=49.9087 recon=4.080 sa=9.089 tsa=10.660 va=-0.204 fs=2.917 cs=0.000 dt=4.8s +step 59 total=49.2678 recon=4.716 sa=9.049 tsa=10.668 va=-0.205 fs=2.792 cs=0.000 dt=4.3s +[probe post-train] {'tail_head_slot1_abs_mean': 0.0007297309930436313, 'vocab_proj_last_abs_mean': 0.0005491127376444638} +[train] elapsed 335.1s avg/step=5.58s +[save] wrote ckpt/v346_trained.pt tensors=202