From c739284384cd6ae5739c8230e1f06a8c0b86fa98 Mon Sep 17 00:00:00 2001 From: Test Date: Fri, 15 May 2026 12:55:47 -0500 Subject: [PATCH] test(inference,#1275): regression test for no-CPU-fallback alpha contract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `tests/no_cpu_fallback_contract.rs` — three forbidden-strings ratchets that fail the build if a future PR weakens the no-CPU-fallback contract: 1. `select_best_device_panics_loudly_on_no_gpu` — asserts `inference/model.rs::select_best_device` keeps the `panic!("No GPU device available for inference. CPU fallback is disabled.")` loud-fail and tries CUDA + Metal before panicking. 2. `ort_providers_documents_no_cpu_fallback_contract` — asserts `ort_providers.rs` keeps the "CPU fallback is forbidden" comment that documents the rule from source. 3. `llamacpp_adapter_uses_loud_fail_for_no_local_model` — asserts `LlamaCppAdapter` uses the typed `NoLocalModelLoadable` error (shipped in #1093 / lane A PR-2) rather than a silent skip. Pattern: same forbidden-strings ratchet shape as lane F PR-2 (#1129 TS persona forbidden-strings), applied to the Rust inference layer. A test failure points the future-PR-author at the exact contract they're about to weaken. Closes the acceptance criterion #3 of #1262 ("regression test per fallback path"). Final PR (4 of 4) for the silent CPU fallback audit. Verified: - cargo test --features metal --test no_cpu_fallback_contract: 3 passed, 0 failed Lane: alpha flywheel #1272 lane 6. Audit: https://github.com/CambrianTech/continuum/issues/1262#issuecomment-4461757997 Co-Authored-By: Claude Opus 4.7 (1M context) --- .../tests/no_cpu_fallback_contract.rs | 85 +++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 src/workers/continuum-core/tests/no_cpu_fallback_contract.rs diff --git a/src/workers/continuum-core/tests/no_cpu_fallback_contract.rs b/src/workers/continuum-core/tests/no_cpu_fallback_contract.rs new file mode 100644 index 000000000..3b443651b --- /dev/null +++ b/src/workers/continuum-core/tests/no_cpu_fallback_contract.rs @@ -0,0 +1,85 @@ +//! Regression test for the no-CPU-fallback alpha contract (#1262 → #1275). +//! +//! Continuum's documented contract per `project_continuum_alpha_product_bar_sensory_personas.md` +//! and `docs/architecture/SENSORY-PERSONA-ALPHA-CONTRACT.md` is **NO silent CPU fallback**: +//! standard personas use `SiliconResidencyRequirement::GpuOrUnifiedMemoryOnly` and the model +//! resolver is supposed to refuse rather than fall through to CPU. +//! +//! The contract is enforced at runtime by `inference::model::select_best_device` (panics if +//! no GPU device is available) and by `inference::ort_providers` (CPU-fallback comment block +//! at line ~119). This test asserts those invariants by inspection of the source files — +//! a future PR that removes the loud-fail panic, weakens the message, or adds a silent +//! CPU branch will fail this test. +//! +//! This is a **forbidden-strings ratchet** following the established pattern from lane F +//! PR-2 (#1129 — TS persona forbidden-strings) applied to the Rust inference layer. +//! +//! Audit context: +//! https://github.com/CambrianTech/continuum/issues/1262#issuecomment-4461757997 + +const SELECT_BEST_DEVICE_SOURCE: &str = + include_str!("../src/inference/model.rs"); + +const ORT_PROVIDERS_SOURCE: &str = + include_str!("../src/inference/ort_providers.rs"); + +const LLAMACPP_ADAPTER_SOURCE: &str = + include_str!("../src/inference/llamacpp_adapter.rs"); + +#[test] +fn select_best_device_panics_loudly_on_no_gpu() { + // The function MUST contain an explicit panic with a message that tells + // the user why we won't fall through to CPU. If a future PR removes the + // panic, weakens the message, or replaces it with a silent fallback + // (e.g. `Device::Cpu` return), this test fails and the no-CPU-fallback + // alpha contract is preserved. + + assert!( + SELECT_BEST_DEVICE_SOURCE.contains("panic!(\"No GPU device available for inference. CPU fallback is disabled.\")"), + "select_best_device must loud-fail with the documented message. \ + If you changed it, update both this test and the alpha contract docs \ + (docs/architecture/SENSORY-PERSONA-ALPHA-CONTRACT.md). \ + A silent fallthrough to Device::Cpu was the bug #1262 was filed for." + ); + + // Belt-and-suspenders: verify the function explicitly returns Device early + // for both Cuda and Metal cases (the only legitimate non-panic exits). + assert!( + SELECT_BEST_DEVICE_SOURCE.contains("Device::new_cuda(0)"), + "select_best_device must try CUDA before panicking" + ); + assert!( + SELECT_BEST_DEVICE_SOURCE.contains("Device::new_metal(0)"), + "select_best_device must try Metal before panicking" + ); +} + +#[test] +fn ort_providers_documents_no_cpu_fallback_contract() { + // ort_providers.rs carries the same contract for the ORT consumer + // (embedding / TTS / STT / vision via ONNX Runtime). The doc string + // must remain present so the architectural rule is discoverable from + // source alone. + + assert!( + ORT_PROVIDERS_SOURCE.contains("CPU fallback is forbidden"), + "ort_providers.rs must document 'CPU fallback is forbidden' for the ORT consumer. \ + If you removed the comment, the no-CPU-fallback rule is no longer self-documenting \ + from source — surface the rule in another way before removing the comment." + ); +} + +#[test] +fn llamacpp_adapter_uses_loud_fail_for_no_local_model() { + // The production adapter must use the typed `NoLocalModelLoadable` error + // (shipped in #1093 / lane A PR-2) rather than a silent fallthrough when + // no local GGUF is on disk. + + assert!( + LLAMACPP_ADAPTER_SOURCE.contains("NoLocalModelLoadable"), + "LlamaCppAdapter must use the typed NoLocalModelLoadable error for missing-model cases. \ + If you replaced it with a silent skip / Result::Ok-with-None / log-and-continue, \ + the no-fallback alpha contract is violated and the user gets 1 tok/sec CPU instead \ + of a clear 'install missing artifact' error." + ); +}