From c739284384cd6ae5739c8230e1f06a8c0b86fa98 Mon Sep 17 00:00:00 2001
From: Test <test@test.com>
Date: Fri, 15 May 2026 12:55:47 -0500
Subject: [PATCH] test(inference,#1275): regression test for no-CPU-fallback
 alpha contract
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add `tests/no_cpu_fallback_contract.rs` — three forbidden-strings
ratchets that fail the build if a future PR weakens the
no-CPU-fallback contract:

1. `select_best_device_panics_loudly_on_no_gpu` — asserts
   `inference/model.rs::select_best_device` keeps the
   `panic!("No GPU device available for inference. CPU fallback is
   disabled.")` loud-fail and tries CUDA + Metal before panicking.

2. `ort_providers_documents_no_cpu_fallback_contract` — asserts
   `ort_providers.rs` keeps the "CPU fallback is forbidden" comment
   that documents the rule from source.

3. `llamacpp_adapter_uses_loud_fail_for_no_local_model` — asserts
   `LlamaCppAdapter` uses the typed `NoLocalModelLoadable` error
   (shipped in #1093 / lane A PR-2) rather than a silent skip.

Pattern: same forbidden-strings ratchet shape as lane F PR-2 (#1129
TS persona forbidden-strings), applied to the Rust inference layer.
A test failure points the future-PR-author at the exact contract
they're about to weaken.

Closes the acceptance criterion #3 of #1262 ("regression test per
fallback path"). Final PR (4 of 4) for the silent CPU fallback audit.

Verified:
- cargo test --features metal --test no_cpu_fallback_contract:
  3 passed, 0 failed

Lane: alpha flywheel #1272 lane 6.
Audit: https://github.com/CambrianTech/continuum/issues/1262#issuecomment-4461757997

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../tests/no_cpu_fallback_contract.rs         | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 src/workers/continuum-core/tests/no_cpu_fallback_contract.rs

diff --git a/src/workers/continuum-core/tests/no_cpu_fallback_contract.rs b/src/workers/continuum-core/tests/no_cpu_fallback_contract.rs
new file mode 100644
index 000000000..3b443651b
--- /dev/null
+++ b/src/workers/continuum-core/tests/no_cpu_fallback_contract.rs
@@ -0,0 +1,85 @@
+//! Regression test for the no-CPU-fallback alpha contract (#1262 → #1275).
+//!
+//! Continuum's documented contract per `project_continuum_alpha_product_bar_sensory_personas.md`
+//! and `docs/architecture/SENSORY-PERSONA-ALPHA-CONTRACT.md` is **NO silent CPU fallback**:
+//! standard personas use `SiliconResidencyRequirement::GpuOrUnifiedMemoryOnly` and the model
+//! resolver is supposed to refuse rather than fall through to CPU.
+//!
+//! The contract is enforced at runtime by `inference::model::select_best_device` (panics if
+//! no GPU device is available) and by `inference::ort_providers` (CPU-fallback comment block
+//! at line ~119). This test asserts those invariants by inspection of the source files —
+//! a future PR that removes the loud-fail panic, weakens the message, or adds a silent
+//! CPU branch will fail this test.
+//!
+//! This is a **forbidden-strings ratchet** following the established pattern from lane F
+//! PR-2 (#1129 — TS persona forbidden-strings) applied to the Rust inference layer.
+//!
+//! Audit context:
+//!   https://github.com/CambrianTech/continuum/issues/1262#issuecomment-4461757997
+
+const SELECT_BEST_DEVICE_SOURCE: &str =
+    include_str!("../src/inference/model.rs");
+
+const ORT_PROVIDERS_SOURCE: &str =
+    include_str!("../src/inference/ort_providers.rs");
+
+const LLAMACPP_ADAPTER_SOURCE: &str =
+    include_str!("../src/inference/llamacpp_adapter.rs");
+
+#[test]
+fn select_best_device_panics_loudly_on_no_gpu() {
+    // The function MUST contain an explicit panic with a message that tells
+    // the user why we won't fall through to CPU. If a future PR removes the
+    // panic, weakens the message, or replaces it with a silent fallback
+    // (e.g. `Device::Cpu` return), this test fails and the no-CPU-fallback
+    // alpha contract is preserved.
+
+    assert!(
+        SELECT_BEST_DEVICE_SOURCE.contains("panic!(\"No GPU device available for inference. CPU fallback is disabled.\")"),
+        "select_best_device must loud-fail with the documented message. \
+         If you changed it, update both this test and the alpha contract docs \
+         (docs/architecture/SENSORY-PERSONA-ALPHA-CONTRACT.md). \
+         A silent fallthrough to Device::Cpu was the bug #1262 was filed for."
+    );
+
+    // Belt-and-suspenders: verify the function explicitly returns Device early
+    // for both Cuda and Metal cases (the only legitimate non-panic exits).
+    assert!(
+        SELECT_BEST_DEVICE_SOURCE.contains("Device::new_cuda(0)"),
+        "select_best_device must try CUDA before panicking"
+    );
+    assert!(
+        SELECT_BEST_DEVICE_SOURCE.contains("Device::new_metal(0)"),
+        "select_best_device must try Metal before panicking"
+    );
+}
+
+#[test]
+fn ort_providers_documents_no_cpu_fallback_contract() {
+    // ort_providers.rs carries the same contract for the ORT consumer
+    // (embedding / TTS / STT / vision via ONNX Runtime). The doc string
+    // must remain present so the architectural rule is discoverable from
+    // source alone.
+
+    assert!(
+        ORT_PROVIDERS_SOURCE.contains("CPU fallback is forbidden"),
+        "ort_providers.rs must document 'CPU fallback is forbidden' for the ORT consumer. \
+         If you removed the comment, the no-CPU-fallback rule is no longer self-documenting \
+         from source — surface the rule in another way before removing the comment."
+    );
+}
+
+#[test]
+fn llamacpp_adapter_uses_loud_fail_for_no_local_model() {
+    // The production adapter must use the typed `NoLocalModelLoadable` error
+    // (shipped in #1093 / lane A PR-2) rather than a silent fallthrough when
+    // no local GGUF is on disk.
+
+    assert!(
+        LLAMACPP_ADAPTER_SOURCE.contains("NoLocalModelLoadable"),
+        "LlamaCppAdapter must use the typed NoLocalModelLoadable error for missing-model cases. \
+         If you replaced it with a silent skip / Result::Ok-with-None / log-and-continue, \
+         the no-fallback alpha contract is violated and the user gets 1 tok/sec CPU instead \
+         of a clear 'install missing artifact' error."
+    );
+}