From 4dc7c16494942745dd184da2d9d146a85441f14f Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 00:14:30 +0200 Subject: [PATCH 01/13] fix(agent): classify HTTP 4xx as ErrorGeneral not ErrorNetwork Refs #1992 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reqwest::Error from error_for_status() on a 400 response is an HTTP application error, not a transport/connectivity failure. Previously classify_error() returned ExitCode::ErrorNetwork (6) for all reqwest errors, causing test_server_mode_search_with_selected_role to fail when the server returned 400 due to missing role configuration. Now check re.status() before falling back to ErrorNetwork: - 401 / 403 → ErrorAuth (5) - 404 → ErrorNotFound (4) - other 4xx/5xx → ErrorGeneral (1) - no status → ErrorNetwork (6, true connectivity failure) Adds five regression tests using a real one-shot TCP server so no mocks are needed, covering 400, 401, 403, 404, 500 paths. Co-Authored-By: Claude Sonnet 4.6 --- crates/terraphim_agent/src/main.rs | 120 +++++++++++++++++++++++++++-- 1 file changed, 114 insertions(+), 6 deletions(-) diff --git a/crates/terraphim_agent/src/main.rs b/crates/terraphim_agent/src/main.rs index 291d18a42..c7d5fd9ad 100644 --- a/crates/terraphim_agent/src/main.rs +++ b/crates/terraphim_agent/src/main.rs @@ -1338,12 +1338,23 @@ fn classify_error(err: &anyhow::Error) -> robot::exit_codes::ExitCode { #[cfg(feature = "server")] if err.chain().any(|e| e.is::<reqwest::Error>()) { - let is_timeout = err - .chain() - .filter_map(|e| e.downcast_ref::<reqwest::Error>()) - .any(|re| re.is_timeout()); - if is_timeout { - return ExitCode::ErrorTimeout; + for e in err.chain() { + if let Some(re) = e.downcast_ref::<reqwest::Error>() { + if re.is_timeout() { + return ExitCode::ErrorTimeout; + } + // HTTP status errors (4xx/5xx) are application errors, not connectivity failures. + // A 400 from a misconfigured role must not masquerade as a network error.
+ if let Some(status) = re.status() { + return match status.as_u16() { + 401 | 403 => ExitCode::ErrorAuth, + 404 => ExitCode::ErrorNotFound, + _ => ExitCode::ErrorGeneral, + }; + } + // True transport error (connection refused, DNS failure, etc.) + return ExitCode::ErrorNetwork; + } } return ExitCode::ErrorNetwork; } @@ -1507,6 +1518,103 @@ mod classify_error_tests { } } +/// Regression tests for classify_error with real reqwest HTTP status errors. +/// These verify that HTTP 4xx responses are not misclassified as network errors. +#[cfg(all(test, feature = "server"))] +mod classify_reqwest_tests { + use super::*; + use robot::exit_codes::ExitCode; + use std::io::Write; + use std::net::TcpListener; + + /// Start a one-shot TCP server that replies with the given HTTP status code. + fn start_status_server(status: u16, reason: &'static str) -> u16 { + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + std::thread::spawn(move || { + if let Ok((mut stream, _)) = listener.accept() { + let mut buf = [0u8; 2048]; + let _ = std::io::Read::read(&mut stream, &mut buf); + let response = format!( + "HTTP/1.1 {} {}\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", + status, reason + ); + let _ = stream.write_all(response.as_bytes()); + } + }); + port + } + + #[tokio::test] + async fn http_400_maps_to_error_general_not_network() { + let port = start_status_server(400, "Bad Request"); + let client = reqwest::Client::new(); + let res = client + .get(format!("http://127.0.0.1:{}/", port)) + .send() + .await + .unwrap(); + let reqwest_err: anyhow::Error = res.error_for_status().unwrap_err().into(); + assert_eq!( + classify_error(&reqwest_err), + ExitCode::ErrorGeneral, + "HTTP 400 from server must not be classified as ErrorNetwork (exit 6)" + ); + } + + #[tokio::test] + async fn http_401_maps_to_error_auth() { + let port = start_status_server(401, "Unauthorized"); + let client = reqwest::Client::new(); + let res = client + 
.get(format!("http://127.0.0.1:{}/", port)) + .send() + .await + .unwrap(); + let reqwest_err: anyhow::Error = res.error_for_status().unwrap_err().into(); + assert_eq!(classify_error(&reqwest_err), ExitCode::ErrorAuth); + } + + #[tokio::test] + async fn http_403_maps_to_error_auth() { + let port = start_status_server(403, "Forbidden"); + let client = reqwest::Client::new(); + let res = client + .get(format!("http://127.0.0.1:{}/", port)) + .send() + .await + .unwrap(); + let reqwest_err: anyhow::Error = res.error_for_status().unwrap_err().into(); + assert_eq!(classify_error(&reqwest_err), ExitCode::ErrorAuth); + } + + #[tokio::test] + async fn http_404_maps_to_error_not_found() { + let port = start_status_server(404, "Not Found"); + let client = reqwest::Client::new(); + let res = client + .get(format!("http://127.0.0.1:{}/", port)) + .send() + .await + .unwrap(); + let reqwest_err: anyhow::Error = res.error_for_status().unwrap_err().into(); + assert_eq!(classify_error(&reqwest_err), ExitCode::ErrorNotFound); + } + + #[tokio::test] + async fn http_500_maps_to_error_general() { + let port = start_status_server(500, "Internal Server Error"); + let client = reqwest::Client::new(); + let res = client + .get(format!("http://127.0.0.1:{}/", port)) + .send() + .await + .unwrap(); + let reqwest_err: anyhow::Error = res.error_for_status().unwrap_err().into(); + assert_eq!(classify_error(&reqwest_err), ExitCode::ErrorGeneral); + } +} + /// Build a ForgivingParser with the actual CLI subcommands. fn build_cli_forgiving_parser() -> forgiving::ForgivingParser { let mut commands = vec![ From 4cfc5be6fc50ef939378cc6e99390bfdb377ce6d Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 00:38:15 +0200 Subject: [PATCH 02/13] test(agent): add server_http_error_exits_1 integration test Refs #1992 Adds an integration test that starts a real TCP listener replying with HTTP 400 Bad Request and asserts the binary exits 0 or 1 (never 6). 
This exercises the classify_error fix end-to-end through the real binary, complementing the unit-level classify_reqwest_tests in main.rs. Co-Authored-By: Claude Sonnet 4.6 --- crates/terraphim_agent/tests/exit_codes.rs | 51 ++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/crates/terraphim_agent/tests/exit_codes.rs b/crates/terraphim_agent/tests/exit_codes.rs index 15b1cf5ea..2ffd279e2 100644 --- a/crates/terraphim_agent/tests/exit_codes.rs +++ b/crates/terraphim_agent/tests/exit_codes.rs @@ -146,6 +146,57 @@ fn unreachable_server_exits_6() { ); } +// --------------------------------------------------------------------------- +// Exit code 1 -- HTTP status error from server (must NOT be exit 6) +// --------------------------------------------------------------------------- + +/// Regression test for #1992: HTTP 4xx responses from the server must produce +/// exit 1 (ErrorGeneral), not exit 6 (ErrorNetwork). +/// +/// Starts a real TCP listener that replies with 400 to every request, so the +/// binary receives a genuine reqwest::Error with a status code attached. +/// Before the fix, classify_error returned ErrorNetwork (6) for all reqwest +/// errors; after the fix it checks re.status() first. 
+#[cfg(feature = "server")] +#[test] +fn server_http_error_exits_1() { + use std::io::Write; + use std::net::TcpListener; + + let listener = TcpListener::bind("127.0.0.1:0").unwrap(); + let port = listener.local_addr().unwrap().port(); + + std::thread::spawn(move || { + for stream in listener.incoming() { + if let Ok(mut s) = stream { + let mut buf = [0u8; 2048]; + let _ = std::io::Read::read(&mut s, &mut buf); + let _ = s.write_all( + b"HTTP/1.1 400 Bad Request\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", + ); + } + } + }); + + let status = cmd() + .args([ + "--server", + "--server-url", + &format!("http://127.0.0.1:{port}"), + "search", + "terraphim", + ]) + .output() + .expect("failed to run binary") + .status; + let code = status.code().unwrap_or(1); + assert!( + code == 0 || code == 1, + "HTTP 400 from server must exit 0 (offline fallback) or 1 (ErrorGeneral), \ + not 6 (ErrorNetwork); got {code}" + ); +} + // --------------------------------------------------------------------------- // Robot mode and --format json error envelopes // --------------------------------------------------------------------------- From fedb78e2d3b0198cea9efad0da3dd82c5368d0ba Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 03:52:14 +0200 Subject: [PATCH 03/13] feat(pr-reviewer): agent work [auto-commit] --- .docs/api-reference-snippets.md | 103 ++++++++++++++++++++++ .docs/doc-report-latest.md | 131 ++++++++++++++++++++++++++++ CHANGELOG.md | 1 + reports/spec-validation-20260604.md | 76 ++++++++++++++++ 4 files changed, 311 insertions(+) create mode 100644 .docs/api-reference-snippets.md create mode 100644 .docs/doc-report-latest.md create mode 100644 reports/spec-validation-20260604.md diff --git a/.docs/api-reference-snippets.md b/.docs/api-reference-snippets.md new file mode 100644 index 000000000..4248b93d8 --- /dev/null +++ b/.docs/api-reference-snippets.md @@ -0,0 +1,103 @@ +# API Reference Snippets + +Generated: 2026-06-04 + +Selected public types with 
proposed or existing doc comments. + +--- + +## `terraphim_persistence` + +### `DeviceStorage` + +**Source:** `crates/terraphim_persistence/src/lib.rs:66` + +```rust +/// Process-wide singleton managing all configured storage backends, ordered by +/// latency. `fastest_op` is the lowest-latency operator used as the cache +/// write-back target. Obtain via [`DeviceStorage::instance`]; use +/// [`DeviceStorage::init_memory_only`] in tests to avoid filesystem access. +pub struct DeviceStorage { + pub ops: HashMap, + pub fastest_op: Operator, +} +``` + +Key methods: `instance()`, `init_memory_only()`, `arc_instance()`, `arc_memory_only()`. + +--- + +### `Persistable` + +**Source:** `crates/terraphim_persistence/src/lib.rs:220` + +```rust +/// A trait for persisting objects to and from the fastest configured storage +/// operator (file system, database, or cloud backend). +/// +/// Implementors serialise to JSON and delegate I/O to `DeviceStorage`'s +/// `fastest_op`. Provides full CRUD: `save`, `load`, `get`, `delete`. +#[async_trait] +pub trait Persistable: Serialize + DeserializeOwned { + fn new(key: String) -> Self; + async fn save(&self) -> Result<()>; + // ... +} +``` + +--- + +### `ConversationPersistence` + +**Source:** `crates/terraphim_persistence/src/conversation.rs:10` + +```rust +/// Async trait for saving, loading, and listing [`Conversation`] records. +/// +/// The production implementation (`OpenDALConversationPersistence`) maintains +/// an in-memory `ConversationIndex` cache and fans writes to all configured +/// operators. 
+#[async_trait] +pub trait ConversationPersistence: Send + Sync { + async fn save(&self, conversation: &Conversation) -> Result<()>; + async fn load(&self, id: &ConversationId) -> Result; + async fn delete(&self, id: &ConversationId) -> Result<()>; + async fn list_ids(&self) -> Result>; + async fn exists(&self, id: &ConversationId) -> Result; + async fn list_summaries(&self) -> Result>; +} +``` + +--- + +## `haystack_core` + +### `HaystackProvider` + +**Source:** `crates/haystack_core/src/lib.rs:8` + +Proposed doc comment (currently missing): + +```rust +/// Abstraction over a data source (haystack) that can be indexed and searched. +/// +/// Implement this trait for each external system (filesystem, Confluence, +/// Discourse, email) to expose it to the terraphim search pipeline. +pub trait HaystackProvider { ... } +``` + +--- + +## `terraphim_types` — Priority Gaps + +The following core types in `lib.rs` are undocumented and referenced across +the workspace. One-line proposed comments: + +| Type | Proposed doc | +|------|-------------| +| `RoleName` (line 171) | `/// Newtype wrapper around a role identifier string.` | +| `NormalizedTerm` (line 306) | `/// A term normalised for Aho-Corasick automata matching.` | +| `Concept` (line 438) | `/// A knowledge-graph node linking a normalised term to source documents.` | +| `DocumentType` (line 476) | `/// Classifies a document by its source format or provenance.` | +| `MarkdownDirectives` (line 606) | `/// Structured directives parsed from a Markdown document's front-matter.` | +| `RouteDirective` (line 488) | `/// Specifies how an agent command should be dispatched.` | diff --git a/.docs/doc-report-latest.md b/.docs/doc-report-latest.md new file mode 100644 index 000000000..d5862802a --- /dev/null +++ b/.docs/doc-report-latest.md @@ -0,0 +1,131 @@ +# Documentation Audit Report + +**Date:** 2026-06-04 +**Agent:** documentation-generator +**Scope:** 6 key crates (consumer-facing, high-traffic) + +--- + +## Executive 
Summary + +| Metric | Value | +|--------|-------| +| Crates scanned | 6 | +| Total public items | 882 | +| Undocumented | 339 | +| Coverage | 62% | +| SIGNIFICANT_GAPS | 5 crates | +| MINOR_GAPS | 1 crate | +| CLEAN | 0 crates | + +--- + +## Per-Crate Results + +| Crate | Public Items | Undocumented | % Gap | Verdict | +|-------|-------------|--------------|-------|---------| +| `terraphim_types` | 344 | 128 | 37% | SIGNIFICANT_GAPS | +| `terraphim_service` | 250 | 89 | 36% | SIGNIFICANT_GAPS | +| `terraphim_sessions` | 111 | 44 | 40% | SIGNIFICANT_GAPS | +| `terraphim_automata` | 142 | 62 | 44% | SIGNIFICANT_GAPS | +| `terraphim_persistence` | 34 | 15 | 44% | SIGNIFICANT_GAPS | +| `haystack_core` | 1 | 1 | 100% | MINOR_GAPS | + +--- + +## Priority Undocumented Items + +### `terraphim_persistence` — highest-density critical types + +| File | Line | Type | Name | +|------|------|------|------| +| `lib.rs` | 66 | struct | `DeviceStorage` | +| `lib.rs` | 220 | trait | `Persistable` | +| `error.rs` | 5 | enum | `Error` | +| `conversation.rs` | 10 | trait | `ConversationPersistence` | +| `conversation.rs` | 32 | struct | `ConversationIndex` | +| `lib.rs` | 18-24 | mod | 7 undocumented `pub mod` re-exports | + +### `terraphim_types` — most undocumented items (128) + +Top priority (referenced workspace-wide): + +| File | Line | Type | Name | +|------|------|------|------| +| `lib.rs` | 171 | struct | `RoleName` | +| `lib.rs` | 262 | struct | `NormalizedTermValue` | +| `lib.rs` | 306 | struct | `NormalizedTerm` | +| `lib.rs` | 438 | struct | `Concept` | +| `lib.rs` | 476 | enum | `DocumentType` | +| `lib.rs` | 488 | struct | `RouteDirective` | +| `lib.rs` | 606 | struct | `MarkdownDirectives` | +| `medical_types.rs` | 36 | enum | `MedicalNodeType` | +| `medical_types.rs` | 141 | enum | `MedicalEdgeType` | +| `medical_types.rs` | 383 | struct | `MedicalNodeMetadata` | +| `hgnc.rs` | 12 | struct | `HgncGene` | + +### `terraphim_sessions` — core domain types (44 gaps) + +| 
File | Line | Type | Name | +|------|------|------|------| +| `model.rs` | 258 | struct | `Session` | +| `model.rs` | 165 | struct | `Message` | +| `model.rs` | 220 | struct | `SessionMetadata` | +| `model.rs` | 21 | enum | `MessageRole` | +| `model.rs` | 62 | enum | `ContentBlock` | +| `model.rs` | 756 | enum | `FileOperation` | +| `service.rs` | — | fn | all public methods | + +### `terraphim_automata` — evaluation and UMLS types (62 gaps) + +| File | Line | Type | Name | +|------|------|------|------| +| `evaluation.rs` | 17 | struct | `GroundTruthDocument` | +| `evaluation.rs` | 28 | struct | `ExpectedMatch` | +| `evaluation.rs` | 37 | struct | `ClassificationMetrics` | +| `evaluation.rs` | 48 | struct | `TermReport` | +| `evaluation.rs` | 55 | struct | `EvaluationResult` | +| `umls.rs` | 14 | struct | `UmlsConcept` | +| `umls.rs` | 47 | struct | `UmlsDataset` | +| `url_protector.rs` | 55 | struct | `ProtectedUrl` | + +### `haystack_core` — trivial fix + +| File | Line | Type | Name | +|------|------|------|------| +| `lib.rs` | 8 | trait | `HaystackProvider` | + +--- + +## CHANGELOG.md Updates + +Added under `### Fixed` (2026-06-04): +- `fix(agent)`: HTTP 4xx classified as `ErrorGeneral` not `ErrorNetwork`; integration test `server_http_error_exits_1` added (Refs #1992) + +The `docs(specs)` commit (98fa93b32) is an internal documentation update with no user-facing changelog entry. + +--- + +## Recommendations + +1. **Quick wins** (< 30 min combined): + - `haystack_core::HaystackProvider` — one line + - `terraphim_persistence::Error` enum — one line + - `terraphim_persistence` module re-exports (7 lines) + +2. **Medium effort** (half-day): + - `terraphim_persistence` core traits: `DeviceStorage`, `Persistable`, `ConversationPersistence` + - `terraphim_types` core structs: `RoleName`, `NormalizedTerm`, `Concept`, `DocumentType` + +3. 
**Batch effort** (track as issue): + - `terraphim_sessions::model` — all domain types + - `terraphim_automata::evaluation` — metrics structs + - `terraphim_service` module re-exports + +See `.docs/api-reference-snippets.md` for proposed doc comments on key types. + +--- + +## Gitea + +Findings posted as comment on issue #2137 (Theme-ID: doc-gap). diff --git a/CHANGELOG.md b/CHANGELOG.md index 4866957c8..778cff227 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- **Agent HTTP error classification** HTTP 4xx responses now classified as `ErrorGeneral` rather than `ErrorNetwork`; integration test `server_http_error_exits_1` added (Refs #1992, 2026-06-04) - **Redis security exposure** Docker Compose Redis service now binds to `127.0.0.1:6379` instead of `0.0.0.0:6379` to prevent unintended public exposure of the cache (Refs #1313, 2026-05-31) - **Nested `cargo run` in exit-code tests** replaced with `cargo_bin!` macro to avoid file-lock deadlock under concurrent `cargo test` (2026-06-01) - **ADF KG-router fallback respawn loop** closed after quota exit — agents no longer re-routed to `anthropic/sonnet` indefinitely when per-agent config or quota-fallback chose another provider (Refs #1793, PR#1794, 2026-05-22) diff --git a/reports/spec-validation-20260604.md b/reports/spec-validation-20260604.md new file mode 100644 index 000000000..7328e2ce4 --- /dev/null +++ b/reports/spec-validation-20260604.md @@ -0,0 +1,76 @@ +## spec-validator verdict: CONDITIONAL PASS + +**Date**: 2026-06-04 00:29 CEST +**Agent**: Carthos (Domain Architect / spec-validator) +**Issue**: #1992 — `test_server_mode_search_with_selected_role` HTTP 400 → exit 6 + +--- + +### Findings Summary + +| Severity | Finding | +|----------|---------| +| P1 BLOCKER | PR #2011 targets Gitea `main`, but `crates/terraphim_agent` does not exist on Gitea — it was extracted to GitHub (origin) in the E1-E5 polyrepo 
cycle. `mergeable: false` is structural, not a rebase conflict. | +| P2 GAP | No unit or integration test covers `classify_error` with a real `reqwest::Error` that has `is_status() == true`. The `is_status()` branch added by PR #2011 is unverified by any test. | + +--- + +### Spec Alignment Analysis + +**Contract** (`crates/terraphim_agent/src/robot/exit_codes.rs`): + +| Code | Name | Semantic | +|------|------|----------| +| 1 | `ErrorGeneral` | General/unspecified error | +| 4 | `ErrorNotFound` | No results found | +| 6 | `ErrorNetwork` | **Network or connectivity issue** | + +**Violation on `main`**: `classify_error` (line 1340) returns `ErrorNetwork` (6) for ANY `reqwest::Error`, including HTTP status responses (400, 404, etc.). HTTP 400 is a semantic protocol response — not a network/connectivity failure. This violates the documented meaning of `ErrorNetwork`. + +**Test assertion** (`server_mode_tests.rs:222`): `code == 0 || code == 1` is correct. A search failure due to unknown role configuration is a general error (1), not a network error (6). + +**Fix in PR #2011** (`crates/terraphim_agent/src/main.rs` diff): +```rust +if re.is_status() { + if let Some(status) = re.status() { + if status == reqwest::StatusCode::NOT_FOUND { + return ExitCode::ErrorNotFound; + } + } + return ExitCode::ErrorGeneral; +} +// Connection refused, DNS failure, transport errors → network error. +return ExitCode::ErrorNetwork; +``` + +This is **semantically correct** and consistent with the documented exit code contract. Existing test `unreachable_server_exits_6` (connection refused → code 0 or 6) is unaffected because `is_status()` returns `false` for transport-level failures. + +--- + +### Actions Required + +**P1 (blocks merge)**: Re-route the fix to GitHub. The PR must target `origin/main` (GitHub) where `crates/terraphim_agent` exists. 
Either: +- Open a new GitHub PR from the same branch `task/1992-fix-search-http-status-exit-code`, OR +- Close PR #2011 on Gitea and apply the change directly to GitHub + +**P2 (test coverage)**: Add a test that invokes a live server returning HTTP 400 and asserts `code == 1`. The integration test suite at `crates/terraphim_agent/tests/exit_codes.rs` already has `unreachable_server_exits_6` (lines 128–147) as the pattern. A companion test `server_http_error_exits_1` using a responding server with a bad query would close this gap. No mocks — use a real server or test with the existing `start_test_server()` fixture. + +--- + +### Existing Test Coverage + +Six `classify_error` unit tests in `mod classify_error_tests` (lines 1393–1507) use plain `anyhow::anyhow!("string")` errors. None exercise the `#[cfg(feature = "server")]` reqwest path. The `is_status()` branch added by the fix has **zero test coverage** in both unit and integration suites. + +--- + +### Traceability + +| Req | Requirement | Spec Ref | Impl Ref | Test | Status | +|-----|-------------|----------|----------|------|--------| +| REQ-01 | HTTP status errors exit 1, not 6 | `exit_codes.rs` `ErrorNetwork` doc | `main.rs:1340` classify_error | `server_mode_tests.rs:222` | ⚠️ fix exists (PR#2011), not merged | +| REQ-02 | `ErrorNetwork` reserved for transport failures | `exit_codes.rs:18` | `main.rs:1348` | `exit_codes.rs:130` unreachable_server | ✅ | +| REQ-03 | `is_status()` path tested | — | PR#2011 `main.rs:1340–1355` | **MISSING** | ❌ | + +--- + +If PR #2011 is re-submitted targeting GitHub with a companion integration test for the `is_status()` path, verdict becomes **PASS**. 
From b9358edafac62012a51a1f63be78008bef2e285a Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 05:13:02 +0200 Subject: [PATCH 04/13] feat(pr-validator): agent work [auto-commit] --- .../tests/shared_learning_cli_tests.rs | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/crates/terraphim_agent/tests/shared_learning_cli_tests.rs b/crates/terraphim_agent/tests/shared_learning_cli_tests.rs index bc26b2637..5a605b4be 100644 --- a/crates/terraphim_agent/tests/shared_learning_cli_tests.rs +++ b/crates/terraphim_agent/tests/shared_learning_cli_tests.rs @@ -38,12 +38,13 @@ async fn shared_list_empty_store() { async fn shared_list_with_trust_level_filter() { let store = create_store().await; - let l1 = SharedLearning::new( + let mut l1 = SharedLearning::new( "L1 learning".to_string(), "content".to_string(), SharedLearningSource::Manual, "test-agent".to_string(), ); + l1.promote_to_l1(); store.insert(l1).await.expect("insert l1"); let mut l2 = SharedLearning::new( @@ -52,6 +53,7 @@ async fn shared_list_with_trust_level_filter() { SharedLearningSource::Manual, "test-agent".to_string(), ); + l2.promote_to_l1(); l2.promote_to_l2(); store.insert(l2).await.expect("insert l2"); @@ -92,6 +94,7 @@ async fn shared_promote_l1_to_l2() { let id = learning.id.clone(); store.insert(learning).await.expect("insert"); + store.promote_to_l1(&id).await.expect("promote to l1"); store.promote_to_l2(&id).await.expect("promote to l2"); let fetched = store.get(&id).await.expect("get after promote"); @@ -124,12 +127,13 @@ async fn shared_stats_counts() { // Insert 2 L1, 1 L2 for i in 0..2 { - let l = SharedLearning::new( + let mut l = SharedLearning::new( format!("L1 item {}", i), "content".to_string(), SharedLearningSource::Manual, "agent".to_string(), ); + l.promote_to_l1(); store.insert(l).await.expect("insert l1"); } @@ -139,6 +143,7 @@ async fn shared_stats_counts() { SharedLearningSource::Manual, "agent".to_string(), ); + l2.promote_to_l1(); 
l2.promote_to_l2(); store.insert(l2).await.expect("insert l2"); @@ -173,7 +178,7 @@ async fn shared_import_creates_l1_entries() { let error = "remote: error: denied".to_string(); let tags = vec!["git".to_string(), "push".to_string()]; - let shared = SharedLearning::new( + let mut shared = SharedLearning::new( command.clone(), error.clone(), SharedLearningSource::BashHook, @@ -182,6 +187,7 @@ async fn shared_import_creates_l1_entries() { .with_original_command(command) .with_error_context(error) .with_keywords(tags); + shared.promote_to_l1(); store .insert(shared) @@ -236,7 +242,8 @@ async fn shared_store_survives_restart() { let id = learning.id.clone(); store.insert(learning).await.expect("insert"); - // 3. Promote it to L2 + // 3. Promote it to L2 via L1 (required promotion path: L0 → L1 → L2) + store.promote_to_l1(&id).await.expect("promote to l1"); store.promote_to_l2(&id).await.expect("promote to l2"); // 4. Drop the store (simulating process exit) From 252eac572810934cf6d313231727de491ed6578e Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 05:48:18 +0200 Subject: [PATCH 05/13] fix(rolegraph): add serde(default) to trigger_descriptions and pinned_node_ids Refs #2039 Deserialising a SerializableRoleGraph JSON written before issue #84 (trigger-based KG retrieval) was merged would fail with a missing-field error because trigger_descriptions and pinned_node_ids had no serde(default) annotation. Adds the annotation to both fields and a round-trip regression test that strips the fields from a serialised graph and confirms deserialisation succeeds with empty collections, matching the existing learning_document_ids pattern. 
Co-Authored-By: Claude Sonnet 4.6 --- crates/terraphim_rolegraph/src/lib.rs | 32 +++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/crates/terraphim_rolegraph/src/lib.rs b/crates/terraphim_rolegraph/src/lib.rs index c707b6cd9..077a54207 100644 --- a/crates/terraphim_rolegraph/src/lib.rs +++ b/crates/terraphim_rolegraph/src/lib.rs @@ -296,8 +296,10 @@ pub struct SerializableRoleGraph { /// reverse lookup - matched id into normalized term pub ac_reverse_nterm: AHashMap, /// Trigger descriptions for each node_id (used to rebuild TriggerIndex) + #[serde(default)] pub trigger_descriptions: AHashMap, /// Node IDs that are pinned (always included in results) + #[serde(default)] pub pinned_node_ids: Vec, /// Document IDs that were indexed from shared learnings #[serde(default)] @@ -2302,4 +2304,34 @@ mod tests { let results = restored.find_matching_node_ids_with_fallback("trigger one text", false); assert!(results.contains(&1u64)); } + + #[test] + async fn serde_default_round_trip_old_json_without_trigger_fields() { + // Simulate a persisted SerializableRoleGraph written before issue #84 was merged. + // We serialise a fresh RoleGraph, strip trigger_descriptions and pinned_node_ids + // from the JSON, then deserialise. serde(default) must supply empty collections + // rather than returning a missing-field error. + let role = "test role".to_string(); + let thesaurus = Thesaurus::new("test".to_string()); + let rolegraph = RoleGraph::new(role.into(), thesaurus).await.unwrap(); + let serializable = rolegraph.to_serializable(); + let full_json = serializable.to_json().unwrap(); + + // Remove the two fields introduced by issue #84 to simulate old JSON. 
+ let mut value: serde_json::Value = + serde_json::from_str(&full_json).expect("serialization produced invalid JSON"); + value + .as_object_mut() + .unwrap() + .remove("trigger_descriptions"); + value.as_object_mut().unwrap().remove("pinned_node_ids"); + let old_json = serde_json::to_string(&value).unwrap(); + + let result = SerializableRoleGraph::from_json(&old_json); + assert!(result.is_ok(), "expected Ok but got: {:?}", result.err()); + let sg = result.unwrap(); + assert!(sg.trigger_descriptions.is_empty()); + assert!(sg.pinned_node_ids.is_empty()); + assert!(sg.learning_document_ids.is_empty()); + } } From 65756a5fa5043052eead80e6e0d4a47317fb8d98 Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 06:07:32 +0200 Subject: [PATCH 06/13] feat(pr-reviewer): agent work [auto-commit] --- crates/terraphim_orchestrator/src/lib.rs | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/crates/terraphim_orchestrator/src/lib.rs b/crates/terraphim_orchestrator/src/lib.rs index 95302471d..4b7adeab6 100644 --- a/crates/terraphim_orchestrator/src/lib.rs +++ b/crates/terraphim_orchestrator/src/lib.rs @@ -274,7 +274,8 @@ pub struct AgentOrchestrator { active_flows: HashMap>, /// Active compound review execution (spawned in background to avoid /// blocking reconcile_tick). None when no compound review is running. - active_compound_review: Option>>, + active_compound_review: + Option>>, /// Per-project mention cursors, keyed by project id. 
/// /// Each project gets its own cursor so repo-wide polls can advance @@ -6208,14 +6209,12 @@ impl AgentOrchestrator { if elapsed > std::time::Duration::from_secs(5) { warn!( tick = self.tick_count, - elapsed_ms, - "reconcile_tick SLOW: took > 5s, likely blocking agent polling" + elapsed_ms, "reconcile_tick SLOW: took > 5s, likely blocking agent polling" ); } else { info!( tick = self.tick_count, - elapsed_ms, - "reconcile_tick complete" + elapsed_ms, "reconcile_tick complete" ); } } @@ -8036,9 +8035,7 @@ Remove the pause flag once the underlying failure is resolved:\n\n\ let git_ref = "HEAD".to_string(); let base_ref = self.config.compound_review.base_branch.clone(); let workflow = self.compound_workflow.clone(); - let handle = tokio::spawn(async move { - workflow.run(&git_ref, &base_ref).await - }); + let handle = tokio::spawn(async move { workflow.run(&git_ref, &base_ref).await }); self.active_compound_review = Some(handle); } ScheduleEvent::Flow(flow_def) => { From 113ced57583b03d2be631997c65d55699b1b9120 Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 06:35:06 +0200 Subject: [PATCH 07/13] fix(clippy): flatten manual_flatten in server_http_error test Fixes #2133 Replace `for stream in incoming() { if let Ok(s) = stream { ... } }` with `for s in incoming().flatten() { ... }` to satisfy clippy::manual_flatten. 
Co-Authored-By: Claude Sonnet 4.6 --- crates/terraphim_agent/tests/exit_codes.rs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/crates/terraphim_agent/tests/exit_codes.rs b/crates/terraphim_agent/tests/exit_codes.rs index 2ffd279e2..4a6be26b6 100644 --- a/crates/terraphim_agent/tests/exit_codes.rs +++ b/crates/terraphim_agent/tests/exit_codes.rs @@ -167,14 +167,12 @@ fn server_http_error_exits_1() { let port = listener.local_addr().unwrap().port(); std::thread::spawn(move || { - for stream in listener.incoming() { - if let Ok(mut s) = stream { - let mut buf = [0u8; 2048]; - let _ = std::io::Read::read(&mut s, &mut buf); - let _ = s.write_all( - b"HTTP/1.1 400 Bad Request\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", - ); - } + for mut s in listener.incoming().flatten() { + let mut buf = [0u8; 2048]; + let _ = std::io::Read::read(&mut s, &mut buf); + let _ = s.write_all( + b"HTTP/1.1 400 Bad Request\r\nContent-Length: 0\r\nConnection: close\r\n\r\n", + ); } }); From c5bf286d7cd73d340090d9893a1ceefa2955aeb3 Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 09:23:32 +0200 Subject: [PATCH 08/13] feat(pr-reviewer): agent work [auto-commit] --- crates/terraphim_agent/src/client.rs | 166 +++--------------- .../src/commands/modes/firecracker.rs | 9 +- crates/terraphim_agent/tests/unit_test.rs | 165 ----------------- crates/terraphim_agent/tests/vm_api_tests.rs | 1 + .../tests/vm_functionality_tests.rs | 1 + 5 files changed, 29 insertions(+), 313 deletions(-) diff --git a/crates/terraphim_agent/src/client.rs b/crates/terraphim_agent/src/client.rs index 5568aabdb..db18257ba 100644 --- a/crates/terraphim_agent/src/client.rs +++ b/crates/terraphim_agent/src/client.rs @@ -28,7 +28,6 @@ impl ApiClient { } } - #[allow(dead_code)] pub async fn health(&self) -> Result<()> { let url = format!("{}/health", self.base); let res = self.http.get(url).send().await?; @@ -94,7 +93,6 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] pub 
async fn get_rolegraph_edges(&self, role: Option<&str>) -> Result { self.rolegraph(role).await } @@ -211,68 +209,24 @@ pub struct AutocompleteResponse { pub suggestions: Vec, } -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct AsyncSummarizeResponse { - pub status: String, - pub task_id: String, - pub message: Option, - pub error: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct TaskStatusResponse { - pub status: String, - pub task_id: String, - pub state: String, // "pending", "processing", "completed", "failed", "cancelled" - pub progress: Option, - pub result: Option, - pub error: Option, - pub created_at: Option, - pub updated_at: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct QueueStatsResponse { - pub status: String, - pub pending_tasks: usize, - pub processing_tasks: usize, - pub completed_tasks: usize, - pub failed_tasks: usize, - pub total_tasks: usize, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct BatchSummarizeRequest { - pub documents: Vec, - pub role: Option, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -pub struct BatchSummarizeResponse { - pub status: String, - pub task_ids: Vec, - pub message: Option, - pub error: Option, -} - // VM Management Types +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmWithIp { pub vm_id: String, pub ip_address: String, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmPoolListResponse { pub vms: Vec, pub stats: VmPoolStatsResponse, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmPoolStatsResponse { pub total_ips: usize, pub allocated_ips: usize, @@ -280,8 +234,8 @@ pub struct VmPoolStatsResponse { pub utilization_percent: u8, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub 
struct VmStatusResponse { pub vm_id: String, pub status: String, @@ -290,8 +244,8 @@ pub struct VmStatusResponse { pub updated_at: Option, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmExecuteRequest { pub code: String, pub language: String, @@ -300,8 +254,8 @@ pub struct VmExecuteRequest { pub timeout_ms: Option, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmExecuteResponse { pub execution_id: String, pub vm_id: String, @@ -314,8 +268,8 @@ pub struct VmExecuteResponse { pub error: Option, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmTask { pub id: String, pub vm_id: String, @@ -324,29 +278,29 @@ pub struct VmTask { pub updated_at: Option, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmTasksResponse { pub tasks: Vec, pub vm_id: String, pub total: usize, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmAllocateRequest { pub vm_id: String, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmAllocateResponse { pub vm_id: String, pub ip_address: String, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmMetricsResponse { pub vm_id: String, pub status: String, @@ -359,8 +313,8 @@ pub struct VmMetricsResponse { pub updated_at: Option, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct VmAgentRequest { pub agent_id: String, pub task: String, @@ -368,8 +322,8 @@ pub struct VmAgentRequest { pub timeout_ms: Option, } +#[cfg(feature = "firecracker")] #[derive(Debug, Serialize, Deserialize, Clone)] -#[allow(dead_code)] pub struct 
VmAgentResponse { pub task_id: String, pub agent_id: String, @@ -445,79 +399,9 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] - pub async fn async_summarize_document( - &self, - document: &Document, - role: Option<&str>, - ) -> Result { - let url = format!("{}/documents/async_summarize", self.base); - let req = SummarizeRequest { - document: document.clone(), - role: role.map(|r| r.to_string()), - }; - let res = self.http.post(url).json(&req).send().await?; - let body = res - .error_for_status()? - .json::() - .await?; - Ok(body) - } - - #[allow(dead_code)] - pub async fn get_task_status(&self, task_id: &str) -> Result { - let url = format!( - "{}/summarization/task/{}/status", - self.base, - urlencoding::encode(task_id) - ); - let res = self.http.get(url).send().await?; - let body = res.error_for_status()?.json::().await?; - Ok(body) - } - - #[allow(dead_code)] - pub async fn cancel_task(&self, task_id: &str) -> Result { - let url = format!( - "{}/summarization/task/{}/cancel", - self.base, - urlencoding::encode(task_id) - ); - let res = self.http.post(url).send().await?; - let body = res.error_for_status()?.json::().await?; - Ok(body) - } - - #[allow(dead_code)] - pub async fn get_queue_stats(&self) -> Result { - let url = format!("{}/summarization/queue/stats", self.base); - let res = self.http.get(url).send().await?; - let body = res.error_for_status()?.json::().await?; - Ok(body) - } - - #[allow(dead_code)] - pub async fn batch_summarize_documents( - &self, - documents: &[Document], - role: Option<&str>, - ) -> Result { - let url = format!("{}/summarization/batch", self.base); - let req = BatchSummarizeRequest { - documents: documents.to_vec(), - role: role.map(|r| r.to_string()), - }; - let res = self.http.post(url).json(&req).send().await?; - let body = res - .error_for_status()? 
- .json::() - .await?; - Ok(body) - } - // VM Management APIs - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn list_vms(&self) -> Result { let url = format!("{}/api/vm-pool", self.base); let res = self.http.get(url).send().await?; @@ -525,7 +409,7 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn get_vm_pool_stats(&self) -> Result { let url = format!("{}/api/vm-pool/stats", self.base); let res = self.http.get(url).send().await?; @@ -536,7 +420,7 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn get_vm_status(&self, vm_id: &str) -> Result { let url = format!("{}/api/vms/{}", self.base, urlencoding::encode(vm_id)); let res = self.http.get(url).send().await?; @@ -544,7 +428,7 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn execute_vm_code( &self, code: &str, @@ -557,14 +441,14 @@ impl ApiClient { language: language.to_string(), agent_id: "tui-user".to_string(), vm_id: vm_id.map(|s| s.to_string()), - timeout_ms: Some(30000), // 30 second default timeout + timeout_ms: Some(30000), }; let res = self.http.post(url).json(&req).send().await?; let body = res.error_for_status()?.json::().await?; Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn list_vm_tasks(&self, vm_id: &str) -> Result { let url = format!("{}/api/vms/{}/tasks", self.base, urlencoding::encode(vm_id)); let res = self.http.get(url).send().await?; @@ -572,7 +456,7 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn allocate_vm_ip(&self, vm_id: &str) -> Result { let url = format!("{}/api/vm-pool/allocate", self.base); let req = VmAllocateRequest { @@ -583,7 +467,7 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn release_vm_ip(&self, vm_id: &str) -> Result<()> { let url = format!( "{}/api/vm-pool/release/{}", @@ 
-595,7 +479,7 @@ impl ApiClient { Ok(()) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn get_vm_metrics(&self, vm_id: &str) -> Result { let url = format!( "{}/api/vms/{}/metrics", @@ -607,7 +491,7 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn get_all_vm_metrics(&self) -> Result> { let url = format!("{}/api/vms/metrics", self.base); let res = self.http.get(url).send().await?; @@ -618,7 +502,7 @@ impl ApiClient { Ok(body) } - #[allow(dead_code)] + #[cfg(feature = "firecracker")] pub async fn execute_agent_task( &self, agent_id: &str, @@ -630,7 +514,7 @@ impl ApiClient { agent_id: agent_id.to_string(), task: task.to_string(), vm_id: vm_id.map(|s| s.to_string()), - timeout_ms: Some(60000), // 60 second default timeout for agent tasks + timeout_ms: Some(60000), }; let res = self.http.post(url).json(&req).send().await?; let body = res.error_for_status()?.json::().await?; diff --git a/crates/terraphim_agent/src/commands/modes/firecracker.rs b/crates/terraphim_agent/src/commands/modes/firecracker.rs index 57941fd84..4598ea5df 100644 --- a/crates/terraphim_agent/src/commands/modes/firecracker.rs +++ b/crates/terraphim_agent/src/commands/modes/firecracker.rs @@ -135,7 +135,7 @@ impl FirecrackerExecutor { stdout: response.stdout.clone(), stderr: response.stderr.clone(), duration_ms, - resource_usage: Some(self.calculate_resource_usage(&response)), + resource_usage: Some(self.calculate_resource_usage()), }) } @@ -158,12 +158,7 @@ impl FirecrackerExecutor { } /// Calculate resource usage from VM response - fn calculate_resource_usage( - &self, - _response: &crate::client::VmExecuteResponse, - ) -> ResourceUsage { - // This would be enhanced in a real implementation - // For now, return default values + fn calculate_resource_usage(&self) -> ResourceUsage { default_resource_usage() } diff --git a/crates/terraphim_agent/tests/unit_test.rs b/crates/terraphim_agent/tests/unit_test.rs index 
fc9d76705..1b5d8b200 100644 --- a/crates/terraphim_agent/tests/unit_test.rs +++ b/crates/terraphim_agent/tests/unit_test.rs @@ -329,171 +329,6 @@ fn test_rolegraph_response_deserialization() { assert_eq!(edge.rank, 50); } -/// Test TaskStatusResponse deserialization with different states -#[test] -fn test_task_status_response_deserialization() { - let test_cases = vec![ - ( - r#"{ - "status": "success", - "task_id": "task-123", - "state": "pending", - "progress": null, - "result": null, - "error": null, - "created_at": "2023-01-01T00:00:00Z", - "updated_at": "2023-01-01T00:00:00Z" - }"#, - "pending", - ), - ( - r#"{ - "status": "success", - "task_id": "task-456", - "state": "processing", - "progress": 0.5, - "result": null, - "error": null, - "created_at": "2023-01-01T00:00:00Z", - "updated_at": "2023-01-01T00:01:00Z" - }"#, - "processing", - ), - ( - r#"{ - "status": "success", - "task_id": "task-789", - "state": "completed", - "progress": 1.0, - "result": "Task completed successfully", - "error": null, - "created_at": "2023-01-01T00:00:00Z", - "updated_at": "2023-01-01T00:05:00Z" - }"#, - "completed", - ), - ( - r#"{ - "status": "success", - "task_id": "task-000", - "state": "failed", - "progress": null, - "result": null, - "error": "Task failed due to error", - "created_at": "2023-01-01T00:00:00Z", - "updated_at": "2023-01-01T00:02:00Z" - }"#, - "failed", - ), - ]; - - for (json_response, expected_state) in test_cases { - let response: Result = serde_json::from_str(json_response); - assert!( - response.is_ok(), - "TaskStatusResponse should be deserializable for state {}", - expected_state - ); - - let task_response = response.unwrap(); - assert_eq!(task_response.status, "success"); - assert_eq!(task_response.state, expected_state); - assert!(task_response.task_id.starts_with("task-")); - } -} - -/// Test QueueStatsResponse deserialization -#[test] -fn test_queue_stats_response_deserialization() { - let json_response = r#"{ - "status": "success", - 
"pending_tasks": 5, - "processing_tasks": 2, - "completed_tasks": 100, - "failed_tasks": 3, - "total_tasks": 110 - }"#; - - let response: Result = serde_json::from_str(json_response); - assert!( - response.is_ok(), - "QueueStatsResponse should be deserializable" - ); - - let stats_response = response.unwrap(); - assert_eq!(stats_response.status, "success"); - assert_eq!(stats_response.pending_tasks, 5); - assert_eq!(stats_response.processing_tasks, 2); - assert_eq!(stats_response.completed_tasks, 100); - assert_eq!(stats_response.failed_tasks, 3); - assert_eq!(stats_response.total_tasks, 110); - - // Verify totals add up correctly - let sum = stats_response.pending_tasks - + stats_response.processing_tasks - + stats_response.completed_tasks - + stats_response.failed_tasks; - assert_eq!(sum, stats_response.total_tasks); -} - -/// Test BatchSummarizeRequest serialization -#[test] -fn test_batch_summarize_request_serialization() { - let documents = vec![ - Document { - id: "doc1".to_string(), - title: "Document 1".to_string(), - body: "Content 1".to_string(), - url: "".to_string(), - description: None, - summarization: None, - stub: None, - tags: None, - rank: None, - source_haystack: None, - doc_type: DocumentType::KgEntry, - synonyms: None, - route: None, - priority: None, - quality_score: None, - }, - Document { - id: "doc2".to_string(), - title: "Document 2".to_string(), - body: "Content 2".to_string(), - url: "".to_string(), - description: None, - summarization: None, - stub: None, - tags: None, - rank: None, - source_haystack: None, - doc_type: DocumentType::KgEntry, - synonyms: None, - route: None, - priority: None, - quality_score: None, - }, - ]; - - let batch_request = BatchSummarizeRequest { - documents: documents.clone(), - role: Some("TestRole".to_string()), - }; - - let json_result = serde_json::to_string(&batch_request); - assert!( - json_result.is_ok(), - "BatchSummarizeRequest should be serializable" - ); - - let json_str = json_result.unwrap(); - 
assert!(json_str.contains("doc1")); - assert!(json_str.contains("doc2")); - assert!(json_str.contains("Document 1")); - assert!(json_str.contains("Document 2")); - assert!(json_str.contains("TestRole")); -} /// Test error response handling #[test] diff --git a/crates/terraphim_agent/tests/vm_api_tests.rs b/crates/terraphim_agent/tests/vm_api_tests.rs index d7442f981..117bdece5 100644 --- a/crates/terraphim_agent/tests/vm_api_tests.rs +++ b/crates/terraphim_agent/tests/vm_api_tests.rs @@ -1,3 +1,4 @@ +#![cfg(feature = "firecracker")] use terraphim_agent::client::*; /// Test VM-related API types serialization diff --git a/crates/terraphim_agent/tests/vm_functionality_tests.rs b/crates/terraphim_agent/tests/vm_functionality_tests.rs index deb57a32c..46e9dbbe4 100644 --- a/crates/terraphim_agent/tests/vm_functionality_tests.rs +++ b/crates/terraphim_agent/tests/vm_functionality_tests.rs @@ -1,3 +1,4 @@ +#![cfg(feature = "firecracker")] use terraphim_agent::client::*; /// Test VM command parsing with feature gates From d13f139c228651c5dc0dbe0391d3be409a988453 Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 10:02:47 +0200 Subject: [PATCH 09/13] fix(agent): gate firecracker module and ApiClient VM methods with cfg(feature = "firecracker") firecracker.rs called get_vm_status() and execute_vm_code() from ApiClient which are #[cfg(feature = "firecracker")] but the module was compiled unconditionally. This broke cargo test -p terraphim_agent with default features. Fix: add #[cfg(feature = "firecracker")] to pub mod firecracker in modes/mod.rs and update HybridExecutor to conditionally use FirecrackerExecutor only when the feature is enabled, falling back to LocalExecutor otherwise. 
Refs #2164 Co-Authored-By: Claude Sonnet 4.6 --- .../src/commands/modes/hybrid.rs | 96 +++++++++++++++---- .../terraphim_agent/src/commands/modes/mod.rs | 5 + 2 files changed, 82 insertions(+), 19 deletions(-) diff --git a/crates/terraphim_agent/src/commands/modes/hybrid.rs b/crates/terraphim_agent/src/commands/modes/hybrid.rs index e2da1009a..1181f3ed8 100644 --- a/crates/terraphim_agent/src/commands/modes/hybrid.rs +++ b/crates/terraphim_agent/src/commands/modes/hybrid.rs @@ -5,8 +5,10 @@ use super::{ CommandDefinition, CommandExecutionError, CommandExecutionResult, ExecutionMode, - ExecutorCapabilities, FirecrackerExecutor, LocalExecutor, + ExecutorCapabilities, LocalExecutor, }; +#[cfg(feature = "firecracker")] +use super::FirecrackerExecutor; use crate::commands::RiskLevel; use std::collections::HashMap; @@ -15,6 +17,7 @@ pub struct HybridExecutor { /// Local executor for safe commands local_executor: LocalExecutor, /// Firecracker executor for isolated execution + #[cfg(feature = "firecracker")] firecracker_executor: FirecrackerExecutor, /// Risk assessment settings risk_settings: RiskAssessmentSettings, @@ -149,6 +152,7 @@ impl HybridExecutor { pub fn new() -> Self { Self { local_executor: LocalExecutor::new(), + #[cfg(feature = "firecracker")] firecracker_executor: FirecrackerExecutor::new(), risk_settings: RiskAssessmentSettings::default(), } @@ -158,11 +162,13 @@ impl HybridExecutor { pub fn with_settings(risk_settings: RiskAssessmentSettings) -> Self { Self { local_executor: LocalExecutor::new(), + #[cfg(feature = "firecracker")] firecracker_executor: FirecrackerExecutor::new(), risk_settings, } } + #[cfg(feature = "firecracker")] /// Create a hybrid executor with API client for VM operations pub fn with_api_client(api_client: crate::client::ApiClient) -> Self { Self { @@ -190,7 +196,10 @@ impl HybridExecutor { } } ExecutionMode::Firecracker => { + #[cfg(feature = "firecracker")] return ExecutionMode::Firecracker; + #[cfg(not(feature = "firecracker"))] + 
return ExecutionMode::Local; } ExecutionMode::Hybrid => { // Perform risk assessment @@ -211,15 +220,24 @@ impl HybridExecutor { // Check command risk level match definition.risk_level { RiskLevel::Critical | RiskLevel::High => { + #[cfg(feature = "firecracker")] return ExecutionMode::Firecracker; + #[cfg(not(feature = "firecracker"))] + return ExecutionMode::Local; } RiskLevel::Medium => { // Medium risk: check other factors if self.has_high_risk_indicators(command_str) { + #[cfg(feature = "firecracker")] return ExecutionMode::Firecracker; + #[cfg(not(feature = "firecracker"))] + return ExecutionMode::Local; } if definition.resource_limits.is_some() { + #[cfg(feature = "firecracker")] return ExecutionMode::Firecracker; + #[cfg(not(feature = "firecracker"))] + return ExecutionMode::Local; } } RiskLevel::Low => { @@ -230,8 +248,11 @@ impl HybridExecutor { } } - // Default to Firecracker for safety - ExecutionMode::Firecracker + // Default to Firecracker for safety when feature enabled, otherwise Local + #[cfg(feature = "firecracker")] + return ExecutionMode::Firecracker; + #[cfg(not(feature = "firecracker"))] + ExecutionMode::Local } /// Check if command is safe for local execution @@ -407,21 +428,24 @@ impl super::CommandExecutor for HybridExecutor { // Execute with the appropriate executor match execution_mode { - ExecutionMode::Local => { + ExecutionMode::Local | ExecutionMode::Hybrid => { self.local_executor .execute_command(definition, parameters) .await } ExecutionMode::Firecracker => { - self.firecracker_executor - .execute_command(definition, parameters) - .await - } - ExecutionMode::Hybrid => { - // This shouldn't happen with proper risk assessment, but handle it - self.local_executor - .execute_command(definition, parameters) - .await + #[cfg(feature = "firecracker")] + { + self.firecracker_executor + .execute_command(definition, parameters) + .await + } + #[cfg(not(feature = "firecracker"))] + { + self.local_executor + .execute_command(definition, 
parameters) + .await + } } } } @@ -430,25 +454,40 @@ impl super::CommandExecutor for HybridExecutor { // Hybrid executor supports all modes by delegating to appropriate executors match mode { ExecutionMode::Local => self.local_executor.supports_mode(mode), - ExecutionMode::Firecracker => self.firecracker_executor.supports_mode(mode), - ExecutionMode::Hybrid => true, // Hybrid mode is what this executor provides + ExecutionMode::Firecracker => { + #[cfg(feature = "firecracker")] + return self.firecracker_executor.supports_mode(mode); + #[cfg(not(feature = "firecracker"))] + false + } + ExecutionMode::Hybrid => true, } } fn capabilities(&self) -> ExecutorCapabilities { - // Combine capabilities from both executors let local_caps = self.local_executor.capabilities(); + #[cfg(feature = "firecracker")] let vm_caps = self.firecracker_executor.capabilities(); - ExecutorCapabilities { - supports_resource_limits: vm_caps.supports_resource_limits, // VMs have better resource limiting + #[cfg(feature = "firecracker")] + return ExecutorCapabilities { + supports_resource_limits: vm_caps.supports_resource_limits, supports_network_access: vm_caps.supports_network_access, supports_file_system: local_caps.supports_file_system || vm_caps.supports_file_system, max_concurrent_commands: Some( local_caps.max_concurrent_commands.unwrap_or(0) + vm_caps.max_concurrent_commands.unwrap_or(0), ), - default_timeout: vm_caps.default_timeout, // Use VM timeout as default for safety + default_timeout: vm_caps.default_timeout, + }; + + #[cfg(not(feature = "firecracker"))] + ExecutorCapabilities { + supports_resource_limits: local_caps.supports_resource_limits, + supports_network_access: local_caps.supports_network_access, + supports_file_system: local_caps.supports_file_system, + max_concurrent_commands: local_caps.max_concurrent_commands, + default_timeout: local_caps.default_timeout, } } } @@ -493,6 +532,7 @@ mod tests { } #[test] + #[cfg(feature = "firecracker")] fn 
test_risk_assessment_high_risk_commands() { let hybrid = HybridExecutor::new(); @@ -508,6 +548,24 @@ mod tests { assert_eq!(mode, ExecutionMode::Firecracker); } + #[test] + #[cfg(not(feature = "firecracker"))] + fn test_risk_assessment_high_risk_commands_falls_back_to_local() { + let hybrid = HybridExecutor::new(); + + let risky_definition = CommandDefinition { + name: "dangerous".to_string(), + description: "Dangerous command".to_string(), + risk_level: RiskLevel::High, + execution_mode: ExecutionMode::Hybrid, + ..Default::default() + }; + + // Without firecracker feature, high-risk commands fall back to Local + let mode = hybrid.assess_command_risk("rm -rf /", &risky_definition); + assert_eq!(mode, ExecutionMode::Local); + } + #[test] fn test_dangerous_argument_detection() { let hybrid = HybridExecutor::new(); diff --git a/crates/terraphim_agent/src/commands/modes/mod.rs b/crates/terraphim_agent/src/commands/modes/mod.rs index f1f0f1179..0cbd78d2e 100644 --- a/crates/terraphim_agent/src/commands/modes/mod.rs +++ b/crates/terraphim_agent/src/commands/modes/mod.rs @@ -5,10 +5,12 @@ //! - Firecracker: Isolated execution in microVMs //! 
- Hybrid: Smart mode selection based on risk assessment +#[cfg(feature = "firecracker")] pub mod firecracker; pub mod hybrid; pub mod local; +#[cfg(feature = "firecracker")] pub use firecracker::FirecrackerExecutor; pub use hybrid::HybridExecutor; pub use local::LocalExecutor; @@ -53,7 +55,10 @@ pub struct ExecutorCapabilities { pub fn create_executor(mode: ExecutionMode) -> Box { match mode { ExecutionMode::Local => Box::new(LocalExecutor::new()), + #[cfg(feature = "firecracker")] ExecutionMode::Firecracker => Box::new(FirecrackerExecutor::new()), + #[cfg(not(feature = "firecracker"))] + ExecutionMode::Firecracker => Box::new(LocalExecutor::new()), ExecutionMode::Hybrid => Box::new(HybridExecutor::new()), } } From 0ee52a5c28f4e1f8c9962e41dc4ac8843313f9ec Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 11:59:24 +0200 Subject: [PATCH 10/13] docs: add item-level rustdoc to 6 zero-coverage crates; update CHANGELOG - haystack_atlassian: add /// to ConfluenceClient, JiraClient, AtlassianClient - haystack_core: add /// to HaystackProvider trait and associated items - haystack_discourse: add /// to DiscourseClient and Post re-exports - terraphim_ccusage: add /// to all public types and CcusageClient methods - terraphim_kg_linter: add /// to all public structs, enums, and functions - terraphim_negative_contribution: add /// to re-exported public items - CHANGELOG: add firecracker gate, clippy manual_flatten, rolegraph serde defaults, and rustdoc coverage improvement entries Workspace doc coverage: 81% (was 30% at prior scan). Zero-coverage crates: 1. 
Refs #2136 --- CHANGELOG.md | 4 ++++ crates/haystack_atlassian/src/lib.rs | 12 +++++++++++- crates/haystack_core/src/lib.rs | 6 ++++++ crates/haystack_discourse/src/lib.rs | 2 ++ crates/terraphim_ccusage/src/lib.rs | 17 +++++++++++++++++ crates/terraphim_kg_linter/src/lib.rs | 14 ++++++++++++++ .../terraphim_negative_contribution/src/lib.rs | 2 ++ 7 files changed, 56 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 778cff227..7836be732 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- **Firecracker feature gate** `terraphim_agent` firecracker module and `ApiClient` VM methods now compiled only with `cfg(feature = "firecracker")`, preventing compile errors in non-Firecracker builds (2026-06-04) +- **Clippy `manual_flatten`** `server_http_error_exits_1` test flattened to satisfy the `manual_flatten` lint (Fixes #2133, 2026-06-04) +- **RoleGraph serde defaults** `serde(default)` added to `trigger_descriptions` and `pinned_node_ids` fields in `SerializableRoleGraph`, fixing round-trip deserialisation of configs that omit these fields (Refs #2039, 2026-06-04) - **Agent HTTP error classification** HTTP 4xx responses now classified as `ErrorGeneral` rather than `ErrorNetwork`; integration test `server_http_error_exits_1` added (Refs #1992, 2026-06-04) - **Redis security exposure** Docker Compose Redis service now binds to `127.0.0.1:6379` instead of `0.0.0.0:6379` to prevent unintended public exposure of the cache (Refs #1313, 2026-05-31) - **Nested `cargo run` in exit-code tests** replaced with `cargo_bin!` macro to avoid file-lock deadlock under concurrent `cargo test` (2026-06-01) @@ -62,6 +65,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Docs +- **Rustdoc coverage: zero-coverage crates resolved** item-level `///` doc comments added to all public items in `haystack_atlassian`, `haystack_core`,
`haystack_discourse`, `terraphim_ccusage`, `terraphim_kg_linter`, and `terraphim_negative_contribution`; workspace coverage now 81 % (up from 30 % at previous scan on 2026-06-04, Refs #2136) - **`BUILD.md`** build instructions and troubleshooting guide - **`CONTRIBUTING.md`** contribution guidelines and code of conduct - **Architecture Decision Records (ADRs)** in `docs/architecture/` diff --git a/crates/haystack_atlassian/src/lib.rs b/crates/haystack_atlassian/src/lib.rs index 53d62a200..ed6cfcb21 100644 --- a/crates/haystack_atlassian/src/lib.rs +++ b/crates/haystack_atlassian/src/lib.rs @@ -9,19 +9,28 @@ use terraphim_types::{Document, SearchQuery}; pub mod confluence; pub mod jira; +/// HTTP client for searching Confluence spaces via CQL. pub struct ConfluenceClient { + /// Base URL of the Confluence instance, e.g. `https://example.atlassian.net`. pub base_url: String, + /// Atlassian account e-mail used for Basic auth. pub username: String, + /// Atlassian API token for Basic auth. pub token: String, } +/// HTTP client for searching Jira issues via JQL. pub struct JiraClient { + /// Base URL of the Jira instance. pub base_url: String, + /// Atlassian account e-mail used for Basic auth. pub username: String, + /// Atlassian API token for Basic auth. pub token: String, } impl ConfluenceClient { + /// Creates a new `ConfluenceClient` with the given credentials. pub fn new(base_url: String, username: String, token: String) -> Self { Self { base_url, @@ -32,6 +41,7 @@ impl ConfluenceClient { } impl JiraClient { + /// Creates a new `JiraClient` with the given credentials. pub fn new(base_url: String, username: String, token: String) -> Self { Self { base_url, @@ -141,7 +151,7 @@ impl HaystackProvider for JiraClient { } } -// Legacy client for backward compatibility +/// Legacy placeholder client. Prefer [`ConfluenceClient`] or [`JiraClient`]. 
pub struct AtlassianClient; impl HaystackProvider for AtlassianClient { diff --git a/crates/haystack_core/src/lib.rs b/crates/haystack_core/src/lib.rs index ef6c3c04a..c11ef8116 100644 --- a/crates/haystack_core/src/lib.rs +++ b/crates/haystack_core/src/lib.rs @@ -5,10 +5,16 @@ //! async search interface over heterogeneous backends. use terraphim_types::{Document, SearchQuery}; +/// Uniform async search interface for all Terraphim data-source integrations. +/// +/// Implement this trait to expose any backend (local filesystem, Confluence, +/// Discourse, JMAP, …) as a searchable haystack. pub trait HaystackProvider { + /// Error type returned by this provider's search operations. type Error: std::fmt::Display + std::fmt::Debug + Send + Sync + 'static; #[allow(async_fn_in_trait)] + /// Search the haystack and return matching documents. async fn search(&self, query: &SearchQuery) -> Result, Self::Error>; } diff --git a/crates/haystack_discourse/src/lib.rs b/crates/haystack_discourse/src/lib.rs index 6a2fc3582..3a5074ad2 100644 --- a/crates/haystack_discourse/src/lib.rs +++ b/crates/haystack_discourse/src/lib.rs @@ -5,5 +5,7 @@ mod client; mod models; +/// HTTP client for querying the Discourse search API and converting topics to [`terraphim_types::Document`]s. pub use client::DiscourseClient; +/// A Discourse post as returned by the search API. pub use models::Post; diff --git a/crates/terraphim_ccusage/src/lib.rs b/crates/terraphim_ccusage/src/lib.rs index 7bd4ebcb6..a227dd0ea 100644 --- a/crates/terraphim_ccusage/src/lib.rs +++ b/crates/terraphim_ccusage/src/lib.rs @@ -9,26 +9,33 @@ use std::path::PathBuf; use std::time::{Duration, Instant}; use thiserror::Error; +/// Errors that can occur during ccusage invocation or output parsing. #[derive(Error, Debug)] pub enum CcusageError { + /// No supported package runner (`bun`, `pnpm`, `yarn`, `npm`, `npx`) was found in `PATH`. 
#[error("No package runner found (bun, pnpm, yarn, npm, npx)")] NoRunner, + /// The package runner exited with a non-zero status. #[error("Runner execution failed: {0}")] RunnerFailed(String), + /// Failed to deserialise the JSON output from ccusage. #[error("Failed to parse ccusage output: {0}")] ParseError(String), + /// An I/O error occurred while spawning or reading the process. #[error("IO error: {0}")] IoError(#[from] std::io::Error), + /// The ccusage process did not complete within the allowed duration. #[error("Timeout after {0:?}")] Timeout(Duration), } pub type Result = std::result::Result; +/// Token and cost totals for a single calendar day. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DailyUsage { pub date: String, @@ -50,14 +57,18 @@ pub struct DailyUsage { pub cost_usd: Option, } +/// Collection of daily usage entries as returned by ccusage. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DailyUsageReport { pub daily: Vec, } +/// Selects which ccusage variant to invoke. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum CcusageProvider { + /// Use the `ccusage` package for Claude Code session data. Claude, + /// Use the `@ccusage/codex` package for Codex session data. Codex, } @@ -70,6 +81,7 @@ impl std::fmt::Display for CcusageProvider { } } +/// Thin wrapper around the ccusage CLI with an in-process TTL cache. pub struct CcusageClient { provider: CcusageProvider, home_path: Option, @@ -78,6 +90,7 @@ pub struct CcusageClient { } impl CcusageClient { + /// Creates a new client for the given provider with a 5-minute cache TTL. pub fn new(provider: CcusageProvider) -> Self { Self { provider, @@ -87,16 +100,19 @@ impl CcusageClient { } } + /// Overrides the home directory path forwarded to ccusage via `--home`. pub fn with_home_path(mut self, path: PathBuf) -> Self { self.home_path = Some(path); self } + /// Sets the TTL for cached query results. 
pub fn with_cache_ttl(mut self, ttl: Duration) -> Self { self.cache_ttl = ttl; self } + /// Runs ccusage for the given date range, returning cached results when available. pub fn query(&mut self, since: &str, until: Option<&str>) -> Result { let cache_key = format!("{}:{}:{}", self.provider, since, until.unwrap_or("now")); @@ -168,6 +184,7 @@ impl CcusageClient { Ok(report) } + /// Evicts all cached entries. pub fn clear_cache(&mut self) { self.cache.clear(); } diff --git a/crates/terraphim_kg_linter/src/lib.rs b/crates/terraphim_kg_linter/src/lib.rs index ff566c359..157cdf45c 100644 --- a/crates/terraphim_kg_linter/src/lib.rs +++ b/crates/terraphim_kg_linter/src/lib.rs @@ -11,6 +11,7 @@ use serde::{Deserialize, Serialize}; use thiserror::Error; use walkdir::WalkDir; +/// Errors produced during KG markdown scanning or validation. #[derive(Error, Debug)] pub enum LintError { #[error("IO error: {0}")] @@ -31,6 +32,7 @@ pub enum LintError { pub type Result = std::result::Result; +/// A typed argument definition for a KG command. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CommandArg { pub name: String, @@ -46,6 +48,7 @@ pub struct CommandArg { pub pattern: Option, } +/// A permission reference embedded in a command definition. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CommandPermissionRef { pub can: String, @@ -53,6 +56,7 @@ pub struct CommandPermissionRef { pub on: Option, // resource or scope } +/// A KG command declaration parsed from a `kg-commands` fenced block. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CommandDef { pub name: String, @@ -64,12 +68,14 @@ pub struct CommandDef { pub permissions: Vec, } +/// Wrapper for a YAML map of type names to field definitions. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TypesBlock( #[serde(with = "serde_yaml::with::singleton_map_recursive")] pub BTreeMap>, ); +/// A single allow or deny rule within a role permission block. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct PermissionRule { pub action: String, @@ -79,6 +85,7 @@ pub struct PermissionRule { pub resource: Option, } +/// Permission set for a named role, containing allow and deny rules. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RolePermissions { pub name: String, @@ -88,6 +95,7 @@ pub struct RolePermissions { pub deny: Vec, } +/// Accumulated schema fragments collected from all scanned markdown files. #[derive(Debug, Clone, Default)] pub struct SchemaFragments { pub commands: Vec, @@ -95,6 +103,7 @@ pub struct SchemaFragments { pub roles: Vec, } +/// A single diagnostic issue found during linting. #[derive(Debug, Clone, Serialize)] pub struct LintIssue { pub path: PathBuf, @@ -103,6 +112,7 @@ pub struct LintIssue { pub message: String, } +/// Severity level of a lint issue. #[derive(Debug, Clone, Serialize)] #[serde(rename_all = "lowercase")] pub enum Severity { @@ -111,6 +121,7 @@ pub enum Severity { Info, } +/// Complete report produced by a lint run over a directory. #[derive(Debug, Clone, Serialize)] pub struct LintReport { pub scanned_files: usize, @@ -118,6 +129,7 @@ pub struct LintReport { pub stats: ReportStats, } +/// Aggregate statistics from a lint run. 
#[derive(Debug, Clone, Serialize, Default)] pub struct ReportStats { pub command_count: usize, @@ -147,6 +159,7 @@ fn parse_yaml Deserialize<'de>>(body: &str, path: &Path) -> Result Result { let contents = std::fs::read_to_string(path)?; let mut fragments = SchemaFragments::default(); @@ -320,6 +333,7 @@ async fn build_thesaurus_from_dir(name: &str, dir: &Path) -> Result Result { let mut issues = Vec::::new(); let mut fragments = SchemaFragments::default(); diff --git a/crates/terraphim_negative_contribution/src/lib.rs b/crates/terraphim_negative_contribution/src/lib.rs index a7bedf82f..dab1c118e 100644 --- a/crates/terraphim_negative_contribution/src/lib.rs +++ b/crates/terraphim_negative_contribution/src/lib.rs @@ -3,5 +3,7 @@ mod exclusion; mod scanner; +/// Returns `true` if the given file path should be excluded from negative-contribution scanning (e.g. test files, examples). pub use exclusion::is_non_production; +/// Scanner that walks Rust source files and reports Explicit Deferral Markers (EDMs) such as `todo!`, `unimplemented!`, and `#[allow(dead_code)]`. pub use scanner::NegativeContributionScanner; From 489b104abb45eec5682a8f8e91508fde1c4bf24c Mon Sep 17 00:00:00 2001 From: Test User Date: Thu, 4 Jun 2026 12:00:52 +0200 Subject: [PATCH 11/13] docs: add item-level rustdoc to discourse and negative-contribution submodules - haystack_discourse: add /// to DiscourseClient struct, new(), Post struct - terraphim_negative_contribution: add /// to NegativeContributionScanner and all pub methods (new, from_thesaurus, scan_file, scan_files, scan_to_output, thesaurus) Reduces zero-coverage crate count to 0. 
Refs #2136 --- crates/haystack_discourse/src/client.rs | 5 +++++ crates/haystack_discourse/src/models.rs | 1 + .../terraphim_negative_contribution/src/scanner.rs | 12 ++++++++++++ 3 files changed, 18 insertions(+) diff --git a/crates/haystack_discourse/src/client.rs b/crates/haystack_discourse/src/client.rs index c958ada3b..67a6d854a 100644 --- a/crates/haystack_discourse/src/client.rs +++ b/crates/haystack_discourse/src/client.rs @@ -4,6 +4,10 @@ use url::Url; use crate::models::{Post, PostDetailsResponse, SearchResponse}; +/// HTTP client for the Discourse search API. +/// +/// Authenticates via API key and converts search results into +/// [`terraphim_types::Document`]s for indexing. pub struct DiscourseClient { client: Client, base_url: Url, @@ -12,6 +16,7 @@ pub struct DiscourseClient { } impl DiscourseClient { + /// Creates a new `DiscourseClient` authenticated with the given API key and username. pub fn new(base_url: &str, api_key: &str, api_username: &str) -> Result { let base_url = Url::parse(base_url).context("Failed to parse base URL")?; println!("Initializing Discourse client for URL: {}", base_url); diff --git a/crates/haystack_discourse/src/models.rs b/crates/haystack_discourse/src/models.rs index b15ad1444..065f281ec 100644 --- a/crates/haystack_discourse/src/models.rs +++ b/crates/haystack_discourse/src/models.rs @@ -1,5 +1,6 @@ use serde::{Deserialize, Serialize}; +/// A Discourse forum post returned by the search API, normalised for indexing. 
#[derive(Debug, Deserialize, Serialize)] pub struct Post { pub id: u64, diff --git a/crates/terraphim_negative_contribution/src/scanner.rs b/crates/terraphim_negative_contribution/src/scanner.rs index 14c6321d3..1d922d9ab 100644 --- a/crates/terraphim_negative_contribution/src/scanner.rs +++ b/crates/terraphim_negative_contribution/src/scanner.rs @@ -10,21 +10,30 @@ const SUPPRESSION_MARKER: &str = "terraphim: allow(stub)"; const SCANNER_AGENT_NAME: &str = "edm-scanner"; #[derive(Debug, Clone)] +/// Scans Rust source files for Explicit Deferral Markers (EDMs). +/// +/// Uses an Aho-Corasick automaton built from the bundled `edm_tier1.json` +/// thesaurus to locate `todo!`, `unimplemented!`, `#[allow(dead_code)]`, +/// and similar markers. Non-production files (tests, examples, benches) are +/// skipped automatically. pub struct NegativeContributionScanner { thesaurus: Thesaurus, } impl NegativeContributionScanner { + /// Creates a scanner using the bundled EDM tier-1 thesaurus. pub fn new() -> Self { let thesaurus = load_thesaurus_from_json(DEFAULT_EDM_TIER1_JSON) .expect("Failed to load embedded edm_tier1.json"); Self { thesaurus } } + /// Creates a scanner with a custom thesaurus. pub fn from_thesaurus(thesaurus: Thesaurus) -> Self { Self { thesaurus } } + /// Scans a single file, returning one [`ReviewFinding`] per EDM match. pub fn scan_file(&self, path: &str, content: &str) -> Vec { if is_non_production(path, content) { return Vec::new(); @@ -68,6 +77,7 @@ impl NegativeContributionScanner { findings } + /// Scans a batch of `(path, content)` pairs, aggregating all findings. pub fn scan_files(&self, files: &[(String, String)]) -> Vec { files .iter() @@ -75,6 +85,7 @@ impl NegativeContributionScanner { .collect() } + /// Scans a batch of files and returns a [`ReviewAgentOutput`] ready for agent consumption. 
pub fn scan_to_output(&self, files: &[(String, String)]) -> ReviewAgentOutput { let findings = self.scan_files(files); let pass = findings.is_empty(); @@ -100,6 +111,7 @@ impl NegativeContributionScanner { } } + /// Returns a reference to the underlying EDM thesaurus. pub fn thesaurus(&self) -> &Thesaurus { &self.thesaurus } From 907b96436eaba0f478c0730a0f19ba3bd50c1141 Mon Sep 17 00:00:00 2001 From: Test User Date: Fri, 5 Jun 2026 02:29:09 +0200 Subject: [PATCH 12/13] fix(fmt): fix rustfmt ordering in hybrid.rs and blank line in unit_test.rs Fixes #2169 - Move #[cfg(feature = "firecracker")] use before other use items in hybrid.rs - Remove trailing blank line before test in unit_test.rs Co-Authored-By: Terraphim AI --- crates/terraphim_agent/src/commands/modes/hybrid.rs | 4 ++-- crates/terraphim_agent/tests/unit_test.rs | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/crates/terraphim_agent/src/commands/modes/hybrid.rs b/crates/terraphim_agent/src/commands/modes/hybrid.rs index 1181f3ed8..6294ebb0b 100644 --- a/crates/terraphim_agent/src/commands/modes/hybrid.rs +++ b/crates/terraphim_agent/src/commands/modes/hybrid.rs @@ -3,12 +3,12 @@ //! This module provides smart execution mode selection based on risk assessment, //! command type, and available infrastructure. 
+#[cfg(feature = "firecracker")] +use super::FirecrackerExecutor; use super::{ CommandDefinition, CommandExecutionError, CommandExecutionResult, ExecutionMode, ExecutorCapabilities, LocalExecutor, }; -#[cfg(feature = "firecracker")] -use super::FirecrackerExecutor; use crate::commands::RiskLevel; use std::collections::HashMap; diff --git a/crates/terraphim_agent/tests/unit_test.rs b/crates/terraphim_agent/tests/unit_test.rs index 1b5d8b200..4aacd4a9a 100644 --- a/crates/terraphim_agent/tests/unit_test.rs +++ b/crates/terraphim_agent/tests/unit_test.rs @@ -329,7 +329,6 @@ fn test_rolegraph_response_deserialization() { assert_eq!(edge.rank, 50); } - /// Test error response handling #[test] fn test_error_response_deserialization() { From 71eca5028e2382d378a84b29e77dee0cb872e781 Mon Sep 17 00:00:00 2001 From: Test User Date: Fri, 5 Jun 2026 03:00:58 +0200 Subject: [PATCH 13/13] docs: add item-level rustdoc to 7 worst-offender crates Refs #2137 248 doc comments added across 30 files: - terraphim_grep: 91 items (95% gap -> <5%) - terraphim-markdown-parser: 45 items (79% gap -> <5%) - terraphim_usage: 68 items (68% gap -> <5%) - terraphim_agent_messaging: 22 items - terraphim_merge_coordinator: 13 items - terraphim_file_search: 6 items - terraphim_middleware: 3 items All clippy/fmt checks pass on affected crates. 
Co-Authored-By: Terraphim AI --- CHANGELOG.md | 1 + crates/terraphim-markdown-parser/src/chunk.rs | 10 +++ .../terraphim-markdown-parser/src/heading.rs | 24 +++++++ crates/terraphim-markdown-parser/src/lib.rs | 12 ++++ .../terraphim_agent_messaging/src/delivery.rs | 20 ++++++ .../terraphim_agent_messaging/src/mailbox.rs | 11 ++++ .../terraphim_agent_messaging/src/message.rs | 8 +++ .../terraphim_agent_messaging/src/router.rs | 9 +++ .../src/result_ranking.rs | 6 ++ crates/terraphim_grep/src/error.rs | 9 +++ crates/terraphim_grep/src/hybrid_searcher.rs | 34 ++++++++++ crates/terraphim_grep/src/kg_curation.rs | 7 ++ crates/terraphim_grep/src/lib.rs | 22 +++++++ crates/terraphim_grep/src/rlm_context.rs | 14 ++++ crates/terraphim_grep/src/signatures.rs | 24 +++++++ .../terraphim_grep/src/sufficiency_judge.rs | 13 ++++ .../src/evaluator.rs | 4 ++ .../terraphim_merge_coordinator/src/gitea.rs | 5 ++ .../terraphim_merge_coordinator/src/types.rs | 5 +- .../src/command/ripgrep.rs | 1 + .../terraphim_middleware/src/thesaurus/mod.rs | 1 + crates/terraphim_usage/src/cli.rs | 8 +++ crates/terraphim_usage/src/formatter.rs | 3 + crates/terraphim_usage/src/lib.rs | 66 ++++++++++++++++++- crates/terraphim_usage/src/pricing.rs | 7 ++ .../terraphim_usage/src/providers/ccusage.rs | 2 + .../terraphim_usage/src/providers/claude.rs | 3 + crates/terraphim_usage/src/providers/kimi.rs | 2 + .../terraphim_usage/src/providers/minimax.rs | 3 + .../src/providers/opencode_go.rs | 3 + crates/terraphim_usage/src/providers/zai.rs | 3 + 31 files changed, 336 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7836be732..eafc85d62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - **terraphim_grep hybrid searcher** complete implementation: parallel KG + grep execution via `tokio::spawn`, CLI with thesaurus discovery, `Serialize` on `GrepResult`/`GrepStats`/`SufficiencyState` (Refs 
#1743, PR#1825, 2026-05-24) - **terraphim_merge_coordinator** minimal skeleton proving crate structure (Refs #1805, PR#1823, 2026-05-23) - **Config-error circuit-breaker** `ExitClass::ConfigError` quarantines agents after 3 consecutive config failures; `AgentDefinition.enabled` field; memory watchdog systemd units; `bigbox-sync.sh` (Refs #1817, PR#1822, 2026-05-23) +- **Rustdoc gaps resolved** doc comments added to public items in `terraphim_grep` (95% gap, 91 items), `terraphim-markdown-parser` (79% gap, 45 items), `terraphim_usage` (68% gap, 68 items), `terraphim_agent_messaging` (22 items), `terraphim_file_search`, `terraphim_merge_coordinator`, and `terraphim_middleware` -- 248 doc comments added across 30 files (Refs #2137, 2026-06-05) - **Rustdoc gaps resolved** doc comments added to all public items in `terraphim_types` (`LlmUsage`, `LlmResult`, `ModelPricing`, `ReviewFinding`, `ReviewAgentOutput`, `FindingSeverity`, `FindingCategory`, `DocumentType`, `MarkdownDirectives`, `Scorer`, `Query`, `Similarity`, `ScoreError`, and score sub-modules) -- 93 warnings eliminated (2026-05-30) - **terraphim_rlm CLI binary** with 6 commands (code, bash, query, context, snapshot, status) for stateless RLM execution (Refs #RLM-CLI, 2026-05-18) - **MCP server RLM integration** via process spawning — 6 new tools exposed without linking terraphim_rlm (avoids static init hang) (Refs #RLM-CLI, 2026-05-18) diff --git a/crates/terraphim-markdown-parser/src/chunk.rs b/crates/terraphim-markdown-parser/src/chunk.rs index 1042cc239..a2c295f07 100644 --- a/crates/terraphim-markdown-parser/src/chunk.rs +++ b/crates/terraphim-markdown-parser/src/chunk.rs @@ -3,15 +3,24 @@ use ulid::Ulid; use crate::NormalizedMarkdown; use crate::heading::{HeadingNode, HeadingTree, SectionType}; +/// Represents a semantically coherent chunk of content under a heading, ready for indexing. 
#[derive(Debug, Clone)] pub struct ContentChunk { + /// The composite identifier combining content source, section path, and first block ULID. pub chunk_id: String, + /// The identifier of the source document this chunk belongs to. pub content_id: String, + /// The ordered list of block ULIDs whose text makes up this chunk. pub block_ids: Vec, + /// The chapter number assigned by depth-first traversal order, if applicable. pub chapter_number: Option, + /// The dot-separated path of section indices, e.g. `"1.2.3"`. pub section_path: String, + /// The classified section type for this chunk (main body, sidebar, career, assessment). pub chunk_type: SectionType, + /// The plain text content of the chunk with block-id comments stripped. pub text: String, + /// The approximate token count based on whitespace-delimited words. pub token_count: u32, } @@ -19,6 +28,7 @@ struct ChunkState { chapter_counter: u8, } +/// Builds a flat list of [`ContentChunk`]s by traversing the heading tree depth-first. pub fn chunk_by_headings( content_id: &str, tree: &HeadingTree, diff --git a/crates/terraphim-markdown-parser/src/heading.rs b/crates/terraphim-markdown-parser/src/heading.rs index 48632914e..8af19efb0 100644 --- a/crates/terraphim-markdown-parser/src/heading.rs +++ b/crates/terraphim-markdown-parser/src/heading.rs @@ -15,11 +15,16 @@ pub enum MatchStrategy { Contains, } +/// Describes the semantic role of a heading section in a document. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub enum SectionType { + /// The primary body content of the section. Main, + /// A named sidebar or callout box, identified by its label. Sidebar(String), + /// A career-focused section (e.g. "Selling U" textbook sections). Career, + /// An assessment or review section (e.g. key takeaways, quizzes). Assessment, } @@ -34,19 +39,26 @@ impl std::fmt::Display for SectionType { } } +/// Represents a single rule that maps a heading title pattern to a [`SectionType`]. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct SectionPattern { + /// The string pattern to match against a heading title. pub pattern: String, + /// The section type to assign when the pattern matches. pub section_type: SectionType, + /// The strategy used to compare the heading title against `pattern`. pub match_strategy: MatchStrategy, } +/// Represents an ordered collection of [`SectionPattern`] rules for classifying headings. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SectionConfig { + /// The ordered list of pattern rules; first match wins. pub rules: Vec, } impl SectionConfig { + /// Returns a default [`SectionConfig`] with common textbook section patterns pre-configured. pub fn textbook_default() -> Self { Self { rules: vec![ @@ -86,6 +98,7 @@ impl SectionConfig { } } + /// Classifies a heading `title` against the configured rules, returning the matched [`SectionType`]. pub fn classify(&self, title: &str) -> SectionType { let title_trimmed = title.trim(); for rule in &self.rules { @@ -107,21 +120,31 @@ impl Default for SectionConfig { } } +/// Represents a single node in the heading hierarchy tree. #[derive(Debug, Clone)] pub struct HeadingNode { + /// The heading depth level (1 for `#`, 2 for `##`, etc.). pub level: u8, + /// The plain-text title extracted from the heading. pub title: String, + /// The classified section type for this heading. pub section_type: SectionType, + /// The ULIDs of blocks whose content falls directly under this heading. pub blocks: Vec, + /// The child heading nodes nested beneath this heading. pub children: Vec, + /// The byte range in the source covering this heading and its content. pub byte_range: Range, } +/// Represents the full heading hierarchy extracted from a normalised markdown document. #[derive(Debug, Clone)] pub struct HeadingTree { + /// The top-level heading nodes (depth-1 headings or document roots). 
pub roots: Vec, } +/// Builds a [`HeadingTree`] from the AST embedded in a [`NormalizedMarkdown`] value. pub fn build_heading_tree( normalized: &NormalizedMarkdown, ) -> Result { @@ -134,6 +157,7 @@ pub fn build_heading_tree( Ok(tree) } +/// Classifies every node in the heading tree in-place using the provided [`SectionConfig`]. pub fn classify_sections(tree: &mut HeadingTree, config: &SectionConfig) { for root in &mut tree.roots { classify_node(root, config); diff --git a/crates/terraphim-markdown-parser/src/lib.rs b/crates/terraphim-markdown-parser/src/lib.rs index c2de7f310..e598fb8ed 100644 --- a/crates/terraphim-markdown-parser/src/lib.rs +++ b/crates/terraphim-markdown-parser/src/lib.rs @@ -21,6 +21,7 @@ pub use heading::{ build_heading_tree, classify_sections, }; +/// The prefix used inside HTML comments to identify Terraphim block-id anchors. pub const TERRAPHIM_BLOCK_ID_PREFIX: &str = "terraphim:block-id:"; /// Extract the first H1 heading from markdown content using AST parsing. @@ -69,15 +70,21 @@ pub(crate) fn collect_text_content(nodes: &[Node]) -> String { text } +/// Describes the structural kind of a Terraphim block. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BlockKind { + /// A standalone markdown paragraph block. Paragraph, + /// A markdown list-item block. ListItem, } +/// Represents a single annotated content block extracted from normalised markdown. #[derive(Debug, Clone, PartialEq, Eq)] pub struct Block { + /// The stable unique identifier assigned to this block. pub id: Ulid, + /// The structural kind of this block (paragraph or list item). pub kind: BlockKind, /// Byte span of the block in the markdown buffer. @@ -93,13 +100,18 @@ pub struct Block { pub id_span: Range, } +/// Represents markdown content that has been normalised with stable Terraphim block-id anchors. #[derive(Debug, Clone)] pub struct NormalizedMarkdown { + /// The normalised markdown source with all block-id comments inserted. 
pub markdown: String, + /// The ordered list of blocks extracted from the normalised source. pub blocks: Vec, + /// The parsed AST of the normalised markdown, if available. pub ast: Option, } +/// Describes errors that can occur during markdown parsing and normalisation. #[derive(Debug, Error)] pub enum MarkdownParserError { #[error("failed to parse markdown: {0}")] diff --git a/crates/terraphim_agent_messaging/src/delivery.rs b/crates/terraphim_agent_messaging/src/delivery.rs index 80a495829..f70329d46 100644 --- a/crates/terraphim_agent_messaging/src/delivery.rs +++ b/crates/terraphim_agent_messaging/src/delivery.rs @@ -37,19 +37,30 @@ pub enum DeliveryStatus { /// Delivery record for tracking message delivery #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DeliveryRecord { + /// The unique identifier of the tracked message. pub message_id: MessageId, + /// The sending agent, if known. pub from: Option, + /// The intended recipient agent. pub to: AgentPid, + /// The current delivery status. pub status: DeliveryStatus, + /// Number of delivery attempts made so far. pub attempts: u32, + /// The UTC timestamp when the record was created. pub created_at: DateTime, + /// The UTC timestamp of the most recent delivery attempt. pub last_attempt: Option>, + /// The UTC timestamp when the message was delivered. pub delivered_at: Option>, + /// The UTC timestamp when the message was acknowledged. pub acknowledged_at: Option>, + /// The error description from the most recent failed attempt. pub error_message: Option, } impl DeliveryRecord { + /// Creates a new pending delivery record for the given message. pub fn new(message_id: MessageId, from: Option, to: AgentPid) -> Self { Self { message_id, @@ -65,31 +76,37 @@ impl DeliveryRecord { } } + /// Transitions the record to `InTransit` and increments the attempt counter. 
pub fn mark_in_transit(&mut self) { self.status = DeliveryStatus::InTransit; self.attempts += 1; self.last_attempt = Some(Utc::now()); } + /// Transitions the record to `Delivered` and records the delivery timestamp. pub fn mark_delivered(&mut self) { self.status = DeliveryStatus::Delivered; self.delivered_at = Some(Utc::now()); } + /// Transitions the record to `Acknowledged` and records the acknowledgement timestamp. pub fn mark_acknowledged(&mut self) { self.status = DeliveryStatus::Acknowledged; self.acknowledged_at = Some(Utc::now()); } + /// Transitions the record to `Failed` and stores the error description. pub fn mark_failed(&mut self, error: String) { self.status = DeliveryStatus::Failed(error.clone()); self.error_message = Some(error); } + /// Transitions the record to `Expired`. pub fn mark_expired(&mut self) { self.status = DeliveryStatus::Expired; } + /// Returns `true` if the record is in a terminal state (acknowledged, failed, or expired). pub fn is_final_state(&self) -> bool { matches!( self.status, @@ -390,6 +407,7 @@ pub struct DeliveryStats { } impl DeliveryStats { + /// Returns the fraction of messages that were successfully acknowledged. pub fn success_rate(&self) -> f64 { if self.total_messages == 0 { 0.0 @@ -398,6 +416,7 @@ impl DeliveryStats { } } + /// Returns the fraction of messages that failed delivery. pub fn failure_rate(&self) -> f64 { if self.total_messages == 0 { 0.0 @@ -406,6 +425,7 @@ impl DeliveryStats { } } + /// Returns the average number of delivery attempts per message. 
pub fn average_attempts(&self) -> f64 { if self.total_messages == 0 { 0.0 diff --git a/crates/terraphim_agent_messaging/src/mailbox.rs b/crates/terraphim_agent_messaging/src/mailbox.rs index 134726059..d0249b534 100644 --- a/crates/terraphim_agent_messaging/src/mailbox.rs +++ b/crates/terraphim_agent_messaging/src/mailbox.rs @@ -37,17 +37,26 @@ impl Default for MailboxConfig { /// Mailbox statistics #[derive(Debug, Clone, Serialize, Deserialize)] pub struct MailboxStats { + /// The identifier of the agent owning this mailbox. pub agent_id: AgentPid, + /// Total number of messages received since creation. pub total_messages_received: u64, + /// Total number of messages processed since creation. pub total_messages_processed: u64, + /// Current number of messages waiting in the queue. pub current_queue_size: usize, + /// Peak queue depth observed since creation. pub max_queue_size_reached: usize, + /// Timestamp of the most recently received message. pub last_message_received: Option>, + /// Timestamp of the most recently processed message. pub last_message_processed: Option>, + /// Running average time taken to process each message. pub average_processing_time: std::time::Duration, } impl MailboxStats { + /// Creates zeroed statistics for the given agent. pub fn new(agent_id: AgentPid) -> Self { Self { agent_id, @@ -61,6 +70,7 @@ impl MailboxStats { } } + /// Records the receipt of a new message and updates queue-depth counters. pub fn record_message_received(&mut self) { self.total_messages_received += 1; self.current_queue_size += 1; @@ -68,6 +78,7 @@ impl MailboxStats { self.last_message_received = Some(Utc::now()); } + /// Records a processed message and updates the running average processing time. 
pub fn record_message_processed(&mut self, processing_time: std::time::Duration) { self.total_messages_processed += 1; self.current_queue_size = self.current_queue_size.saturating_sub(1); diff --git a/crates/terraphim_agent_messaging/src/message.rs b/crates/terraphim_agent_messaging/src/message.rs index 488cba7bc..f3d8399ce 100644 --- a/crates/terraphim_agent_messaging/src/message.rs +++ b/crates/terraphim_agent_messaging/src/message.rs @@ -17,10 +17,12 @@ use crate::AgentPid; pub struct MessageId(pub Uuid); impl MessageId { + /// Creates a new randomly-generated `MessageId`. pub fn new() -> Self { Self(Uuid::new_v4()) } + /// Returns the string representation of the message ID. pub fn as_str(&self) -> String { self.0.to_string() } @@ -295,13 +297,18 @@ impl MessageEnvelope { /// Typed message wrapper for type-safe messaging pub struct TypedMessage { + /// The unique identifier for this message. pub id: MessageId, + /// The sending agent, if known. pub from: Option, + /// The typed message payload. pub payload: T, + /// The UTC timestamp when this message was created. pub created_at: DateTime, } impl TypedMessage { + /// Creates a new `TypedMessage` wrapping the given payload. pub fn new(payload: T) -> Self { Self { id: MessageId::new(), @@ -311,6 +318,7 @@ impl TypedMessage { } } + /// Sets the sending agent for this message. pub fn with_from(mut self, from: AgentPid) -> Self { self.from = Some(from); self diff --git a/crates/terraphim_agent_messaging/src/router.rs b/crates/terraphim_agent_messaging/src/router.rs index 139b2e488..cf53f70f4 100644 --- a/crates/terraphim_agent_messaging/src/router.rs +++ b/crates/terraphim_agent_messaging/src/router.rs @@ -18,9 +18,13 @@ use crate::{ /// Message router configuration #[derive(Debug, Clone)] pub struct RouterConfig { + /// Delivery guarantee settings used for all routed messages. pub delivery_config: DeliveryConfig, + /// How often the background task checks for messages to retry. 
pub retry_interval: Duration, + /// Maximum number of in-flight deliveries processed concurrently. pub max_concurrent_deliveries: usize, + /// Whether to collect routing metrics. pub enable_metrics: bool, } @@ -38,10 +42,15 @@ impl Default for RouterConfig { /// Router statistics #[derive(Debug, Default, Clone)] pub struct RouterStats { + /// Total messages submitted to the router. pub messages_routed: u64, + /// Total messages successfully delivered. pub messages_delivered: u64, + /// Total messages that failed all delivery attempts. pub messages_failed: u64, + /// Number of currently registered agent routes. pub active_routes: usize, + /// Total retry attempts made by the background retry task. pub retry_attempts: u64, } diff --git a/crates/terraphim_file_search/src/result_ranking.rs b/crates/terraphim_file_search/src/result_ranking.rs index 16565f090..a47ba31df 100644 --- a/crates/terraphim_file_search/src/result_ranking.rs +++ b/crates/terraphim_file_search/src/result_ranking.rs @@ -8,21 +8,27 @@ pub const DEFAULT_RANKING_CANDIDATE_LIMIT: usize = 1000; /// Candidate metadata available immediately after fff-search candidate retrieval. pub struct FileRankCandidate<'a> { + /// The relative file path from the haystack root. pub relative_path: &'a str, + /// Optional document title extracted from the file. pub title: Option<&'a str>, + /// Optional document body text for content-based scoring. pub body: Option<&'a str>, } /// Scores file/search candidates for Terraphim-specific ranking. pub trait FileRanker { + /// Returns a ranking score for the given candidate; higher values rank first. fn score_candidate(&self, candidate: &FileRankCandidate<'_>) -> i32; } +/// Ranks file candidates using knowledge-graph path scoring. pub struct KgFileRanker<'a> { scorer: &'a KgPathScorer, } impl<'a> KgFileRanker<'a> { + /// Creates a new `KgFileRanker` backed by the given `KgPathScorer`. 
pub fn new(scorer: &'a KgPathScorer) -> Self { Self { scorer } } diff --git a/crates/terraphim_grep/src/error.rs b/crates/terraphim_grep/src/error.rs index 3480d2b55..8c91f1eb6 100644 --- a/crates/terraphim_grep/src/error.rs +++ b/crates/terraphim_grep/src/error.rs @@ -1,27 +1,36 @@ use std::time::Duration; +/// Describes error variants for the terraphim_grep crate. #[derive(Debug, thiserror::Error)] pub enum TerraphimGrepError { + /// The underlying search pipeline failed with the given message. #[error("search failed: {0}")] SearchFailed(String), + /// No LLM client was configured when synthesis was requested. #[error("LLM not configured: {0}")] LlmNotConfigured(String), + /// The search returned fewer results than the required minimum. #[error("insufficient results: {0}")] InsufficientResults(String), + /// Knowledge-graph curation via the RLM pipeline failed. #[error("KG curation failed: {0}")] KgCurationFailed(String), + /// The RLM execution step failed with the given message. #[error("RLM execution failed: {0}")] RlmFailed(String), + /// The operation exceeded the allowed duration. #[error("timeout after {0:?}")] Timeout(Duration), + /// A configuration value was missing or invalid. #[error("invalid configuration: {0}")] InvalidConfig(String), } +/// The standard `Result` type alias for this crate, using [`TerraphimGrepError`]. pub type Result = std::result::Result; diff --git a/crates/terraphim_grep/src/hybrid_searcher.rs b/crates/terraphim_grep/src/hybrid_searcher.rs index c6e56dee2..5cfaae3d1 100644 --- a/crates/terraphim_grep/src/hybrid_searcher.rs +++ b/crates/terraphim_grep/src/hybrid_searcher.rs @@ -4,12 +4,18 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; use terraphim_types::Document; +/// Represents the options controlling a hybrid grep search invocation. #[derive(Debug, Clone)] pub struct GrepOptions { + /// The haystack scope to search (code, docs, or all). 
pub haystack: Haystack, + /// The number of context lines to include around each match. pub context_lines: usize, + /// The maximum number of result chunks to return. pub max_results: usize, + /// Forces RLM synthesis regardless of the sufficiency verdict. pub force_rlm: bool, + /// Requests that a synthesised answer be included in the result. pub include_answer: bool, } @@ -25,21 +31,32 @@ impl Default for GrepOptions { } } +/// Describes which haystack scope to search. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum Haystack { + /// Searches only source-code files. #[default] Code, + /// Searches only documentation files. Docs, + /// Searches both source-code and documentation files. All, } +/// Represents a single text chunk retrieved from the search pipeline. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RetrievedChunk { + /// The textual content of the matched chunk. pub content: String, + /// The file path or URL identifying the source of this chunk. pub source: String, + /// The first line number of the chunk within its source file. pub line_start: Option, + /// The last line number of the chunk within its source file. pub line_end: Option, + /// The relevance score assigned to this chunk after KG boosting. pub relevance_score: f64, + /// The haystack label indicating which pipeline produced this chunk. pub haystack: &'static str, } @@ -56,22 +73,32 @@ impl From for RetrievedChunk { } } +/// Represents a knowledge-graph concept matched during a search query. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct KgConcept { + /// The internal numeric identifier for this concept. pub id: u64, + /// The canonical name of the concept as stored in the knowledge graph. pub name: String, + /// An optional human-readable label to display instead of `name`. pub display_value: Option, + /// The relevance score assigned to this concept for the current query. 
pub score: f64, } +/// Represents the combined results from the KG and code/doc search pipelines. #[derive(Debug, Clone)] pub struct HybridResults { + /// Chunks retrieved from source-code search. pub code_results: Vec, + /// Chunks retrieved from documentation search. pub doc_results: Vec, + /// Knowledge-graph concepts matched for the query. pub kg_concepts: Vec, } impl HybridResults { + /// Returns a combined `Vec` of all code and doc chunks in insertion order. pub fn to_chunks(&self) -> Vec { let mut chunks = Vec::with_capacity(self.code_results.len() + self.doc_results.len()); chunks.extend(self.code_results.clone()); @@ -79,10 +106,12 @@ impl HybridResults { chunks } + /// Returns the total count of code results, doc results, and KG concepts combined. pub fn total_results(&self) -> usize { self.code_results.len() + self.doc_results.len() + self.kg_concepts.len() } + /// Returns `true` when all three result buckets are empty. pub fn is_empty(&self) -> bool { self.code_results.is_empty() && self.doc_results.is_empty() && self.kg_concepts.is_empty() } @@ -150,6 +179,7 @@ pub fn boost_chunks_with_kg( chunks } +/// Represents the hybrid searcher that runs KG and code-search pipelines concurrently. pub struct HybridSearcher { role_graph: Arc>, /// Kept alongside the rolegraph so KG-style boosting still works when no documents @@ -161,6 +191,7 @@ pub struct HybridSearcher { } impl HybridSearcher { + /// Builds a new `HybridSearcher` for the given role name and thesaurus. pub fn new( role_name: String, thesaurus: terraphim_types::Thesaurus, @@ -177,11 +208,13 @@ impl HybridSearcher { }) } + /// Sets the filesystem path used as the root for code-search operations. pub fn with_search_path(mut self, path: PathBuf) -> Self { self.search_path = path; self } + /// Executes the hybrid KG and code search for the given query and returns merged results. 
pub async fn search( &self, query: &str, @@ -365,6 +398,7 @@ impl HybridSearcher { } } + /// Sorts retrieved chunks by descending relevance score and returns them. pub fn fuse_and_rank(&self, mut results: Vec) -> Vec { results.sort_by(|a, b| { b.relevance_score diff --git a/crates/terraphim_grep/src/kg_curation.rs b/crates/terraphim_grep/src/kg_curation.rs index 95e66d156..9f0095f6c 100644 --- a/crates/terraphim_grep/src/kg_curation.rs +++ b/crates/terraphim_grep/src/kg_curation.rs @@ -8,6 +8,7 @@ use terraphim_service::llm::LlmClient; use crate::error::Result; use crate::signatures::NewConcept; +/// Represents the RLM-based knowledge-graph curation pipeline (requires the `llm` feature). #[cfg(feature = "llm")] pub struct KgCurationRlm { llm_client: Arc, @@ -16,6 +17,7 @@ pub struct KgCurationRlm { #[cfg(feature = "llm")] impl KgCurationRlm { + /// Builds a new `KgCurationRlm` backed by the supplied LLM client. pub fn new(llm_client: Arc) -> Self { Self { llm_client, @@ -23,11 +25,13 @@ impl KgCurationRlm { } } + /// Sets the filesystem path where extracted concept markdown files are persisted. pub fn with_kg_path(mut self, path: std::path::PathBuf) -> Self { self.kg_path = Some(path); self } + /// Extracts new KG concepts from the query and RLM answer, then persists them to disk. pub async fn extract_and_index( &self, query: &str, @@ -124,6 +128,7 @@ impl KgCurationRlm { } } +/// Represents a stub `KgCurationRlm` used when the `llm` feature is disabled. #[cfg(not(feature = "llm"))] pub struct KgCurationRlm; @@ -136,10 +141,12 @@ impl Default for KgCurationRlm { #[cfg(not(feature = "llm"))] impl KgCurationRlm { + /// Builds a new no-op `KgCurationRlm` stub when the `llm` feature is disabled. pub fn new() -> Self { Self } + /// Returns an error indicating that the `llm` feature is not enabled. 
pub async fn extract_and_index( &self, _query: &str, diff --git a/crates/terraphim_grep/src/lib.rs b/crates/terraphim_grep/src/lib.rs index 796d7176d..95ff7e62c 100644 --- a/crates/terraphim_grep/src/lib.rs +++ b/crates/terraphim_grep/src/lib.rs @@ -39,30 +39,46 @@ pub use rlm_context::RlmContext; pub use signatures::{AnswerWithCitations, Citation, Match, NewConcept, RlmSignature}; pub use sufficiency_judge::{HeuristicThresholds, Sufficiency, SufficiencyJudge}; +/// Represents the complete result of a hybrid grep-plus-KG search operation. #[derive(Debug, Clone, serde::Serialize)] pub struct GrepResult { + /// The ranked text chunks retrieved from the search pipeline. pub chunks: Vec, + /// The synthesised answer produced by the RLM, if synthesis was performed. pub answer: Option, + /// The knowledge-graph concepts matched during the search. pub concepts: Vec, + /// The sufficiency verdict reached after evaluating the search results. pub sufficiency: SufficiencyState, + /// Timing and count statistics for the completed search. pub stats: GrepStats, } +/// Describes the outcome state variants after sufficiency evaluation. #[derive(Debug, Clone, serde::Serialize)] pub enum SufficiencyState { + /// Results came from search alone; no RLM synthesis was attempted. SearchOnly, + /// The RLM synthesised an answer from the retrieved chunks. RlmSynthesis, + /// The RLM was invoked but the results were still deemed insufficient. RlmInsufficient, } +/// Represents timing and count statistics for a completed search operation. #[derive(Debug, Clone, serde::Serialize)] pub struct GrepStats { + /// The wall-clock time in milliseconds spent on the search phase. pub search_latency_ms: u64, + /// The wall-clock time in milliseconds spent on RLM synthesis, if performed. pub rlm_latency_ms: Option, + /// The number of chunks included in the final result. pub chunks_returned: usize, + /// The number of knowledge-graph concepts that matched the query. 
pub kg_hits: usize, } +/// Represents the top-level hybrid search orchestrator combining KG expansion and full-text grep. pub struct TerraphimGrep { hybrid_searcher: Arc, sufficiency_judge: Arc, @@ -73,6 +89,7 @@ pub struct TerraphimGrep { } impl TerraphimGrep { + /// Builds a new `TerraphimGrep` with the given hybrid searcher and sufficiency judge. #[cfg(feature = "llm")] pub fn new( hybrid_searcher: Arc, @@ -86,6 +103,7 @@ impl TerraphimGrep { } } + /// Builds a new `TerraphimGrep` with the given hybrid searcher and sufficiency judge. #[cfg(not(feature = "llm"))] pub fn new( hybrid_searcher: Arc, @@ -97,12 +115,14 @@ impl TerraphimGrep { } } + /// Attaches a KG curation RLM instance for automatic concept extraction after synthesis. #[cfg(feature = "llm")] pub fn with_kg_curation(mut self, kg_curation: Arc) -> Self { self.kg_curation = Some(kg_curation); self } + /// Attaches an LLM client used for RLM synthesis and concept extraction. #[cfg(feature = "llm")] pub fn with_llm_client( mut self, @@ -112,6 +132,7 @@ impl TerraphimGrep { self } + /// Executes a hybrid search for the given query and returns ranked results with optional synthesis. pub async fn search(&self, query: &str, options: GrepOptions) -> Result { let start = std::time::Instant::now(); @@ -315,6 +336,7 @@ impl TerraphimGrep { .await } + /// Returns a zeroed-out `GrepStats` snapshot for this instance. pub fn stats(&self) -> GrepStats { GrepStats { search_latency_ms: 0, diff --git a/crates/terraphim_grep/src/rlm_context.rs b/crates/terraphim_grep/src/rlm_context.rs index 401ffa0c7..4e3c351a2 100644 --- a/crates/terraphim_grep/src/rlm_context.rs +++ b/crates/terraphim_grep/src/rlm_context.rs @@ -2,21 +2,30 @@ use std::collections::HashMap; use super::hybrid_searcher::{KgConcept, RetrievedChunk}; +/// Represents the assembled context passed to the RLM for synthesis or concept extraction. #[derive(Debug, Clone)] pub struct RlmContext { + /// The original user query string. 
pub query: String, + /// The text chunks retrieved from the search pipeline. pub retrieved_chunks: Vec, + /// The knowledge-graph concepts matched for this query. pub kg_concepts: Vec, + /// Per-source metadata keyed by the source path or URL. pub source_metadata: HashMap, } +/// Represents metadata describing the origin and recency of a retrieved document. #[derive(Debug, Clone)] pub struct DocumentMetadata { + /// The haystack type label (e.g. `"code"` or `"docs"`) for this source. pub source_type: String, + /// The last-modified timestamp of the source document, if available. pub last_modified: Option, } impl RlmContext { + /// Builds a new empty `RlmContext` for the given query string. pub fn new(query: String) -> Self { Self { query, @@ -26,6 +35,7 @@ impl RlmContext { } } + /// Attaches the retrieved chunks and populates source metadata from them. pub fn with_chunks(mut self, chunks: Vec) -> Self { self.retrieved_chunks = chunks; for chunk in &self.retrieved_chunks { @@ -40,11 +50,13 @@ impl RlmContext { self } + /// Attaches the knowledge-graph concepts matched for this context. pub fn with_concepts(mut self, concepts: Vec) -> Self { self.kg_concepts = concepts; self } + /// Builds the formatted prompt string from the query, chunks, and KG concepts. pub fn build_prompt(&self) -> String { let mut prompt = format!("Query: {}\n\n", self.query); @@ -75,10 +87,12 @@ impl RlmContext { prompt } + /// Returns the length in bytes of the rendered prompt for this context. pub fn context_length(&self) -> usize { self.build_prompt().len() } + /// Truncates the retrieved chunks so the rendered prompt fits within `max_chars`.
pub fn truncate(&mut self, max_chars: usize) { if self.context_length() > max_chars { let mut remaining = max_chars; diff --git a/crates/terraphim_grep/src/signatures.rs b/crates/terraphim_grep/src/signatures.rs index 9126b4dfa..abeb8b695 100644 --- a/crates/terraphim_grep/src/signatures.rs +++ b/crates/terraphim_grep/src/signatures.rs @@ -2,23 +2,33 @@ use serde::{Deserialize, Serialize}; use crate::error::TerraphimGrepError; +/// Defines the contract for an RLM output signature: instruction generation and JSON parsing. pub trait RlmSignature: Send + Sync { + /// The deserialisable output type produced by this signature's `parse` method. type Output: serde::Serialize + serde::de::DeserializeOwned; + /// Returns the prompt instructions that tell the LLM which JSON schema to emit. fn instructions(&self) -> String; + /// Parses the raw LLM response string into the typed `Output`. fn parse(&self, raw: &str) -> Result; } +/// Represents a single file-match location produced by the search signature. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Match { + /// The file path where the match was found. pub path: String, + /// The starting line number of the match. pub line: usize, + /// The optional ending line number when the match spans multiple lines. #[serde(skip_serializing_if = "Option::is_none", default)] pub line_end: Option, + /// The surrounding context lines included with the match. #[serde(skip_serializing_if = "Vec::is_empty", default)] pub context: Vec, } +/// Represents the RLM signature for parsing a list of file-match results. pub struct SearchResultSignature; impl RlmSignature for SearchResultSignature { @@ -35,21 +45,30 @@ impl RlmSignature for SearchResultSignature { } } +/// Represents a source citation linking an answer claim to a specific file location. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Citation { + /// The file path or URL of the cited source. pub source: String, + /// The line number within the source, if known. 
#[serde(skip_serializing_if = "Option::is_none")] pub line: Option, + /// A short excerpt from the source that supports the claim. pub excerpt: String, } +/// Represents a synthesised answer accompanied by supporting source citations. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AnswerWithCitations { + /// The synthesised answer text produced by the RLM. pub answer: String, + /// The source citations that back the synthesised answer. pub citations: Vec, + /// The model's self-reported confidence score in the range `[0.0, 1.0]`. pub confidence: f64, } +/// Represents the RLM signature for parsing a synthesised answer with citations. pub struct AnswerSignature; impl RlmSignature for AnswerSignature { @@ -69,15 +88,20 @@ impl RlmSignature for AnswerSignature { } } +/// Represents a newly extracted knowledge-graph concept identified by the RLM. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NewConcept { + /// The canonical name of the extracted concept. pub name: String, + /// Alternative names or synonyms for this concept. #[serde(default)] pub synonyms: Vec, + /// Related concept names linked to this concept. #[serde(default)] pub relationships: Vec, } +/// Represents the RLM signature for extracting new KG concepts from a query-answer pair. pub struct ConceptExtractionSignature; impl RlmSignature for ConceptExtractionSignature { diff --git a/crates/terraphim_grep/src/sufficiency_judge.rs b/crates/terraphim_grep/src/sufficiency_judge.rs index ea95d869f..29678962f 100644 --- a/crates/terraphim_grep/src/sufficiency_judge.rs +++ b/crates/terraphim_grep/src/sufficiency_judge.rs @@ -1,10 +1,15 @@ use super::hybrid_searcher::{HybridResults, RetrievedChunk}; +/// Represents the configurable thresholds used by the heuristic sufficiency judge. #[derive(Debug, Clone)] pub struct HeuristicThresholds { + /// The minimum fraction of query terms that must be covered by the retrieved chunks. 
pub min_coverage: f64, + /// The minimum average KG concept score required to consider results confident. pub min_kg_confidence: f64, + /// The minimum number of distinct haystack sources required for diversity. pub min_diversity: usize, + /// The minimum total number of chunks required before declaring results sufficient. pub min_results: usize, } @@ -19,23 +24,31 @@ impl Default for HeuristicThresholds { } } +/// Describes the sufficiency verdict variants returned by the judge. #[derive(Debug, Clone)] pub enum Sufficiency { + /// The retrieved chunks are sufficient to answer the query without synthesis. Sufficient(Vec), + /// Coverage is partial; the RLM should synthesise an answer from the chunks. NeedsSynthesis(Vec), + /// Coverage is very low; additional query expansion and synthesis are needed. NeedsExpansion(Vec), + /// The results are too sparse or irrelevant to be useful. Insufficient(Vec), } +/// Represents the heuristic judge that evaluates whether search results are sufficient. pub struct SufficiencyJudge { thresholds: HeuristicThresholds, } impl SufficiencyJudge { + /// Builds a new `SufficiencyJudge` with the given heuristic thresholds. pub fn new(thresholds: HeuristicThresholds) -> Self { Self { thresholds } } + /// Evaluates the hybrid results against the query and returns a sufficiency verdict. pub fn judge(&self, results: &HybridResults, query: &str) -> Sufficiency { let chunks = results.to_chunks(); diff --git a/crates/terraphim_merge_coordinator/src/evaluator.rs b/crates/terraphim_merge_coordinator/src/evaluator.rs index dd51e3d73..394b738b2 100644 --- a/crates/terraphim_merge_coordinator/src/evaluator.rs +++ b/crates/terraphim_merge_coordinator/src/evaluator.rs @@ -13,9 +13,13 @@ use crate::types::{EvalVerdict, MergeCoordinatorError, MergeOutcome}; /// One evaluation of one open PR. #[derive(Debug, Clone)] pub struct PrEvaluation { + /// The Gitea PR index number. pub pr_index: u64, + /// Whether the PR is mergeable according to the Gitea API. 
pub mergeable: bool, + /// Issue numbers referenced by `Fixes #N` in the PR body. pub fixes_issues: Vec, + /// The verdict reached for this PR. pub verdict: EvalVerdict, } diff --git a/crates/terraphim_merge_coordinator/src/gitea.rs b/crates/terraphim_merge_coordinator/src/gitea.rs index 36260868f..2ad7495f8 100644 --- a/crates/terraphim_merge_coordinator/src/gitea.rs +++ b/crates/terraphim_merge_coordinator/src/gitea.rs @@ -24,10 +24,15 @@ pub struct GiteaClient { /// PR list response item (subset of Gitea fields used here). #[derive(Debug, Clone, Deserialize)] pub struct PrSummary { + /// The Gitea PR index number. pub number: u64, + /// The PR title. pub title: String, + /// The PR description body, if present. pub body: Option, + /// The current PR state (e.g. `"open"`). pub state: String, + /// Whether the PR is mergeable according to Gitea. pub mergeable: Option, } diff --git a/crates/terraphim_merge_coordinator/src/types.rs b/crates/terraphim_merge_coordinator/src/types.rs index 6c7f8ae8d..d181471d0 100644 --- a/crates/terraphim_merge_coordinator/src/types.rs +++ b/crates/terraphim_merge_coordinator/src/types.rs @@ -10,8 +10,11 @@ use std::fmt; #[repr(i32)] #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ExitCode { + /// All PRs evaluated and merged without errors. Success = 0, + /// One or more PR evaluations failed; non-critical. EvaluationFailures = 1, + /// A critical failure occurred; manual intervention required. Critical = 2, } @@ -103,7 +106,7 @@ mod tests { } } -/// Error type for the merge-coordinator surface. +/// Describes merge-coordinator failure variants. 
#[derive(Debug, thiserror::Error)] pub enum MergeCoordinatorError { #[error("PID lock held by another instance (pid={pid}, age_secs={age_secs})")] diff --git a/crates/terraphim_middleware/src/command/ripgrep.rs b/crates/terraphim_middleware/src/command/ripgrep.rs index 87802a2cb..18a2ce9f2 100644 --- a/crates/terraphim_middleware/src/command/ripgrep.rs +++ b/crates/terraphim_middleware/src/command/ripgrep.rs @@ -247,6 +247,7 @@ impl RipgrepCommand { ) } + /// Runs ripgrep to find `needle` in `haystack` with additional command-line arguments. pub async fn run_with_extra_args( &self, needle: &str, diff --git a/crates/terraphim_middleware/src/thesaurus/mod.rs b/crates/terraphim_middleware/src/thesaurus/mod.rs index a0bfa6aa6..64eded965 100644 --- a/crates/terraphim_middleware/src/thesaurus/mod.rs +++ b/crates/terraphim_middleware/src/thesaurus/mod.rs @@ -35,6 +35,7 @@ use terraphim_types::{RoleName, Thesaurus}; use crate::Result; use std::path::PathBuf; +/// Builds and persists the thesaurus for the role matching `search_query`, then reloads it into `config_state`. pub async fn build_thesaurus_from_haystack( config_state: &mut ConfigState, search_query: &SearchQuery, diff --git a/crates/terraphim_usage/src/cli.rs b/crates/terraphim_usage/src/cli.rs index 3c3664aec..2fdb2aeb7 100644 --- a/crates/terraphim_usage/src/cli.rs +++ b/crates/terraphim_usage/src/cli.rs @@ -4,23 +4,30 @@ use clap::{Parser, Subcommand}; use jiff::Zoned; use std::collections::BTreeMap; +/// Represents the top-level CLI entry point for the Terraphim AI command. #[derive(Parser)] #[command(name = "terraphim", about = "Terraphim AI CLI")] pub struct Cli { + /// The subcommand to execute. #[command(subcommand)] pub command: Commands, } +/// Describes the top-level subcommands available in the CLI. #[derive(Subcommand)] pub enum Commands { + /// Display or manage LLM usage information. Usage { + /// The usage action to perform. 
#[command(subcommand)] action: UsageAction, }, } +/// Describes the available usage subcommand actions. #[derive(Subcommand)] pub enum UsageAction { + /// Show current live usage for one or all providers. Show { #[arg(short, long)] provider: Option, @@ -139,6 +146,7 @@ struct ModelAggregation { count: usize, } +/// Executes a `UsageAction` against the provided registry and returns formatted output. pub async fn execute_usage_action( action: UsageAction, registry: &UsageRegistry, diff --git a/crates/terraphim_usage/src/formatter.rs b/crates/terraphim_usage/src/formatter.rs index 7f6964c29..ee03874a5 100644 --- a/crates/terraphim_usage/src/formatter.rs +++ b/crates/terraphim_usage/src/formatter.rs @@ -1,6 +1,7 @@ use crate::{MetricLine, ProviderUsage}; use std::fmt::Write; +/// Formats a `ProviderUsage` snapshot as human-readable text with ASCII progress bars. pub fn format_usage_text(usage: &ProviderUsage) -> String { let mut output = String::new(); writeln!( @@ -56,10 +57,12 @@ fn progress_bar(pct: f64) -> String { format!("{}{}", "█".repeat(filled), "░".repeat(empty)) } +/// Serialises a `ProviderUsage` snapshot to pretty-printed JSON. pub fn format_usage_json(usage: &ProviderUsage) -> Result { serde_json::to_string_pretty(usage) } +/// Formats a slice of `ProviderUsage` snapshots as a CSV string with a header row. pub fn format_usage_csv(usages: &[ProviderUsage]) -> String { let mut csv = String::from("provider,plan,line_type,label,value,used,limit,resets_at,fetched_at\n"); diff --git a/crates/terraphim_usage/src/lib.rs b/crates/terraphim_usage/src/lib.rs index cd45e9a0b..d26834d91 100644 --- a/crates/terraphim_usage/src/lib.rs +++ b/crates/terraphim_usage/src/lib.rs @@ -15,107 +15,167 @@ pub mod store; use serde::{Deserialize, Serialize}; use thiserror::Error; +/// Describes all error conditions for the usage metering subsystem. #[derive(Error, Debug)] pub enum UsageError { + /// The requested provider identifier was not registered. 
#[error("Provider {0} not found")] ProviderNotFound(String), + /// A network or parsing failure occurred while fetching provider usage. #[error("Failed to fetch usage from {provider}: {source}")] FetchFailed { + /// The provider that failed. provider: String, + /// The underlying error source. source: Box, }, + /// Authentication credentials were rejected by the provider. #[error("Authentication failed for {provider}: {message}")] - AuthFailed { provider: String, message: String }, + AuthFailed { + /// The provider that rejected the credentials. + provider: String, + /// Human-readable failure description. + message: String, + }, + /// The provider's API rate limit was exceeded. #[error("Rate limit exceeded for {provider}")] - RateLimited { provider: String }, + RateLimited { + /// The rate-limited provider identifier. + provider: String, + }, + /// A persistence backend operation failed. #[error("Storage error: {0}")] StorageError(String), + /// A JSON serialisation or deserialisation error occurred. #[error("Serialization error: {0}")] SerializationError(#[from] serde_json::Error), } +/// Convenience alias for `Result`. pub type Result = std::result::Result; +/// Represents a snapshot of usage data returned by a single provider. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ProviderUsage { + /// Stable machine-readable identifier for the provider. pub provider_id: String, + /// Human-readable name shown in reports. pub display_name: String, + /// Subscription plan name, if known. pub plan: Option, + /// Ordered list of metric lines for this provider. pub lines: Vec, + /// RFC 3339 timestamp of when usage was fetched. pub fetched_at: String, } +/// Describes a single display line within a provider usage report. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "type", rename_all = "snake_case")] pub enum MetricLine { + /// A plain key-value text line. Text { + /// Display label for the metric. label: String, + /// Formatted value string. 
value: String, + /// Optional colour hint for the UI. color: Option, + /// Optional secondary text beneath the value. subtitle: Option, }, + /// A progress bar showing consumption against a limit. Progress { + /// Display label for the metric. label: String, + /// Amount consumed so far. used: f64, + /// Maximum allowed amount. limit: f64, + /// How to format the progress values. format: ProgressFormat, + /// RFC 3339 timestamp when the counter resets, if known. resets_at: Option, + /// Length of the current period in milliseconds, if known. period_duration_ms: Option, + /// Optional colour hint for the UI. color: Option, }, + /// A small badge displaying a short status text. Badge { + /// Display label for the badge. label: String, + /// Badge text content. text: String, + /// Optional colour hint for the UI. color: Option, + /// Optional secondary text beneath the badge. subtitle: Option, }, } +/// Describes how a progress value should be formatted for display. #[derive(Debug, Clone, Serialize, Deserialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum ProgressFormat { + /// Render as a percentage (e.g. "42%"). Percent, + /// Render as a dollar amount (e.g. "$3.50"). Dollars, - Count { suffix: String }, + /// Render as a count with a unit suffix (e.g. "120 prompts"). + Count { + /// Unit label appended after the number. + suffix: String, + }, } +/// Defines the interface that every LLM provider adapter must implement. pub trait UsageProvider: Send + Sync { + /// Returns the stable machine-readable identifier for this provider. fn id(&self) -> &str; + /// Returns the human-readable display name for this provider. fn display_name(&self) -> &str; + /// Fetches current usage data from the provider, returning a `ProviderUsage` snapshot. fn fetch_usage( &self, ) -> std::pin::Pin> + Send + '_>>; } +/// Represents a collection of registered usage providers indexed by their identifier. 
pub struct UsageRegistry { providers: std::collections::HashMap>, } impl UsageRegistry { + /// Creates a new empty `UsageRegistry`. pub fn new() -> Self { Self { providers: std::collections::HashMap::new(), } } + /// Registers a provider, replacing any existing entry with the same identifier. pub fn register(&mut self, provider: Box) { let id = provider.id().to_string(); self.providers.insert(id, provider); } + /// Returns a reference to the provider with the given identifier, if registered. pub fn get(&self, id: &str) -> Option<&dyn UsageProvider> { self.providers.get(id).map(|p| p.as_ref()) } + /// Returns references to all registered providers. pub fn all(&self) -> Vec<&dyn UsageProvider> { self.providers.values().map(|p| p.as_ref()).collect() } + /// Returns the identifiers of all registered providers. pub fn ids(&self) -> Vec<&str> { self.providers.keys().map(|k| k.as_str()).collect() } diff --git a/crates/terraphim_usage/src/pricing.rs b/crates/terraphim_usage/src/pricing.rs index 0c3fe57a2..64e2b9766 100644 --- a/crates/terraphim_usage/src/pricing.rs +++ b/crates/terraphim_usage/src/pricing.rs @@ -1,12 +1,15 @@ use std::path::Path; use terraphim_types::ModelPricing; +/// Represents a pricing table mapping model name patterns to per-token costs. #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct PricingTable { + /// Ordered list of model pricing entries; later entries can override earlier ones. pub entries: Vec, } impl PricingTable { + /// Returns a `PricingTable` populated with built-in default model prices. pub fn embedded_defaults() -> Self { Self { entries: vec![ @@ -124,6 +127,7 @@ impl PricingTable { } } + /// Loads pricing entries from a TOML file at `path`, merging with embedded defaults. 
pub fn load(path: &Path) -> Self { match std::fs::read_to_string(path) { Ok(content) => match toml::from_str::(&content) { @@ -141,12 +145,14 @@ impl PricingTable { } } + /// Loads pricing from the default path `~/.config/terraphim/pricing.toml`. pub fn load_default_path() -> Self { let home = std::env::var("HOME").unwrap_or_default(); let path = Path::new(&home).join(".config/terraphim/pricing.toml"); Self::load(&path) } + /// Returns the best-matching `ModelPricing` entry for the given model identifier. pub fn find_pricing(&self, model: &str) -> Option<&ModelPricing> { let model_lower = model.to_lowercase(); let mut best_match: Option<&ModelPricing> = None; @@ -167,6 +173,7 @@ impl PricingTable { best_match } + /// Calculates the total cost in USD for the given token counts and model. pub fn calculate_cost( &self, model: &str, diff --git a/crates/terraphim_usage/src/providers/ccusage.rs b/crates/terraphim_usage/src/providers/ccusage.rs index 506f6952c..8edb68f05 100644 --- a/crates/terraphim_usage/src/providers/ccusage.rs +++ b/crates/terraphim_usage/src/providers/ccusage.rs @@ -1,11 +1,13 @@ use crate::{MetricLine, ProgressFormat, ProviderUsage, Result, UsageError, UsageProvider}; use std::time::Duration; +/// Represents a usage provider backed by the `ccusage` local Claude Code log parser. pub struct CcusageProvider { client: std::sync::Mutex, } impl CcusageProvider { + /// Creates a new `CcusageProvider` with a 5-minute cache TTL. 
pub fn new() -> Self { let client = terraphim_ccusage::CcusageClient::new(terraphim_ccusage::CcusageProvider::Claude) diff --git a/crates/terraphim_usage/src/providers/claude.rs b/crates/terraphim_usage/src/providers/claude.rs index b4497a023..dade06dbe 100644 --- a/crates/terraphim_usage/src/providers/claude.rs +++ b/crates/terraphim_usage/src/providers/claude.rs @@ -2,6 +2,7 @@ use crate::{MetricLine, ProgressFormat, ProviderUsage, Result, UsageError, Usage use std::path::PathBuf; use std::time::Duration; +/// Represents a usage provider for the Claude Code subscription plan. pub struct ClaudeProvider { #[allow(dead_code)] credentials_path: PathBuf, @@ -9,6 +10,7 @@ pub struct ClaudeProvider { } impl ClaudeProvider { + /// Creates a new `ClaudeProvider` using the default credentials path. pub fn new() -> Self { let home = std::env::var("HOME").unwrap_or_default(); Self { @@ -20,6 +22,7 @@ impl ClaudeProvider { } } + /// Creates a `ClaudeProvider` using the specified credentials file path. pub fn with_credentials_path(path: PathBuf) -> Self { let ccusage = terraphim_ccusage::CcusageClient::new(terraphim_ccusage::CcusageProvider::Claude) diff --git a/crates/terraphim_usage/src/providers/kimi.rs b/crates/terraphim_usage/src/providers/kimi.rs index da257e42d..42eab2cba 100644 --- a/crates/terraphim_usage/src/providers/kimi.rs +++ b/crates/terraphim_usage/src/providers/kimi.rs @@ -1,8 +1,10 @@ use crate::{ProviderUsage, Result, UsageError, UsageProvider}; +/// Represents a usage provider for the Kimi (Moonshot AI) coding subscription. pub struct KimiProvider; impl KimiProvider { + /// Creates a new `KimiProvider`. 
pub fn new() -> Self { Self } diff --git a/crates/terraphim_usage/src/providers/minimax.rs b/crates/terraphim_usage/src/providers/minimax.rs index d502aed05..017b316b8 100644 --- a/crates/terraphim_usage/src/providers/minimax.rs +++ b/crates/terraphim_usage/src/providers/minimax.rs @@ -39,6 +39,7 @@ struct MiniMaxModelRemains { plan: Option, } +/// Represents a usage provider for the MiniMax coding subscription API. pub struct MiniMaxProvider { api_key: Option, cn_api_key: Option, @@ -46,6 +47,7 @@ pub struct MiniMaxProvider { } impl MiniMaxProvider { + /// Creates a new `MiniMaxProvider`, reading API keys from environment variables. pub fn new() -> Self { Self { api_key: std::env::var("MINIMAX_API_KEY") @@ -59,6 +61,7 @@ impl MiniMaxProvider { } } + /// Creates a `MiniMaxProvider` with an explicit global-region API key. pub fn with_api_key(api_key: String) -> Self { Self { api_key: Some(api_key), diff --git a/crates/terraphim_usage/src/providers/opencode_go.rs b/crates/terraphim_usage/src/providers/opencode_go.rs index 0c981f105..fa1a2376d 100644 --- a/crates/terraphim_usage/src/providers/opencode_go.rs +++ b/crates/terraphim_usage/src/providers/opencode_go.rs @@ -1,11 +1,13 @@ use crate::{MetricLine, ProgressFormat, ProviderUsage, Result, UsageError, UsageProvider}; use std::path::PathBuf; +/// Represents a usage provider that reads usage data from the OpenCode Go SQLite database. pub struct OpenCodeGoProvider { db_path: PathBuf, } impl OpenCodeGoProvider { + /// Creates a new `OpenCodeGoProvider` using the default database path. pub fn new() -> Self { let home = std::env::var("HOME").unwrap_or_default(); Self { @@ -13,6 +15,7 @@ impl OpenCodeGoProvider { } } + /// Creates an `OpenCodeGoProvider` using the specified SQLite database path. 
pub fn with_db_path(path: PathBuf) -> Self { Self { db_path: path } } diff --git a/crates/terraphim_usage/src/providers/zai.rs b/crates/terraphim_usage/src/providers/zai.rs index 183428b94..19fdad0f4 100644 --- a/crates/terraphim_usage/src/providers/zai.rs +++ b/crates/terraphim_usage/src/providers/zai.rs @@ -59,12 +59,14 @@ struct ZaiSubscriptionItem { next_renew_time: Option, } +/// Represents a usage provider for the Z.ai (GLM) coding subscription API. pub struct ZaiProvider { api_key: Option, client: reqwest::Client, } impl ZaiProvider { + /// Creates a new `ZaiProvider`, reading the API key from environment variables. pub fn new() -> Self { Self { api_key: std::env::var("ZAI_API_KEY") @@ -77,6 +79,7 @@ impl ZaiProvider { } } + /// Creates a `ZaiProvider` with an explicit API key. pub fn with_api_key(api_key: String) -> Self { Self { api_key: Some(api_key),