From cd71b7a77a92cca3b229a6bde0fc93d62f0ac47b Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:16:26 +0200 Subject: [PATCH 01/11] docs(#2035): add missing doc comments to 9 crates (types, haystack_core, jmap, settings, mcp_server, rolegraph, sessions, persistence, middleware) Refs #2035 --- crates/haystack_core/src/lib.rs | 3 +++ crates/haystack_jmap/src/main.rs | 1 + crates/terraphim_mcp_server/src/lib.rs | 6 +++++ .../src/command/ripgrep.rs | 24 +++++++++++++++++-- .../terraphim_middleware/src/haystack/mod.rs | 5 ++++ crates/terraphim_middleware/src/lib.rs | 14 +++++++++++ .../terraphim_middleware/src/thesaurus/mod.rs | 1 + crates/terraphim_persistence/src/error.rs | 8 +++++++ crates/terraphim_persistence/src/lib.rs | 9 +++++++ crates/terraphim_persistence/src/settings.rs | 6 +++++ crates/terraphim_rolegraph/src/lib.rs | 11 +++++++++ .../terraphim_sessions/src/connector/mod.rs | 2 ++ crates/terraphim_sessions/src/model.rs | 18 ++++++++++++-- crates/terraphim_settings/src/lib.rs | 6 +++++ crates/terraphim_types/src/lib.rs | 1 + 15 files changed, 111 insertions(+), 4 deletions(-) diff --git a/crates/haystack_core/src/lib.rs b/crates/haystack_core/src/lib.rs index ef6c3c04a..a7fb093dd 100644 --- a/crates/haystack_core/src/lib.rs +++ b/crates/haystack_core/src/lib.rs @@ -5,10 +5,13 @@ //! async search interface over heterogeneous backends. use terraphim_types::{Document, SearchQuery}; +/// A data-source integration that exposes a uniform async search interface. pub trait HaystackProvider { + /// The error type returned by this provider's operations. type Error: std::fmt::Display + std::fmt::Debug + Send + Sync + 'static; #[allow(async_fn_in_trait)] + /// Search this haystack for documents matching `query`. 
async fn search(&self, query: &SearchQuery) -> Result, Self::Error>; } diff --git a/crates/haystack_jmap/src/main.rs b/crates/haystack_jmap/src/main.rs index 3fa9bb234..db8bc1982 100644 --- a/crates/haystack_jmap/src/main.rs +++ b/crates/haystack_jmap/src/main.rs @@ -1,3 +1,4 @@ +//! Binary entry point for the JMAP haystack search tool. use anyhow::Result; use clap::Parser; use haystack_jmap::JMAPClient; diff --git a/crates/terraphim_mcp_server/src/lib.rs b/crates/terraphim_mcp_server/src/lib.rs index 3d940b8bd..36a941d13 100644 --- a/crates/terraphim_mcp_server/src/lib.rs +++ b/crates/terraphim_mcp_server/src/lib.rs @@ -40,6 +40,7 @@ use terraphim_types::{Layer, NormalizedTermValue, RoleName, SearchQuery}; use thiserror::Error; use tracing::{error, info}; +/// Resource mapping utilities for converting Terraphim documents to MCP resources. pub mod resource_mapper; use crate::resource_mapper::TerraphimResourceMapper; @@ -74,14 +75,19 @@ fn find_terraphim_rlm_binary() -> std::path::PathBuf { /// ([`ErrorData`]), and general I/O or third-party errors via [`anyhow`]. #[derive(Error, Debug)] pub enum TerraphimMcpError { + /// A service-layer operation failed. #[error("Service error: {0}")] Service(#[from] terraphim_service::ServiceError), + /// JSON serialisation or deserialisation failed. #[error("JSON error: {0}")] Json(#[from] serde_json::Error), + /// An MCP protocol error was returned. #[error("MCP error: {0}")] Mcp(#[from] ErrorData), + /// An I/O operation failed. #[error("I/O error: {0}")] Io(#[from] std::io::Error), + /// A general error from an upstream library. 
#[error("Anyhow error: {0}")] Anyhow(#[from] anyhow::Error), } diff --git a/crates/terraphim_middleware/src/command/ripgrep.rs b/crates/terraphim_middleware/src/command/ripgrep.rs index 87802a2cb..dc6f17e8f 100644 --- a/crates/terraphim_middleware/src/command/ripgrep.rs +++ b/crates/terraphim_middleware/src/command/ripgrep.rs @@ -24,10 +24,15 @@ use crate::Result; #[serde(tag = "type", content = "data")] #[serde(rename_all = "snake_case")] pub enum Message { + /// Marks the start of a per-file search. Begin(Begin), + /// Marks the end of a per-file search. End(End), + /// A non-overlapping pattern match within a file. Match(Match), + /// Context lines surrounding a match. Context(Context), + /// Aggregate statistics for the entire ripgrep run. Summary(Summary), } @@ -35,6 +40,7 @@ pub enum Message { /// It contains the path that is being searched, if one exists. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Begin { + /// Path of the file being searched, if provided. pub path: Option, } @@ -63,10 +69,13 @@ pub struct Summary { /// The `Match` message is sent for each non-overlapping match of a search. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Match { + /// Path of the file that contains the match, if provided. pub path: Option, + /// The matching line(s) of text. pub lines: Data, line_number: Option, absolute_offset: u64, + /// All non-overlapping pattern matches within this line. pub submatches: Vec, } @@ -80,7 +89,9 @@ impl Match { /// The `Context` specifies the lines surrounding a match. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Context { + /// Path of the file containing the context lines, if provided. pub path: Option, + /// The context line(s) of text surrounding a match. 
pub lines: Data, line_number: Option, absolute_offset: u64, @@ -108,10 +119,18 @@ pub struct SubMatch { #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] #[serde(untagged)] pub enum Data { - Text { text: String }, + /// Valid UTF-8 text content. + Text { + /// The text string. + text: String, + }, // This variant is used when the data isn't valid UTF-8. The bytes are // base64 encoded, so using a String here is OK. - Bytes { bytes: String }, + /// Binary content encoded as base64. + Bytes { + /// Base64-encoded bytes for non-UTF-8 data. + bytes: String, + }, } /// Gets the path from a `Data` type. @@ -247,6 +266,7 @@ impl RipgrepCommand { ) } + /// Run ripgrep with caller-supplied extra arguments in addition to the standard ones. pub async fn run_with_extra_args( &self, needle: &str, diff --git a/crates/terraphim_middleware/src/haystack/mod.rs b/crates/terraphim_middleware/src/haystack/mod.rs index 2b528bf99..5bdabbfac 100644 --- a/crates/terraphim_middleware/src/haystack/mod.rs +++ b/crates/terraphim_middleware/src/haystack/mod.rs @@ -1,13 +1,18 @@ #[cfg(feature = "ai-assistant")] pub mod ai_assistant; +/// ClickUp task-management haystack indexer. pub mod clickup; #[cfg(feature = "grepapp")] pub mod grep_app; #[cfg(feature = "jmap")] pub mod jmap; +/// MCP (Model Context Protocol) haystack indexer. pub mod mcp; +/// Perplexity AI search haystack indexer. pub mod perplexity; +/// QueryRs (Rust docs + Reddit) haystack indexer. pub mod query_rs; +/// Quickwit cloud-native search engine haystack indexer. 
pub mod quickwit; #[cfg(feature = "ai-assistant")] pub use ai_assistant::AiAssistantHaystackIndexer; diff --git a/crates/terraphim_middleware/src/lib.rs b/crates/terraphim_middleware/src/lib.rs index 41c62907f..0d0f07e61 100644 --- a/crates/terraphim_middleware/src/lib.rs +++ b/crates/terraphim_middleware/src/lib.rs @@ -15,9 +15,13 @@ use serde_json as json; use terraphim_automata::builder::BuilderError; use terraphim_config::TerraphimConfigError; +/// Sandboxed external-tool execution (ripgrep, fff, shell commands). pub mod command; +/// Haystack integrations: ClickUp, QueryRs, MCP, Atlassian, Discourse, JMAP, Quickwit. pub mod haystack; +/// Indexer trait and parallel haystack dispatch. pub mod indexer; +/// Thesaurus building from source documents and URLs. pub mod thesaurus; #[cfg(feature = "kg-integration")] @@ -38,33 +42,43 @@ pub use indexer::{search_haystacks, FffIndexer, RipgrepIndexer}; /// Errors produced by the middleware layer during indexing and search orchestration. #[derive(thiserror::Error, Debug)] pub enum Error { + /// JSON deserialisation of haystack output failed. #[error("Serde deserialization error: {0}")] Json(#[from] json::Error), + /// An I/O operation failed (file read, process pipe, …). #[error("IO error: {0}")] Io(#[from] std::io::Error), + /// The requested role does not exist in the current configuration. #[error("Role not found: {0}")] RoleNotFound(String), + /// The indexation pipeline encountered an error. #[error("Indexation error: {0}")] Indexation(String), + /// A configuration layer operation failed. #[error("Config error: {0}")] Config(#[from] TerraphimConfigError), + /// A persistence layer operation failed. #[error("Persistence error: {0}")] Persistence(#[from] terraphim_persistence::Error), + /// Building the automata from the thesaurus failed. #[error("Builder error: {0}")] Builder(#[from] BuilderError), + /// An HTTP request to an external haystack service failed. 
#[error("HTTP request error: {0}")] Http(#[from] reqwest::Error), + /// Input failed a domain-level validation check. #[error("Validation error: {0}")] Validation(String), + /// The file-search subsystem returned an error. #[error("File search error: {0}")] FileSearch(String), } diff --git a/crates/terraphim_middleware/src/thesaurus/mod.rs b/crates/terraphim_middleware/src/thesaurus/mod.rs index a0bfa6aa6..8ef75e351 100644 --- a/crates/terraphim_middleware/src/thesaurus/mod.rs +++ b/crates/terraphim_middleware/src/thesaurus/mod.rs @@ -35,6 +35,7 @@ use terraphim_types::{RoleName, Thesaurus}; use crate::Result; use std::path::PathBuf; +/// Build a thesaurus from the active haystack sources and store it in the config state. pub async fn build_thesaurus_from_haystack( config_state: &mut ConfigState, search_query: &SearchQuery, diff --git a/crates/terraphim_persistence/src/error.rs b/crates/terraphim_persistence/src/error.rs index d29e761c6..7fc51d57c 100644 --- a/crates/terraphim_persistence/src/error.rs +++ b/crates/terraphim_persistence/src/error.rs @@ -3,27 +3,35 @@ use terraphim_settings; /// Errors arising from persistence layer operations. #[derive(thiserror::Error, Debug)] pub enum Error { + /// A named storage profile could not be loaded or parsed. #[error("Error with profile: {0}")] Profile(String), + /// An OpenDAL storage operation failed. #[error("OpenDal error: {0}")] OpenDal(Box), + /// JSON serialisation or deserialisation failed. #[error("JSON error: {0}")] Json(#[from] serde_json::Error), + /// No storage operator is configured or available. #[error("No operator found")] NoOperator, + /// The requested key or record does not exist in the store. #[error("Not found: {0}")] NotFound(String), + /// Loading device settings failed. #[error("Settings error: {0}")] Settings(#[from] terraphim_settings::Error), + /// An I/O operation on the filesystem failed. 
#[error("IO error: {0}")] Io(#[from] std::io::Error), + /// Serialisation to or from a non-JSON format failed. #[error("Serialization error: {0}")] Serde(String), } diff --git a/crates/terraphim_persistence/src/lib.rs b/crates/terraphim_persistence/src/lib.rs index 4504cd480..49e3efdb0 100644 --- a/crates/terraphim_persistence/src/lib.rs +++ b/crates/terraphim_persistence/src/lib.rs @@ -16,11 +16,16 @@ //! - [`Persistable`] -- blanket trait implemented by every serialisable type pub mod compression; +/// Persistence operations for [`terraphim_types::Conversation`] objects. pub mod conversation; +/// Persistence operations for [`terraphim_types::Document`] objects. pub mod document; +/// Error types for the persistence layer. pub mod error; pub mod memory; +/// OpenDAL operator construction from device settings profiles. pub mod settings; +/// Persistence operations for [`terraphim_types::Thesaurus`] objects. pub mod thesaurus; use async_once_cell::OnceCell as AsyncOnceCell; @@ -64,7 +69,9 @@ static DEVICE_STORAGE: AsyncOnceCell = AsyncOnceCell::new(); /// [`DeviceStorage::init_memory_only`] in tests to avoid touching the filesystem. #[derive(Debug)] pub struct DeviceStorage { + /// Named storage operators with their measured latency in nanoseconds. pub ops: HashMap, + /// Pre-selected lowest-latency operator used as the cache write-back target. pub fastest_op: Operator, } @@ -458,7 +465,9 @@ pub trait Persistable: Serialize + DeserializeOwned { }.instrument(span).await } + /// Return the storage key that uniquely identifies this value. fn get_key(&self) -> String; + /// Normalise an arbitrary key string to a storage-safe form. 
fn normalize_key(&self, key: &str) -> String { // Replace non-alphanumeric characters with underscores to preserve semantic meaning let re = regex::Regex::new(r"[^a-zA-Z0-9]+").expect("Failed to create regex"); diff --git a/crates/terraphim_persistence/src/settings.rs b/crates/terraphim_persistence/src/settings.rs index c01fa63ef..75297982f 100644 --- a/crates/terraphim_persistence/src/settings.rs +++ b/crates/terraphim_persistence/src/settings.rs @@ -161,6 +161,9 @@ fn create_memory_operator() -> OpendalResult { Ok(Operator::new(builder)?.finish()) } +/// Build and benchmark a single OpenDAL operator from the named settings profile. +/// +/// Returns the operator together with its measured write latency in nanoseconds. pub async fn parse_profile( settings: &DeviceSettings, profile_name: &str, @@ -346,6 +349,9 @@ pub async fn parse_profile( Ok((op, speed)) } +/// Build and benchmark all OpenDAL operators declared in the device settings. +/// +/// Returns a map from profile name to `(Operator, latency_ns)`. pub async fn parse_profiles( settings: &DeviceSettings, ) -> Result> { diff --git a/crates/terraphim_rolegraph/src/lib.rs b/crates/terraphim_rolegraph/src/lib.rs index c707b6cd9..a83d3b29b 100644 --- a/crates/terraphim_rolegraph/src/lib.rs +++ b/crates/terraphim_rolegraph/src/lib.rs @@ -33,6 +33,7 @@ use terraphim_types::{ Document, Edge, IndexedDocument, Node, NormalizedTermValue, RoleName, Thesaurus, }; use tokio::sync::{Mutex, MutexGuard}; +/// Input parsing utilities for role-graph construction (thesaurus and document ingestion). pub mod input; #[cfg(feature = "medical")] @@ -48,14 +49,19 @@ use unicode_segmentation::UnicodeSegmentation; /// Errors produced by the role-graph knowledge graph operations. #[derive(thiserror::Error, Debug)] pub enum Error { + /// A referenced node ID does not exist in the graph. #[error("The given node ID was not found")] NodeIdNotFound, + /// A referenced edge ID does not exist in the graph. 
#[error("The given Edge ID was not found")] EdgeIdNotFound, + /// Failed to serialise an indexed document to JSON. #[error("Cannot convert IndexedDocument to JSON: {0}")] JsonConversionError(#[from] serde_json::Error), + /// The underlying automata engine returned an error. #[error("Error while driving terraphim automata: {0}")] TerraphimAutomataError(#[from] terraphim_automata::TerraphimAutomataError), + /// Building the Aho-Corasick automaton failed. #[error("Indexing error: {0}")] AhoCorasickError(#[from] aho_corasick::BuildError), } @@ -65,10 +71,15 @@ type Result = std::result::Result; /// Statistics about the graph structure for debugging #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct GraphStats { + /// Number of concept nodes in the graph. pub node_count: usize, + /// Number of edges (concept-to-concept relationships) in the graph. pub edge_count: usize, + /// Number of indexed documents. pub document_count: usize, + /// Number of entries in the thesaurus automaton. pub thesaurus_size: usize, + /// Whether the graph has been populated with at least one document. pub is_populated: bool, } diff --git a/crates/terraphim_sessions/src/connector/mod.rs b/crates/terraphim_sessions/src/connector/mod.rs index c58fbbfbe..9f806bf50 100644 --- a/crates/terraphim_sessions/src/connector/mod.rs +++ b/crates/terraphim_sessions/src/connector/mod.rs @@ -42,7 +42,9 @@ use tokio::sync::mpsc; pub enum ConnectorStatus { /// Connector found with estimated session count Available { + /// Filesystem path to the connector's data directory. path: PathBuf, + /// Estimated number of sessions available, if determinable. 
sessions_estimate: Option, }, /// Connector's data directory not found diff --git a/crates/terraphim_sessions/src/model.rs b/crates/terraphim_sessions/src/model.rs index 011745617..efcf694c8 100644 --- a/crates/terraphim_sessions/src/model.rs +++ b/crates/terraphim_sessions/src/model.rs @@ -61,21 +61,33 @@ impl std::fmt::Display for MessageRole { #[serde(tag = "type", rename_all = "snake_case")] pub enum ContentBlock { /// Plain text content - Text { text: String }, + Text { + /// The text string. + text: String, + }, /// Tool use request ToolUse { + /// Unique identifier for this tool use call. id: String, + /// Name of the tool being called. name: String, + /// Input arguments for the tool call. input: serde_json::Value, }, /// Tool result ToolResult { + /// Identifier matching the originating tool use request. tool_use_id: String, + /// Output produced by the tool. content: String, + /// Exit code returned by the tool (0 = success). exit_code: i32, }, /// Image content - Image { source: String }, + Image { + /// Source reference for the image (URL, base64, or file path). + source: String, + }, } impl<'de> serde::Deserialize<'de> for ContentBlock { @@ -754,7 +766,9 @@ mod tests { /// File access operation type #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub enum FileOperation { + /// A file was opened for reading. Read, + /// A file was opened for writing. Write, } diff --git a/crates/terraphim_settings/src/lib.rs b/crates/terraphim_settings/src/lib.rs index bc49881fa..76cab8bd1 100644 --- a/crates/terraphim_settings/src/lib.rs +++ b/crates/terraphim_settings/src/lib.rs @@ -17,10 +17,13 @@ use terraphim_onepassword_cli::{OnePasswordLoader, SecretLoader}; /// Errors arising from loading or applying device settings. #[derive(thiserror::Error, Debug)] pub enum Error { + /// A configuration layer failed to load or parse. #[error("config error: {0}")] ConfigError(#[from] twelf::Error), + /// An I/O error occurred while reading a settings file. 
#[error("io error: {0}")] IoError(#[from] std::io::Error), + /// A required environment variable was absent or malformed. #[error("env error: {0}")] EnvError(#[from] std::env::VarError), #[cfg(feature = "onepassword")] @@ -63,6 +66,9 @@ where /// These values are set when the server is initialized, and do not change while /// running. These are constructed from default or local files and ENV /// variables. +// The `#[config]` macro generates a `with_layers` constructor that cannot +// receive a doc comment; suppress the resulting lint. +#[allow(missing_docs)] #[config] #[derive(Debug, Serialize, Clone)] pub struct DeviceSettings { diff --git a/crates/terraphim_types/src/lib.rs b/crates/terraphim_types/src/lib.rs index d54ee81c0..792052835 100644 --- a/crates/terraphim_types/src/lib.rs +++ b/crates/terraphim_types/src/lib.rs @@ -1827,6 +1827,7 @@ impl std::fmt::Display for RotStatus { } } +/// A chat conversation with its history, context, and associated role. #[derive(Debug, Clone, Serialize, Deserialize)] #[cfg_attr(feature = "typescript", derive(Tsify))] #[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))] From 7b81048176c7a7bb544d0d609947e3d3a69d7eb7 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:19:50 +0200 Subject: [PATCH 02/11] docs(#2035): add missing doc comments to terraphim_config (43 items) Refs #2035 --- crates/terraphim_config/src/lib.rs | 29 +++++++++++++++++++++++++- crates/terraphim_config/src/project.rs | 19 ++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/crates/terraphim_config/src/lib.rs b/crates/terraphim_config/src/lib.rs index ca71a5362..2a8465f10 100644 --- a/crates/terraphim_config/src/lib.rs +++ b/crates/terraphim_config/src/lib.rs @@ -46,7 +46,7 @@ use crate::llm_router::LlmRouterConfig; // LLM Router configuration pub mod llm_router; -// Project-level configuration discovery +/// Project-level configuration discovery by walking the filesystem. 
pub mod project; /// Convenience alias for `Result` used throughout this crate. @@ -59,36 +59,47 @@ type PersistenceResult = std::result::Result /// Errors arising from loading, validating, or persisting Terraphim configuration. #[derive(Error, Debug)] pub enum TerraphimConfigError { + /// The configuration file could not be found. #[error("Unable to load config")] NotFound, + /// The configuration contains no roles, but at least one is required. #[error("At least one role is required")] NoRoles, + /// An error occurred while accessing a configuration profile. #[error("Profile error")] Profile(String), + /// An error propagated from the persistence layer. #[error("Persistence error")] Persistence(Box), + /// A JSON serialisation or deserialisation error occurred. #[error("Serde JSON error")] Json(#[from] serde_json::Error), + /// The tracing subscriber could not be initialised. #[error("Cannot initialize tracing subscriber")] TracingSubscriber(Box), + /// An error propagated from the rolegraph pipe. #[error("Pipe error")] Pipe(#[from] terraphim_rolegraph::Error), + /// An error propagated from the automata layer. #[error("Automata error")] Automata(#[from] terraphim_automata::TerraphimAutomataError), + /// A URL parsing error occurred. #[error("Url error")] Url(#[from] url::ParseError), + /// An I/O error occurred while reading or writing configuration data. #[error("IO error")] Io(#[from] std::io::Error), + /// A generic configuration validation error with a descriptive message. #[error("Config error")] Config(String), } @@ -222,13 +233,19 @@ fn default_context_window() -> Option { #[cfg_attr(feature = "typescript", derive(Tsify))] #[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))] pub struct Role { + /// Optional abbreviated identifier for this role. pub shortname: Option, + /// The display name of this role. 
pub name: RoleName, /// The relevance function used to rank search results pub relevance_function: RelevanceFunction, + /// Whether Terraphim knowledge-graph integration is active for this role. pub terraphim_it: bool, + /// The UI theme name applied when this role is active. pub theme: String, + /// Optional knowledge-graph configuration for this role. pub kg: Option, + /// The list of haystacks (data sources) searched by this role. pub haystacks: Vec, /// Enable AI-powered article summaries using LLM providers #[serde(default)] @@ -254,6 +271,7 @@ pub struct Role { /// Maximum tokens for LLM context window (default: 32768) #[serde(default = "default_context_window")] pub llm_context_window: Option, + /// Arbitrary key-value extension fields for provider-specific configuration. #[serde(flatten)] #[schemars(skip)] #[cfg_attr(feature = "typescript", tsify(type = "Record"))] @@ -504,7 +522,9 @@ pub struct KnowledgeGraph { pub automata_path: Option, /// Knowlege graph can be re-build from local files, for example Markdown files pub knowledge_graph_local: Option, + /// Whether this knowledge graph is publicly accessible. pub public: bool, + /// Whether this knowledge graph should be published to a remote store. pub publish: bool, } impl KnowledgeGraph { @@ -519,7 +539,9 @@ impl KnowledgeGraph { #[cfg_attr(feature = "typescript", derive(Tsify))] #[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))] pub struct KnowledgeGraphLocal { + /// The format of the input documents used to build the knowledge graph. pub input_type: KnowledgeGraphInputType, + /// The filesystem path to the directory or file containing the source documents. 
pub path: PathBuf, } /// Builder, which allows to create a new `Config` @@ -933,8 +955,11 @@ impl Default for ConfigBuilder { #[cfg_attr(feature = "typescript", derive(Tsify))] #[cfg_attr(feature = "typescript", tsify(into_wasm_abi, from_wasm_abi))] pub enum ConfigId { + /// Configuration deployed as a standalone background server process. Server, + /// Configuration deployed as a native desktop application. Desktop, + /// Configuration compiled into a library or WASM module. Embedded, } @@ -955,6 +980,7 @@ pub struct Config { pub roles: AHashMap, /// The default role to use if no role is specified pub default_role: RoleName, + /// The currently active role selected by the user. pub selected_role: RoleName, } @@ -1271,6 +1297,7 @@ impl ConfigState { config.default_role.clone() } + /// Get the currently selected role name from the config. pub async fn get_selected_role(&self) -> RoleName { let config = self.config.lock().await; config.selected_role.clone() diff --git a/crates/terraphim_config/src/project.rs b/crates/terraphim_config/src/project.rs index 57cca6007..0e208d07f 100644 --- a/crates/terraphim_config/src/project.rs +++ b/crates/terraphim_config/src/project.rs @@ -1,33 +1,47 @@ use std::path::{Path, PathBuf}; use thiserror::Error; +/// Errors that can occur while discovering or loading project-level configuration. #[derive(Error, Debug)] pub enum ProjectDiscoveryError { + /// An I/O error occurred while reading the filesystem. #[error("IO error: {0}")] Io(#[from] std::io::Error), + /// A JSON deserialisation error occurred while parsing a config file. #[error("JSON error: {0}")] Json(#[from] serde_json::Error), + /// The supplied path is not a directory. #[error("Not a directory: {0}")] NotDirectory(PathBuf), #[error( "multiple project roles found ({available:?}); pass --role or set selected/default role" )] - AmbiguousRole { available: Vec }, + /// Multiple roles exist and none is designated as selected or default. 
+ AmbiguousRole { + /// The role names that were found, sorted alphabetically. + available: Vec, + }, } +/// Project-scoped configuration loaded from a `.terraphim/` directory. #[derive(Debug, Clone, Default, serde::Serialize, serde::Deserialize)] pub struct ProjectConfig { + /// Optional global keyboard shortcut to activate Terraphim from any window. #[serde(default)] pub global_shortcut: Option, + /// The name of the role to use when no explicit role is specified. #[serde(default)] pub default_role: Option, + /// The name of the role currently selected for this project. #[serde(default)] pub selected_role: Option, + /// Role definitions keyed by role name, merged from individual `role-*.json` files. #[serde(default)] pub roles: std::collections::HashMap, } impl ProjectConfig { + /// Load a `ProjectConfig` directly from a JSON file at the given path. pub fn from_file(path: &Path) -> Result { let content = std::fs::read_to_string(path)?; let config: ProjectConfig = serde_json::from_str(&content)?; @@ -68,10 +82,12 @@ impl ProjectConfig { Ok(config) } + /// Return `true` if no shortcut and no roles are configured. pub fn is_empty(&self) -> bool { self.global_shortcut.is_none() && self.roles.is_empty() } + /// Resolve the effective role name, preferring the explicit argument then selected/default fields. pub fn resolve_role_name( &self, explicit_role: Option<&str>, @@ -126,6 +142,7 @@ pub fn discover_kg_path(dir: &Path, role_name: Option<&str>) -> Option if kg_dir.is_dir() { Some(kg_dir) } else { None } } +/// Walk up the directory tree from `start_dir` to find the nearest `.terraphim/` directory. 
pub fn discover(start_dir: Option<&Path>) -> Result, ProjectDiscoveryError> { let start_dir = match start_dir { Some(d) => d.to_path_buf(), From 6fc8d7ed38c6fb362ff34acefea6006cbff0edaf Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:25:57 +0200 Subject: [PATCH 03/11] docs(#2035): add missing doc comments to terraphim_automata, terraphim_rlm, terraphim_grep Refs #2035 --- crates/terraphim_automata/src/autocomplete.rs | 18 +- crates/terraphim_automata/src/builder.rs | 36 +++- crates/terraphim_automata/src/evaluation.rs | 15 ++ crates/terraphim_automata/src/lib.rs | 5 + .../src/markdown_directives.rs | 7 + crates/terraphim_automata/src/matcher.rs | 9 + crates/terraphim_grep/src/error.rs | 9 + crates/terraphim_grep/src/hybrid_searcher.rs | 34 ++++ crates/terraphim_grep/src/kg_curation.rs | 4 + crates/terraphim_grep/src/lib.rs | 28 +++ crates/terraphim_grep/src/rlm_context.rs | 14 ++ crates/terraphim_grep/src/signatures.rs | 24 +++ .../terraphim_grep/src/sufficiency_judge.rs | 13 ++ crates/terraphim_rlm/src/error.rs | 167 +++++++++++++++--- crates/terraphim_rlm/src/executor/docker.rs | 3 + crates/terraphim_rlm/src/executor/local.rs | 3 + crates/terraphim_rlm/src/mcp_tools.rs | 21 +++ crates/terraphim_rlm/src/query_loop.rs | 10 +- crates/terraphim_rlm/src/session.rs | 3 + 19 files changed, 391 insertions(+), 32 deletions(-) diff --git a/crates/terraphim_automata/src/autocomplete.rs b/crates/terraphim_automata/src/autocomplete.rs index 84bf5b138..b3999074e 100644 --- a/crates/terraphim_automata/src/autocomplete.rs +++ b/crates/terraphim_automata/src/autocomplete.rs @@ -21,17 +21,25 @@ pub struct AutocompleteIndex { /// Metadata associated with each autocomplete term #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AutocompleteMetadata { + /// Unique numeric identifier for the normalised term. pub id: u64, + /// Normalised (canonical) form of the term. 
pub normalized_term: NormalizedTermValue, + /// Optional URL linking the term to an external resource. #[serde(default)] pub url: Option, + /// Original term string as it appears in the thesaurus source. pub original_term: String, + /// Optional action to perform when the term is selected. #[serde(default)] pub action: Option, + /// Optional priority hint for ranking results. #[serde(default)] pub priority: Option, + /// Optional trigger string that activates this autocomplete entry. #[serde(default)] pub trigger: Option, + /// Whether this term should always appear at the top of results. #[serde(default)] pub pinned: bool, } @@ -39,18 +47,26 @@ pub struct AutocompleteMetadata { /// Result from autocomplete search #[derive(Debug, Clone, PartialEq)] pub struct AutocompleteResult { + /// Display term returned to the caller. pub term: String, + /// Normalised form of the matched term. pub normalized_term: NormalizedTermValue, + /// Unique numeric identifier of the term. pub id: u64, + /// Optional URL associated with this term. pub url: Option, - pub score: f64, // FST value as relevance score + /// FST value used as a relevance score for ranking. + pub score: f64, } /// Configuration for autocomplete behavior #[derive(Debug, Clone)] pub struct AutocompleteConfig { + /// Maximum number of results to return from a single search. pub max_results: usize, + /// Minimum number of characters required before a search is performed. pub min_prefix_length: usize, + /// Whether the prefix search should be case-sensitive. pub case_sensitive: bool, } diff --git a/crates/terraphim_automata/src/builder.rs b/crates/terraphim_automata/src/builder.rs index 7e92cd0e9..3ba86e367 100644 --- a/crates/terraphim_automata/src/builder.rs +++ b/crates/terraphim_automata/src/builder.rs @@ -18,10 +18,13 @@ use terraphim_types::{Concept, NormalizedTerm, NormalizedTermValue, Thesaurus}; /// Errors that can occur while building a thesaurus or running ripgrep. 
#[derive(Error, Debug)] pub enum BuilderError { + /// An I/O error occurred while reading files or spawning processes. #[error("IO error")] Io(#[from] std::io::Error), + /// A JSON deserialisation error occurred while parsing ripgrep output. #[error("JSON error")] Json(#[from] serde_json::Error), + /// An error occurred during knowledge-graph indexation. #[error("Indexation error: {0}")] Indexation(String), } @@ -97,6 +100,7 @@ pub trait ThesaurusBuilder { const LOGSEQ_KEY_VALUE_DELIMITER: &str = "::"; const LOGSEQ_SYNONYMS_KEYWORD: &str = "synonyms"; +/// Thesaurus builder that extracts synonym definitions from Logseq markdown files. #[derive(Default)] pub struct Logseq { #[allow(dead_code)] @@ -140,6 +144,7 @@ impl ThesaurusBuilder for Logseq { } } +/// Wraps the `rg` (ripgrep) command used to scan Logseq markdown files for synonym lines. #[allow(dead_code)] pub struct LogseqService { command: String, @@ -284,19 +289,27 @@ fn concept_from_path(path: PathBuf) -> Result { }) } +/// A single JSON message emitted by ripgrep in `--json` mode. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] #[serde(tag = "type", content = "data")] #[serde(rename_all = "snake_case")] pub enum Message { + /// Signals the start of searching a new file. Begin(Begin), + /// Signals the end of searching a file. End(End), + /// A line in a file that matched the search pattern. Match(Match), + /// A surrounding context line near a match. Context(Context), + /// Aggregate statistics for the entire search run. Summary(Summary), } +/// Data accompanying a ripgrep `begin` message (start of file). #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Begin { + /// Path of the file being searched, if available. pub path: Option, } @@ -306,6 +319,7 @@ impl Begin { } } +/// Data accompanying a ripgrep `end` message (end of file). 
#[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct End { path: Option, @@ -313,26 +327,34 @@ pub struct End { stats: Stats, } +/// Aggregate statistics emitted at the end of a ripgrep run. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Summary { elapsed_total: Duration, stats: Stats, } +/// Data accompanying a ripgrep `match` message (a matched line). #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Match { + /// Path of the file containing the match, if available. pub path: Option, + /// The full line content that produced the match. pub lines: Data, line_number: Option, absolute_offset: u64, + /// Specific byte-range sub-matches within the line. pub submatches: Vec, } impl Match {} +/// Data accompanying a ripgrep `context` message (a surrounding context line). #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct Context { + /// Path of the file containing the context line, if available. pub path: Option, + /// The full line content of this context line. pub lines: Data, line_number: Option, absolute_offset: u64, @@ -341,6 +363,7 @@ pub struct Context { impl Context {} +/// A single byte-range sub-match within a ripgrep `match` or `context` line. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] pub struct SubMatch { #[serde(rename = "match")] @@ -349,11 +372,20 @@ pub struct SubMatch { end: usize, } +/// The raw content of a ripgrep data field, either UTF-8 text or a base64-encoded byte string. #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] #[serde(untagged)] pub enum Data { - Text { text: String }, - Bytes { bytes: String }, + /// Valid UTF-8 text content. + Text { + /// The text string value. + text: String, + }, + /// Non-UTF-8 content encoded as a base64 string. + Bytes { + /// The base64-encoded byte string value. 
+ bytes: String, + }, } fn as_path(data: &Option) -> Option { diff --git a/crates/terraphim_automata/src/evaluation.rs b/crates/terraphim_automata/src/evaluation.rs index c1117951f..95c88e646 100644 --- a/crates/terraphim_automata/src/evaluation.rs +++ b/crates/terraphim_automata/src/evaluation.rs @@ -35,35 +35,50 @@ pub struct ExpectedMatch { /// Precision, recall, and F1 metrics computed from true/false positive/negative counts. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ClassificationMetrics { + /// Fraction of predicted positives that are correct. pub precision: f64, + /// Fraction of actual positives that were predicted correctly. pub recall: f64, + /// Harmonic mean of precision and recall. pub f1: f64, + /// Number of correctly predicted positive instances. pub true_positives: usize, + /// Number of incorrectly predicted positive instances. pub false_positives: usize, + /// Number of actual positive instances that were missed. pub false_negatives: usize, } /// Metrics for a single term across all evaluated documents. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TermReport { + /// The term these metrics were computed for. pub term: String, + /// Classification metrics for this term. pub metrics: ClassificationMetrics, } /// Full evaluation result covering all documents and terms. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EvaluationResult { + /// Total number of documents evaluated. pub total_documents: usize, + /// Aggregate metrics across all documents and terms. pub overall: ClassificationMetrics, + /// Per-term breakdown of classification metrics. pub per_term: Vec, + /// Terms that repeatedly produce false positives across documents. pub systematic_errors: Vec, } /// A term that consistently appears as a false positive across multiple documents. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SystematicError { + /// The term that produces systematic false positives. 
pub term: String, + /// Number of documents in which this term was a false positive. pub false_positive_count: usize, + /// Identifiers of the documents where the false positive occurred. pub document_ids: Vec, } diff --git a/crates/terraphim_automata/src/lib.rs b/crates/terraphim_automata/src/lib.rs index 80c7abca3..c8004a3b4 100644 --- a/crates/terraphim_automata/src/lib.rs +++ b/crates/terraphim_automata/src/lib.rs @@ -106,10 +106,14 @@ //! See the [WASM package](wasm/) for browser usage. pub use self::builder::{Logseq, ThesaurusBuilder, compute_kg_source_hash}; +/// FST-backed autocomplete index and prefix/fuzzy search functions. pub mod autocomplete; +/// Thesaurus builder that parses Logseq markdown files via ripgrep JSON output. pub mod builder; pub mod evaluation; +/// Parser for `terraphim` directives embedded in markdown front-matter. pub mod markdown_directives; +/// Aho-Corasick text matcher and link-replacement utilities. pub mod matcher; pub mod url_protector; @@ -158,6 +162,7 @@ pub use umls::{UmlsConcept, UmlsDataset, UmlsStats}; pub use umls_extractor::{UmlsExtractor, UmlsExtractorStats, UmlsMatch}; // Re-export helpers for metadata iteration to support graph-embedding expansions in consumers +/// Helper functions for iterating and querying autocomplete index metadata. pub mod autocomplete_helpers { use super::autocomplete::{AutocompleteIndex, AutocompleteMetadata}; /// Iterates over all `(term, metadata)` pairs stored in the autocomplete index. diff --git a/crates/terraphim_automata/src/markdown_directives.rs b/crates/terraphim_automata/src/markdown_directives.rs index e30ac750a..12fcc7170 100644 --- a/crates/terraphim_automata/src/markdown_directives.rs +++ b/crates/terraphim_automata/src/markdown_directives.rs @@ -5,16 +5,23 @@ use std::path::{Path, PathBuf}; use terraphim_types::{DocumentType, MarkdownDirectives, RouteDirective}; use walkdir::WalkDir; +/// A non-fatal warning produced while parsing markdown directive files. 
#[derive(Debug, Clone, PartialEq, Eq)] pub struct MarkdownDirectiveWarning { + /// Path of the file that triggered the warning. pub path: PathBuf, + /// Line number within the file, if available. pub line: Option, + /// Human-readable description of the warning. pub message: String, } +/// Combined output of parsing a directory of markdown directive files. #[derive(Debug, Clone, Default, PartialEq, Eq)] pub struct MarkdownDirectivesParseResult { + /// Map from file stem to the parsed directives found in that file. pub directives: HashMap, + /// Non-fatal warnings collected during parsing. pub warnings: Vec, } diff --git a/crates/terraphim_automata/src/matcher.rs b/crates/terraphim_automata/src/matcher.rs index 1f492e46c..be77959cf 100644 --- a/crates/terraphim_automata/src/matcher.rs +++ b/crates/terraphim_automata/src/matcher.rs @@ -5,10 +5,14 @@ use crate::url_protector::UrlProtector; use crate::{Result, TerraphimAutomataError}; +/// A single thesaurus term found within an input text. #[derive(Debug, PartialEq, Clone)] pub struct Matched { + /// The surface form of the term as it appears in the thesaurus key. pub term: String, + /// The normalised term entry associated with this match. pub normalized_term: NormalizedTerm, + /// Byte start and end positions within the source text, if requested. pub pos: Option<(usize, usize)>, } @@ -146,11 +150,16 @@ where thesaurus } +/// The format used when replacing matched terms with hyperlinks. #[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] pub enum LinkType { + /// Replace matches with `[[term]]` wiki-style links. WikiLinks, + /// Replace matches with `<a href="url">term</a>` HTML links. HTMLLinks, + /// Replace matches with `[term](url)` Markdown links. MarkdownLinks, + /// Leave matched terms as plain text without any link markup.
#[default] PlainText, } diff --git a/crates/terraphim_grep/src/error.rs b/crates/terraphim_grep/src/error.rs index 3480d2b55..7253bd00a 100644 --- a/crates/terraphim_grep/src/error.rs +++ b/crates/terraphim_grep/src/error.rs @@ -1,27 +1,36 @@ use std::time::Duration; +/// Errors produced by the terraphim_grep hybrid search and RLM pipeline. #[derive(Debug, thiserror::Error)] pub enum TerraphimGrepError { + /// The underlying search operation failed. #[error("search failed: {0}")] SearchFailed(String), + /// An LLM integration was required but not configured. #[error("LLM not configured: {0}")] LlmNotConfigured(String), + /// The search returned too few results to be considered useful. #[error("insufficient results: {0}")] InsufficientResults(String), + /// Knowledge graph curation via the RLM pipeline failed. #[error("KG curation failed: {0}")] KgCurationFailed(String), + /// Execution of the RLM model failed. #[error("RLM execution failed: {0}")] RlmFailed(String), + /// The operation exceeded the configured time limit. #[error("timeout after {0:?}")] Timeout(Duration), + /// The provided configuration was invalid. #[error("invalid configuration: {0}")] InvalidConfig(String), } +/// Convenience alias for `Result<T, TerraphimGrepError>`. pub type Result = std::result::Result; diff --git a/crates/terraphim_grep/src/hybrid_searcher.rs b/crates/terraphim_grep/src/hybrid_searcher.rs index c6e56dee2..044b1a5ac 100644 --- a/crates/terraphim_grep/src/hybrid_searcher.rs +++ b/crates/terraphim_grep/src/hybrid_searcher.rs @@ -4,12 +4,18 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; use terraphim_types::Document; +/// Options controlling a hybrid search operation. #[derive(Debug, Clone)] pub struct GrepOptions { + /// Which haystack (code, docs, or both) to search. pub haystack: Haystack, + /// Number of context lines to include around each match. pub context_lines: usize, + /// Maximum number of results to return.
pub max_results: usize, + /// When true, always invoke the RLM synthesis pipeline regardless of sufficiency. pub force_rlm: bool, + /// When true, include an LLM-generated answer alongside raw chunks. pub include_answer: bool, } @@ -25,21 +31,32 @@ impl Default for GrepOptions { } } +/// Selects which haystack corpus to search. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum Haystack { + /// Search only code-related sources (the default). #[default] Code, + /// Search only documentation sources. Docs, + /// Search both code and documentation sources. All, } +/// A single chunk of text retrieved from a haystack source. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RetrievedChunk { + /// The text content of the retrieved chunk. pub content: String, + /// Identifier for the source file or URL this chunk came from. pub source: String, + /// First line number of this chunk within its source file, if known. pub line_start: Option, + /// Last line number of this chunk within its source file, if known. pub line_end: Option, + /// Relevance score used for ranking (higher is more relevant). pub relevance_score: f64, + /// Name of the haystack that produced this chunk (e.g. `"code"`, `"docs"`). pub haystack: &'static str, } @@ -56,22 +73,32 @@ impl From for RetrievedChunk { } } +/// A concept node matched from the knowledge graph. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct KgConcept { + /// Unique numeric identifier of this concept in the knowledge graph. pub id: u64, + /// Canonical name of the concept. pub name: String, + /// Optional human-readable display label for the concept. pub display_value: Option, + /// Relevance score for this concept relative to the query. pub score: f64, } +/// Combined results from the parallel code-search and documentation pipelines. #[derive(Debug, Clone)] pub struct HybridResults { + /// Chunks retrieved from code-search haystacks. 
pub code_results: Vec, + /// Chunks retrieved from documentation haystacks. pub doc_results: Vec, + /// Knowledge-graph concepts matched against the query. pub kg_concepts: Vec, } impl HybridResults { + /// Merge code and doc results into a single ordered list. pub fn to_chunks(&self) -> Vec { let mut chunks = Vec::with_capacity(self.code_results.len() + self.doc_results.len()); chunks.extend(self.code_results.clone()); @@ -79,10 +106,12 @@ impl HybridResults { chunks } + /// Return the total number of results across all pipelines. pub fn total_results(&self) -> usize { self.code_results.len() + self.doc_results.len() + self.kg_concepts.len() } + /// Return true if all result lists are empty. pub fn is_empty(&self) -> bool { self.code_results.is_empty() && self.doc_results.is_empty() && self.kg_concepts.is_empty() } @@ -150,6 +179,7 @@ pub fn boost_chunks_with_kg( chunks } +/// Runs parallel KG-concept and ripgrep searches and merges the results. pub struct HybridSearcher { role_graph: Arc>, /// Kept alongside the rolegraph so KG-style boosting still works when no documents @@ -161,6 +191,7 @@ pub struct HybridSearcher { } impl HybridSearcher { + /// Create a new searcher initialised with the given role name and thesaurus. pub fn new( role_name: String, thesaurus: terraphim_types::Thesaurus, @@ -177,11 +208,13 @@ impl HybridSearcher { }) } + /// Override the filesystem root used for ripgrep searches. pub fn with_search_path(mut self, path: PathBuf) -> Self { self.search_path = path; self } + /// Run the hybrid search and return merged results. pub async fn search( &self, query: &str, @@ -365,6 +398,7 @@ impl HybridSearcher { } } + /// Sort retrieved chunks by descending relevance score. 
pub fn fuse_and_rank(&self, mut results: Vec) -> Vec { results.sort_by(|a, b| { b.relevance_score diff --git a/crates/terraphim_grep/src/kg_curation.rs b/crates/terraphim_grep/src/kg_curation.rs index 95e66d156..88063d65b 100644 --- a/crates/terraphim_grep/src/kg_curation.rs +++ b/crates/terraphim_grep/src/kg_curation.rs @@ -8,6 +8,7 @@ use terraphim_service::llm::LlmClient; use crate::error::Result; use crate::signatures::NewConcept; +/// Extracts new knowledge-graph concepts from query-answer pairs via an LLM. #[cfg(feature = "llm")] pub struct KgCurationRlm { llm_client: Arc, @@ -16,6 +17,7 @@ pub struct KgCurationRlm { #[cfg(feature = "llm")] impl KgCurationRlm { + /// Create a new curation client backed by the given LLM. pub fn new(llm_client: Arc) -> Self { Self { llm_client, @@ -23,11 +25,13 @@ impl KgCurationRlm { } } + /// Set a filesystem path for storing extracted concepts. pub fn with_kg_path(mut self, path: std::path::PathBuf) -> Self { self.kg_path = Some(path); self } + /// Extract new concepts from a query-answer pair and return them for indexing. pub async fn extract_and_index( &self, query: &str, diff --git a/crates/terraphim_grep/src/lib.rs b/crates/terraphim_grep/src/lib.rs index 796d7176d..3e0746d36 100644 --- a/crates/terraphim_grep/src/lib.rs +++ b/crates/terraphim_grep/src/lib.rs @@ -20,11 +20,17 @@ //! # } //! ``` +/// Error types for the terraphim_grep crate. pub mod error; +/// Hybrid KG + ripgrep searcher and supporting types. pub mod hybrid_searcher; +/// Knowledge-graph curation via the RLM pipeline. pub mod kg_curation; +/// Context assembly for RLM prompts. pub mod rlm_context; +/// RLM signature traits and output structs. pub mod signatures; +/// Heuristic sufficiency judge for determining when RLM synthesis is needed. 
pub mod sufficiency_judge; use std::sync::Arc; @@ -39,30 +45,46 @@ pub use rlm_context::RlmContext; pub use signatures::{AnswerWithCitations, Citation, Match, NewConcept, RlmSignature}; pub use sufficiency_judge::{HeuristicThresholds, Sufficiency, SufficiencyJudge}; +/// The result of a complete hybrid grep operation. #[derive(Debug, Clone, serde::Serialize)] pub struct GrepResult { + /// Retrieved text chunks, ranked by relevance. pub chunks: Vec, + /// LLM-synthesised answer, if RLM synthesis was invoked. pub answer: Option, + /// Knowledge-graph concepts matched by the query. pub concepts: Vec, + /// Which synthesis path was taken. pub sufficiency: SufficiencyState, + /// Latency and count statistics for the operation. pub stats: GrepStats, } +/// Describes which synthesis pipeline produced the final result. #[derive(Debug, Clone, serde::Serialize)] pub enum SufficiencyState { + /// Search results were sufficient; no RLM synthesis was needed. SearchOnly, + /// RLM synthesis was invoked to summarise the retrieved chunks. RlmSynthesis, + /// RLM synthesis was attempted but deemed insufficient. RlmInsufficient, } +/// Timing and throughput statistics for a single grep operation. #[derive(Debug, Clone, serde::Serialize)] pub struct GrepStats { + /// Wall-clock time in milliseconds taken by the search phase. pub search_latency_ms: u64, + /// Wall-clock time in milliseconds taken by the RLM phase, if invoked. pub rlm_latency_ms: Option, + /// Total number of chunks returned to the caller. pub chunks_returned: usize, + /// Number of knowledge-graph concept hits. pub kg_hits: usize, } +/// Orchestrates hybrid search combining ripgrep, KG, and optional RLM synthesis. pub struct TerraphimGrep { hybrid_searcher: Arc, sufficiency_judge: Arc, @@ -74,6 +96,7 @@ pub struct TerraphimGrep { impl TerraphimGrep { #[cfg(feature = "llm")] + /// Create a new `TerraphimGrep` with the given searcher and sufficiency judge. 
pub fn new( hybrid_searcher: Arc, sufficiency_judge: Arc, @@ -87,6 +110,7 @@ impl TerraphimGrep { } #[cfg(not(feature = "llm"))] + /// Create a new `TerraphimGrep` with the given searcher and sufficiency judge. pub fn new( hybrid_searcher: Arc, sufficiency_judge: Arc, @@ -98,12 +122,14 @@ impl TerraphimGrep { } #[cfg(feature = "llm")] + /// Attach a KG curation module for post-query concept extraction. pub fn with_kg_curation(mut self, kg_curation: Arc) -> Self { self.kg_curation = Some(kg_curation); self } #[cfg(feature = "llm")] + /// Attach an LLM client for RLM synthesis. pub fn with_llm_client( mut self, llm_client: Arc, @@ -112,6 +138,7 @@ impl TerraphimGrep { self } + /// Execute a hybrid search and return ranked results, invoking RLM synthesis when needed. pub async fn search(&self, query: &str, options: GrepOptions) -> Result { let start = std::time::Instant::now(); @@ -315,6 +342,7 @@ impl TerraphimGrep { .await } + /// Return an empty `GrepStats` placeholder (used when stats are unavailable). pub fn stats(&self) -> GrepStats { GrepStats { search_latency_ms: 0, diff --git a/crates/terraphim_grep/src/rlm_context.rs b/crates/terraphim_grep/src/rlm_context.rs index 401ffa0c7..4b69412ba 100644 --- a/crates/terraphim_grep/src/rlm_context.rs +++ b/crates/terraphim_grep/src/rlm_context.rs @@ -2,21 +2,30 @@ use std::collections::HashMap; use super::hybrid_searcher::{KgConcept, RetrievedChunk}; +/// Assembled context for an RLM (Retrieval-Language Model) prompt. #[derive(Debug, Clone)] pub struct RlmContext { + /// The original user query. pub query: String, + /// Retrieved text chunks to include as context. pub retrieved_chunks: Vec, + /// Knowledge-graph concepts matched by the query. pub kg_concepts: Vec, + /// Metadata about each source document, keyed by source identifier. pub source_metadata: HashMap, } +/// Metadata describing a single source document. #[derive(Debug, Clone)] pub struct DocumentMetadata { + /// Type of source (e.g. `"code"`, `"docs"`). 
pub source_type: String, + /// ISO 8601 timestamp of the last modification, if available. pub last_modified: Option, } impl RlmContext { + /// Create an empty context for the given query. pub fn new(query: String) -> Self { Self { query, @@ -26,6 +35,7 @@ impl RlmContext { } } + /// Add retrieved chunks to the context and update source metadata. pub fn with_chunks(mut self, chunks: Vec) -> Self { self.retrieved_chunks = chunks; for chunk in &self.retrieved_chunks { @@ -40,11 +50,13 @@ impl RlmContext { self } + /// Add knowledge-graph concepts to the context. pub fn with_concepts(mut self, concepts: Vec) -> Self { self.kg_concepts = concepts; self } + /// Render the context into a formatted prompt string for the RLM. pub fn build_prompt(&self) -> String { let mut prompt = format!("Query: {}\n\n", self.query); @@ -75,10 +87,12 @@ impl RlmContext { prompt } + /// Return the length in bytes of the rendered prompt. pub fn context_length(&self) -> usize { self.build_prompt().len() } + /// Trim retrieved chunks so that the rendered prompt fits within `max_chars`. pub fn truncate(&mut self, max_chars: usize) { if self.context_length() > max_chars { let mut remaining = max_chars; diff --git a/crates/terraphim_grep/src/signatures.rs b/crates/terraphim_grep/src/signatures.rs index 9126b4dfa..6b0fcb750 100644 --- a/crates/terraphim_grep/src/signatures.rs +++ b/crates/terraphim_grep/src/signatures.rs @@ -2,23 +2,33 @@ use serde::{Deserialize, Serialize}; use crate::error::TerraphimGrepError; +/// A typed RLM prompt signature that knows how to format its instructions and parse its output. pub trait RlmSignature: Send + Sync { + /// The structured output type returned by this signature. type Output: serde::Serialize + serde::de::DeserializeOwned; + /// Return the natural-language instructions to include in the prompt. fn instructions(&self) -> String; + /// Parse the raw RLM text output into the structured `Output` type.
fn parse(&self, raw: &str) -> Result; } +/// A single search match returned by the RLM search-result signature. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Match { + /// Path of the matched file. pub path: String, + /// Line number of the match (1-based). pub line: usize, + /// Optional end line number for a multi-line match. #[serde(skip_serializing_if = "Option::is_none", default)] pub line_end: Option, + /// Surrounding context lines. #[serde(skip_serializing_if = "Vec::is_empty", default)] pub context: Vec, } +/// RLM signature that asks for a list of file-match locations. pub struct SearchResultSignature; impl RlmSignature for SearchResultSignature { @@ -35,21 +45,30 @@ impl RlmSignature for SearchResultSignature { } } +/// A source citation accompanying an RLM-synthesised answer. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Citation { + /// Source identifier (file path or URL). pub source: String, + /// Line number within the source, if known. #[serde(skip_serializing_if = "Option::is_none")] pub line: Option, + /// The quoted excerpt from the source. pub excerpt: String, } +/// An LLM-synthesised answer with supporting citations. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct AnswerWithCitations { + /// The synthesised answer text. pub answer: String, + /// Sources cited in the answer. pub citations: Vec, + /// Model confidence in the answer, in the range [0, 1]. pub confidence: f64, } +/// RLM signature that asks for a synthesised answer with citations. pub struct AnswerSignature; impl RlmSignature for AnswerSignature { @@ -69,15 +88,20 @@ impl RlmSignature for AnswerSignature { } } +/// A new domain concept extracted by the RLM from a query-answer pair. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NewConcept { + /// Canonical name of the extracted concept. pub name: String, + /// Alternative names or aliases for this concept. #[serde(default)] pub synonyms: Vec, + /// Related concept names (free-form). 
#[serde(default)] pub relationships: Vec, } +/// RLM signature that extracts new domain concepts from a query-answer pair. pub struct ConceptExtractionSignature; impl RlmSignature for ConceptExtractionSignature { diff --git a/crates/terraphim_grep/src/sufficiency_judge.rs b/crates/terraphim_grep/src/sufficiency_judge.rs index ea95d869f..97b789b80 100644 --- a/crates/terraphim_grep/src/sufficiency_judge.rs +++ b/crates/terraphim_grep/src/sufficiency_judge.rs @@ -1,10 +1,15 @@ use super::hybrid_searcher::{HybridResults, RetrievedChunk}; +/// Thresholds used by [`SufficiencyJudge`] to decide whether results are adequate. #[derive(Debug, Clone)] pub struct HeuristicThresholds { + /// Minimum query-term coverage ratio (0.0 – 1.0) required for sufficiency. pub min_coverage: f64, + /// Minimum mean KG concept confidence required for sufficiency. pub min_kg_confidence: f64, + /// Minimum number of distinct sources required for diversity. pub min_diversity: usize, + /// Minimum number of chunks required before results are considered sufficient. pub min_results: usize, } @@ -19,23 +24,31 @@ impl Default for HeuristicThresholds { } } +/// The sufficiency verdict returned by [`SufficiencyJudge::judge`]. #[derive(Debug, Clone)] pub enum Sufficiency { + /// Results meet all thresholds; no RLM synthesis is needed. Sufficient(Vec), + /// Results partially meet thresholds; RLM synthesis should be invoked. NeedsSynthesis(Vec), + /// Too few results; additional expansion via RLM is needed. NeedsExpansion(Vec), + /// Results are entirely insufficient to answer the query. Insufficient(Vec), } +/// Judges whether a set of hybrid search results is sufficient to answer a query. pub struct SufficiencyJudge { thresholds: HeuristicThresholds, } impl SufficiencyJudge { + /// Create a judge with the given heuristic thresholds. pub fn new(thresholds: HeuristicThresholds) -> Self { Self { thresholds } } + /// Evaluate whether `results` sufficiently answers `query`. 
pub fn judge(&self, results: &HybridResults, query: &str) -> Sufficiency { let chunks = results.to_chunks(); diff --git a/crates/terraphim_rlm/src/error.rs b/crates/terraphim_rlm/src/error.rs index 85d558d76..5306211a2 100644 --- a/crates/terraphim_rlm/src/error.rs +++ b/crates/terraphim_rlm/src/error.rs @@ -12,125 +12,208 @@ pub enum RlmError { // Session errors /// Session not found. #[error("Session not found: {session_id}")] - SessionNotFound { session_id: SessionId }, + SessionNotFound { + /// Identifier of the session that was not found. + session_id: SessionId, + }, /// Session has expired. #[error("Session expired: {session_id}")] - SessionExpired { session_id: SessionId }, + SessionExpired { + /// Identifier of the session that expired. + session_id: SessionId, + }, /// Session is in an invalid state for the requested operation. #[error("Session {session_id} is in invalid state {state} for operation {operation}")] InvalidSessionState { + /// Identifier of the session in an invalid state. session_id: SessionId, + /// Current state of the session. state: String, + /// Operation that was attempted. operation: String, }, /// Maximum session extensions reached. #[error("Session {session_id} has reached maximum extensions ({max})")] - MaxExtensionsReached { session_id: SessionId, max: u32 }, + MaxExtensionsReached { + /// Identifier of the session that reached the extension limit. + session_id: SessionId, + /// Maximum number of extensions allowed. + max: u32, + }, // Budget errors /// Token budget exceeded. #[error("Token budget exceeded: used {used} of {budget} tokens")] - TokenBudgetExceeded { used: u64, budget: u64 }, + TokenBudgetExceeded { + /// Number of tokens consumed. + used: u64, + /// Token budget limit. + budget: u64, + }, /// Time budget exceeded. #[error("Time budget exceeded: used {used_ms}ms of {budget_ms}ms")] - TimeBudgetExceeded { used_ms: u64, budget_ms: u64 }, + TimeBudgetExceeded { + /// Elapsed time in milliseconds. 
+ used_ms: u64, + /// Time budget limit in milliseconds. + budget_ms: u64, + }, /// Recursion depth limit exceeded. #[error("Recursion depth limit exceeded: {depth} >= {max_depth}")] - RecursionDepthExceeded { depth: u32, max_depth: u32 }, + RecursionDepthExceeded { + /// Current recursion depth reached. + depth: u32, + /// Maximum recursion depth allowed. + max_depth: u32, + }, // Execution errors /// Code execution failed. #[error("Code execution failed: {message}")] ExecutionFailed { + /// Human-readable description of the failure. message: String, + /// Exit code returned by the process, if available. exit_code: Option, + /// Standard output captured before failure, if any. stdout: Option, + /// Standard error captured before failure, if any. stderr: Option, }, /// Command parsing failed. #[error("Failed to parse command from LLM output: {message}")] - CommandParseFailed { message: String }, + CommandParseFailed { + /// Description of the parse failure. + message: String, + }, /// Execution timed out. #[error("Execution timed out after {timeout_ms}ms")] - ExecutionTimeout { timeout_ms: u64 }, + ExecutionTimeout { + /// Configured timeout in milliseconds that was exceeded. + timeout_ms: u64, + }, /// VM crashed or became unresponsive. #[error("VM crashed: {message}")] - VmCrashed { message: String }, + VmCrashed { + /// Description of the crash or unresponsiveness. + message: String, + }, // Validation errors /// Knowledge graph validation failed. #[error("KG validation failed: unknown terms {unknown_terms:?}")] - KgValidationFailed { unknown_terms: Vec }, + KgValidationFailed { + /// Terms not found in the knowledge graph. + unknown_terms: Vec, + }, /// KG validation requires user escalation. #[error("KG validation requires user approval for terms: {unknown_terms:?}")] KgEscalationRequired { + /// Terms requiring user approval. unknown_terms: Vec, + /// Recommended action for the user to take. 
suggested_action: String, + /// Context in which the unknown terms were encountered. context: String, }, // Snapshot errors /// Snapshot not found. #[error("Snapshot not found: {snapshot_id}")] - SnapshotNotFound { snapshot_id: String }, + SnapshotNotFound { + /// Identifier of the snapshot that was not found. + snapshot_id: String, + }, /// Maximum snapshots per session reached. #[error("Maximum snapshots ({max}) reached for session")] - MaxSnapshotsReached { max: u32 }, + MaxSnapshotsReached { + /// Maximum number of snapshots allowed per session. + max: u32, + }, /// Snapshot creation failed. #[error("Failed to create snapshot: {message}")] - SnapshotCreationFailed { message: String }, + SnapshotCreationFailed { + /// Description of why snapshot creation failed. + message: String, + }, /// Snapshot restoration failed. #[error("Failed to restore snapshot: {message}")] - SnapshotRestoreFailed { message: String }, + SnapshotRestoreFailed { + /// Description of why snapshot restoration failed. + message: String, + }, // Backend errors /// No execution backend available. #[error("No execution backend available. Tried: {tried:?}")] - NoBackendAvailable { tried: Vec }, + NoBackendAvailable { + /// Names of backends that were attempted. + tried: Vec, + }, /// Backend initialization failed. #[error("Failed to initialize {backend} backend: {message}")] - BackendInitFailed { backend: String, message: String }, + BackendInitFailed { + /// Name of the backend that failed to initialise. + backend: String, + /// Description of the initialisation failure. + message: String, + }, /// VM pool exhausted (all VMs busy, no overflow capacity). #[error( "VM pool exhausted: all {pool_size} VMs busy, overflow at capacity ({overflow_count}/{max_overflow})" )] PoolExhausted { + /// Total number of VMs in the pool. pool_size: u32, + /// Number of overflow VMs currently in use. overflow_count: u32, + /// Maximum number of overflow VMs allowed. 
max_overflow: u32, }, /// VM allocation timed out. #[error("VM allocation timed out after {timeout_ms}ms")] - VmAllocationTimeout { timeout_ms: u64 }, + VmAllocationTimeout { + /// Allocation timeout in milliseconds that was exceeded. + timeout_ms: u64, + }, // Network/DNS errors /// DNS query blocked by allowlist. #[error("DNS query blocked: {domain} not in allowlist")] - DnsBlocked { domain: String }, + DnsBlocked { + /// Domain name that was blocked. + domain: String, + }, /// Network request blocked. #[error("Network request blocked: {url}")] - NetworkBlocked { url: String }, + NetworkBlocked { + /// URL that was blocked. + url: String, + }, // LLM errors /// LLM call failed. #[error("LLM call failed: {message}")] - LlmCallFailed { message: String }, + LlmCallFailed { + /// Description of why the LLM call failed. + message: String, + }, /// No LLM client configured. Enable the `llm` feature and set an API key /// or run a local Ollama instance. @@ -145,42 +228,72 @@ pub enum RlmError { /// Invalid session token format. #[error("Invalid session token: {token}")] - InvalidSessionToken { token: String }, + InvalidSessionToken { + /// The malformed session token. + token: String, + }, /// Batch query size too large. #[error("Batch size {size} exceeds maximum {max}")] - BatchSizeTooLarge { size: usize, max: usize }, + BatchSizeTooLarge { + /// Requested batch size. + size: usize, + /// Maximum permitted batch size. + max: usize, + }, // Output errors /// Output too large for inline return. #[error("Output exceeds inline limit ({size} > {limit} bytes), streamed to {file_path}")] OutputTooLarge { + /// Actual output size in bytes. size: u64, + /// Inline size limit in bytes. limit: u64, + /// Path to the file where the output was streamed. file_path: String, }, // Operations errors /// Auto-remediation failed. 
#[error("Auto-remediation failed after {attempts} attempts: {message}")] - AutoRemediationFailed { attempts: u32, message: String }, + AutoRemediationFailed { + /// Number of remediation attempts made. + attempts: u32, + /// Description of the final failure. + message: String, + }, /// Alert webhook failed. #[error("Failed to send alert to webhook: {message}")] - AlertWebhookFailed { message: String }, + AlertWebhookFailed { + /// Description of why the webhook call failed. + message: String, + }, // Generic errors /// Configuration error. #[error("Configuration error: {message}")] - ConfigError { message: String }, + ConfigError { + /// Description of the configuration problem. + message: String, + }, /// Operation is not supported by the selected backend. #[error("Backend '{backend}' does not support operation '{op}'")] - NotSupported { backend: String, op: String }, + NotSupported { + /// Name of the backend that does not support the operation. + backend: String, + /// Name of the unsupported operation. + op: String, + }, /// Internal error. #[error("Internal error: {message}")] - Internal { message: String }, + Internal { + /// Description of the internal error. + message: String, + }, /// Cancelled by user or parent. #[error("Operation cancelled")] diff --git a/crates/terraphim_rlm/src/executor/docker.rs b/crates/terraphim_rlm/src/executor/docker.rs index f58e48025..4fbe3c069 100644 --- a/crates/terraphim_rlm/src/executor/docker.rs +++ b/crates/terraphim_rlm/src/executor/docker.rs @@ -41,6 +41,7 @@ const DEFAULT_MEMORY_BYTES: i64 = 512 * 1024 * 1024; /// Default container PIDs limit. const DEFAULT_PIDS_LIMIT: i64 = 256; +/// Docker-based execution backend that isolates each session in a container. pub struct DockerExecutor { docker: Docker, /// Per-session container map. Each entry holds a `Mutex>`: @@ -82,6 +83,7 @@ fn unsupported(op: &'static str) -> RlmError { } impl DockerExecutor { + /// Create a new `DockerExecutor` connecting to the local Docker daemon. 
pub fn new(_config: RlmConfig) -> Result { let docker = Docker::connect_with_local_defaults().map_err(|e| RlmError::BackendInitFailed { @@ -108,6 +110,7 @@ impl DockerExecutor { }) } + /// Create a new `DockerExecutor` using the specified container image. pub fn with_image(config: RlmConfig, image: &str) -> Result { let mut executor = Self::new(config)?; executor.image = image.to_string(); diff --git a/crates/terraphim_rlm/src/executor/local.rs b/crates/terraphim_rlm/src/executor/local.rs index 7a7f2db2d..c5339636f 100644 --- a/crates/terraphim_rlm/src/executor/local.rs +++ b/crates/terraphim_rlm/src/executor/local.rs @@ -29,17 +29,20 @@ use crate::types::SessionId; const BACKEND_NAME: &str = "local"; +/// Local process executor that runs code and commands directly on the host. pub struct LocalExecutor { python_path: String, } impl LocalExecutor { + /// Create a new `LocalExecutor` using the default `python3` interpreter. pub fn new() -> Self { Self { python_path: "python3".to_string(), } } + /// Override the Python interpreter path used for code execution. pub fn with_python(mut self, path: impl Into) -> Self { self.python_path = path.into(); self diff --git a/crates/terraphim_rlm/src/mcp_tools.rs b/crates/terraphim_rlm/src/mcp_tools.rs index 65c067f01..55715d30e 100644 --- a/crates/terraphim_rlm/src/mcp_tools.rs +++ b/crates/terraphim_rlm/src/mcp_tools.rs @@ -774,39 +774,56 @@ impl Default for RlmMcpService { /// Response from rlm_code tool. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RlmCodeResponse { + /// Standard output produced by the executed code. pub stdout: String, + /// Standard error produced by the executed code. pub stderr: String, + /// Exit code returned by the process. pub exit_code: i32, + /// Wall-clock execution time in milliseconds. pub execution_time_ms: u64, + /// Whether the execution completed successfully (exit code 0). pub success: bool, } /// Response from rlm_bash tool. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct RlmBashResponse { + /// Standard output produced by the executed command. pub stdout: String, + /// Standard error produced by the executed command. pub stderr: String, + /// Exit code returned by the process. pub exit_code: i32, + /// Wall-clock execution time in milliseconds. pub execution_time_ms: u64, + /// Whether the execution completed successfully (exit code 0). pub success: bool, } /// Response from rlm_query tool. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RlmQueryResponse { + /// Text response returned by the LLM. pub response: String, + /// Number of tokens consumed by the query. pub tokens_used: u64, + /// Identifier of the model that handled the query. pub model: String, } /// Response from rlm_context tool. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RlmContextResponse { + /// Action that was performed on the context store. pub action: String, + /// Key involved in the action, if applicable. #[serde(skip_serializing_if = "Option::is_none")] pub key: Option, + /// Value retrieved or stored, if applicable. #[serde(skip_serializing_if = "Option::is_none")] pub value: Option, + /// Full map of context variables, returned for list actions. #[serde(skip_serializing_if = "Option::is_none")] pub variables: Option>, } @@ -814,11 +831,15 @@ pub struct RlmContextResponse { /// Response from rlm_snapshot tool. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct RlmSnapshotResponse { + /// Snapshot action that was performed. pub action: String, + /// Name of the snapshot, if provided. #[serde(skip_serializing_if = "Option::is_none")] pub snapshot_name: Option, + /// Identifier of the snapshot created or restored. #[serde(skip_serializing_if = "Option::is_none")] pub snapshot_id: Option, + /// List of available snapshot identifiers, returned for list actions. 
#[serde(skip_serializing_if = "Option::is_none")] pub snapshots: Option>, } diff --git a/crates/terraphim_rlm/src/query_loop.rs b/crates/terraphim_rlm/src/query_loop.rs index 54356c5b0..ce6854332 100644 --- a/crates/terraphim_rlm/src/query_loop.rs +++ b/crates/terraphim_rlm/src/query_loop.rs @@ -49,7 +49,10 @@ pub enum TerminationReason { /// FINAL command was executed. FinalReached, /// FINAL_VAR command was executed. - FinalVarReached { variable: String }, + FinalVarReached { + /// Name of the variable returned as the final result. + variable: String, + }, /// Token budget exhausted. TokenBudgetExhausted, /// Time budget exhausted. @@ -59,7 +62,10 @@ pub enum TerminationReason { /// Maximum recursion depth reached. RecursionDepthExhausted, /// Error occurred during execution. - Error { message: String }, + Error { + /// Description of the error that caused termination. + message: String, + }, /// Cancelled by user. Cancelled, } diff --git a/crates/terraphim_rlm/src/session.rs b/crates/terraphim_rlm/src/session.rs index 777a6d16f..b9f69961f 100644 --- a/crates/terraphim_rlm/src/session.rs +++ b/crates/terraphim_rlm/src/session.rs @@ -463,8 +463,11 @@ impl SessionManager { /// Session statistics. #[derive(Debug, Clone)] pub struct SessionStats { + /// Total number of sessions created since the manager started. pub total_sessions_created: u32, + /// Number of currently active (non-expired) sessions. pub active_sessions: u32, + /// Number of active sessions that have an associated VM. pub sessions_with_vm: u32, } From 7a71f2e47886afc68876f00fbcab80d3675f3b86 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:30:58 +0200 Subject: [PATCH 04/11] docs(#2035): add missing doc comments to terraphim_service Refs #2035 Add /// doc comments to all public items in terraphim_service that were flagged by RUSTDOCFLAGS="-W missing-docs": modules, enum variants, struct fields, trait methods, type aliases, and builder methods. Zero warnings remain after this change. 
Co-Authored-By: Claude Sonnet 4.6 --- .../src/conversation_service.rs | 5 +++ crates/terraphim_service/src/error.rs | 38 ++++++++++++++++++- crates/terraphim_service/src/lib.rs | 24 +++++++++--- crates/terraphim_service/src/llm.rs | 8 ++++ crates/terraphim_service/src/llm_proxy.rs | 30 +++++++++++++-- crates/terraphim_service/src/logging.rs | 5 ++- .../src/summarization_queue.rs | 30 +++++++++++++++ 7 files changed, 129 insertions(+), 11 deletions(-) diff --git a/crates/terraphim_service/src/conversation_service.rs b/crates/terraphim_service/src/conversation_service.rs index 55f8a97e5..245ae7d7a 100644 --- a/crates/terraphim_service/src/conversation_service.rs +++ b/crates/terraphim_service/src/conversation_service.rs @@ -29,10 +29,15 @@ pub struct ConversationFilter { /// Statistics about conversations #[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] pub struct ConversationStatistics { + /// Total number of conversations stored. pub total_conversations: usize, + /// Total number of messages across all conversations. pub total_messages: usize, + /// Total number of context items across all conversations. pub total_context_items: usize, + /// Number of conversations grouped by role name. pub conversations_by_role: std::collections::HashMap, + /// Average number of messages per conversation. pub average_messages_per_conversation: f64, } diff --git a/crates/terraphim_service/src/error.rs b/crates/terraphim_service/src/error.rs index 23790e0ce..1129cc3fd 100644 --- a/crates/terraphim_service/src/error.rs +++ b/crates/terraphim_service/src/error.rs @@ -45,47 +45,70 @@ pub enum ErrorCategory { /// Common error patterns used across terraphim crates #[derive(Error, Debug)] pub enum CommonError { + /// A network-level failure (timeout, connection refused, etc.). #[error("Network error: {message}")] Network { + /// Human-readable description of the network failure. message: String, #[source] + /// Optional underlying error that caused this failure. 
source: Option>, }, + /// Invalid or missing configuration value. #[error("Configuration error: {message}")] Configuration { + /// Human-readable description of the configuration problem. message: String, + /// Optional name of the specific configuration field that is invalid. field: Option, }, + /// Input failed validation checks. #[error("Validation error: {message}")] Validation { + /// Human-readable description of the validation failure. message: String, + /// Optional name of the field that failed validation. field: Option, }, + /// Authentication or authorisation failure. #[error("Authentication error: {message}")] - Auth { message: String }, + Auth { + /// Human-readable description of the authentication failure. + message: String, + }, + /// Storage or persistence failure. #[error("Storage error: {message}")] Storage { + /// Human-readable description of the storage failure. message: String, #[source] + /// Optional underlying error that caused this failure. source: Option>, }, + /// Failure when interacting with an external service. #[error("Integration error with {service}: {message}")] Integration { + /// Name of the external service that returned an error. service: String, + /// Human-readable description of the integration failure. message: String, #[source] + /// Optional underlying error that caused this failure. source: Option>, }, + /// Internal system-level failure. #[error("System error: {message}")] System { + /// Human-readable description of the system failure. message: String, #[source] + /// Optional underlying error that caused this failure. source: Option>, }, } @@ -113,6 +136,7 @@ impl TerraphimError for CommonError { /// Helper functions for creating common error types impl CommonError { + /// Create a network error with the given message and no source error. 
pub fn network(message: impl Into) -> Self { CommonError::Network { message: message.into(), @@ -120,6 +144,7 @@ impl CommonError { } } + /// Create a network error with an underlying source error. pub fn network_with_source( message: impl Into, source: impl std::error::Error + Send + Sync + 'static, @@ -130,6 +155,7 @@ impl CommonError { } } + /// Create a configuration error with the given message. pub fn config(message: impl Into) -> Self { CommonError::Configuration { message: message.into(), @@ -137,6 +163,7 @@ impl CommonError { } } + /// Create a configuration error referencing a specific field. pub fn config_field(message: impl Into, field: impl Into) -> Self { CommonError::Configuration { message: message.into(), @@ -144,6 +171,7 @@ impl CommonError { } } + /// Create a validation error with the given message. pub fn validation(message: impl Into) -> Self { CommonError::Validation { message: message.into(), @@ -151,6 +179,7 @@ impl CommonError { } } + /// Create a validation error referencing a specific field. pub fn validation_field(message: impl Into, field: impl Into) -> Self { CommonError::Validation { message: message.into(), @@ -158,12 +187,14 @@ impl CommonError { } } + /// Create an authentication error with the given message. pub fn auth(message: impl Into) -> Self { CommonError::Auth { message: message.into(), } } + /// Create a storage error with the given message and no source error. pub fn storage(message: impl Into) -> Self { CommonError::Storage { message: message.into(), @@ -171,6 +202,7 @@ impl CommonError { } } + /// Create a storage error with an underlying source error. pub fn storage_with_source( message: impl Into, source: impl std::error::Error + Send + Sync + 'static, @@ -181,6 +213,7 @@ impl CommonError { } } + /// Create an integration error for the named external service. 
pub fn integration(service: impl Into, message: impl Into) -> Self { CommonError::Integration { service: service.into(), @@ -189,6 +222,7 @@ impl CommonError { } } + /// Create an integration error with an underlying source error. pub fn integration_with_source( service: impl Into, message: impl Into, @@ -201,6 +235,7 @@ impl CommonError { } } + /// Create a system error with the given message and no source error. pub fn system(message: impl Into) -> Self { CommonError::System { message: message.into(), @@ -208,6 +243,7 @@ impl CommonError { } } + /// Create a system error with an underlying source error. pub fn system_with_source( message: impl Into, source: impl std::error::Error + Send + Sync + 'static, diff --git a/crates/terraphim_service/src/lib.rs b/crates/terraphim_service/src/lib.rs index 626871298..e60dd24ea 100644 --- a/crates/terraphim_service/src/lib.rs +++ b/crates/terraphim_service/src/lib.rs @@ -27,7 +27,7 @@ pub use auto_route::{ #[cfg(feature = "openrouter")] pub mod openrouter; -// Generic LLM layer for multiple providers (OpenRouter, Ollama, etc.) +/// Generic LLM layer for multiple providers (OpenRouter, Ollama, etc.). pub mod llm; // LLM proxy service for unified provider management @@ -35,27 +35,32 @@ pub mod llm; // LLM Proxy service\npub mod proxy_client; // LLM Router configuration integration\n +/// Unified LLM proxy service for managing multiple provider endpoints. pub mod llm_proxy; // LLM Router configuration integration\n -// Centralized HTTP client creation and configuration +/// Centralised HTTP client creation and configuration. pub mod http_client; -// Standardized logging initialization utilities +/// Standardised logging initialisation utilities. pub mod logging; -// Summarization queue system for production-ready async processing +/// Conversation management service. pub mod conversation_service; +/// Rate-limiter utilities for LLM and API calls. 
pub mod rate_limiter; +/// Manager coordinating summarisation tasks and their lifecycle. pub mod summarization_manager; +/// Async summarisation queue with priority scheduling. pub mod summarization_queue; +/// Background worker that processes summarisation tasks from the queue. pub mod summarization_worker; -// Centralized error handling patterns and utilities +/// Centralised error handling patterns and utilities. pub mod error; -// Context management for LLM conversations +/// Context management for LLM conversations. pub mod context; #[cfg(test)] @@ -72,22 +77,28 @@ fn normalize_filename_to_id(filename: &str) -> String { /// Top-level error type for the Terraphim service layer. #[derive(thiserror::Error, Debug)] pub enum ServiceError { + /// Error originating from the middleware layer. #[error("Middleware error: {0}")] Middleware(#[from] terraphim_middleware::Error), + /// Error from the OpenDAL storage abstraction layer. #[error("OpenDal error: {0}")] OpenDal(Box), + /// Error from the persistence layer. #[error("Persistence error: {0}")] Persistence(#[from] terraphim_persistence::Error), + /// Configuration error with a descriptive message. #[error("Config error: {0}")] Config(String), #[cfg(feature = "openrouter")] + /// Error from the OpenRouter LLM provider. #[error("OpenRouter error: {0}")] OpenRouter(#[from] crate::openrouter::OpenRouterError), + /// Error from a common shared error type. #[error("Common error: {0}")] Common(#[from] crate::error::CommonError), } @@ -125,6 +136,7 @@ impl crate::error::TerraphimError for ServiceError { } } +/// Convenience alias for results returned by the service layer. pub type Result = std::result::Result; /// Main entry point for search, indexing, and AI operations in Terraphim. 
diff --git a/crates/terraphim_service/src/llm.rs b/crates/terraphim_service/src/llm.rs index a5bf9451f..998d71b10 100644 --- a/crates/terraphim_service/src/llm.rs +++ b/crates/terraphim_service/src/llm.rs @@ -17,22 +17,30 @@ mod router_config; use crate::Result as ServiceResult; +/// Options controlling how content is summarised by an LLM. #[derive(Clone, Debug)] pub struct SummarizeOptions { + /// Maximum character length for the generated summary. pub max_length: usize, } #[allow(dead_code)] +/// Options for chat-completion requests sent to an LLM. #[derive(Clone, Debug)] pub struct ChatOptions { + /// Maximum number of tokens the model may generate. pub max_tokens: Option, + /// Sampling temperature controlling response randomness. pub temperature: Option, } +/// Abstraction over different LLM providers for summarisation and chat. #[async_trait::async_trait] pub trait LlmClient: Send + Sync { + /// Return the human-readable name of this LLM provider. fn name(&self) -> &'static str; + /// Summarise the provided content according to the given options. async fn summarize(&self, content: &str, opts: SummarizeOptions) -> ServiceResult; /// List available models for this provider (best-effort) diff --git a/crates/terraphim_service/src/llm_proxy.rs b/crates/terraphim_service/src/llm_proxy.rs index ad9e6ae7a..e7155d58a 100644 --- a/crates/terraphim_service/src/llm_proxy.rs +++ b/crates/terraphim_service/src/llm_proxy.rs @@ -10,24 +10,40 @@ use std::env; use std::time::Duration; use thiserror::Error; +/// Errors that can occur within the LLM proxy service. #[derive(Error, Debug)] pub enum LlmProxyError { + /// The proxy configuration is invalid or missing required values. #[error("Invalid configuration: {0}")] ConfigError(String), + /// A network-level failure occurred while contacting a provider. #[error("Network error: {0}")] NetworkError(String), + /// Authentication with the specified provider failed. 
#[error("Authentication failed for provider: {provider}")] - AuthError { provider: String }, + AuthError { + /// Name of the provider for which authentication failed. + provider: String, + }, + /// The specified provider is currently rate-limiting requests. #[error("Rate limit exceeded for provider: {provider}")] - RateLimitError { provider: String }, + RateLimitError { + /// Name of the provider that returned a rate-limit response. + provider: String, + }, + /// The specified provider is not supported by this proxy. #[error("Provider not supported: {provider}")] - UnsupportedProvider { provider: String }, + UnsupportedProvider { + /// Name of the unsupported provider. + provider: String, + }, } +/// Convenience alias for results returned by the LLM proxy module. pub type Result = std::result::Result; /// Configuration for LLM proxy settings @@ -35,12 +51,19 @@ pub type Result = std::result::Result; /// `api_key` is redacted in `Debug` output to prevent credential leakage in logs. #[derive(Clone)] pub struct ProxyConfig { + /// Name of the LLM provider (e.g. `"anthropic"`, `"openrouter"`, `"ollama"`). pub provider: String, + /// Model identifier to use for requests to this provider. pub model: String, + /// Optional custom base URL for the provider endpoint or proxy. pub base_url: Option, + /// Optional API key for authenticating with the provider. pub api_key: Option, + /// Maximum duration to wait for a response before timing out. pub timeout: Duration, + /// Maximum number of retry attempts on transient failures. pub max_retries: u32, + /// Whether to fall back to the direct provider endpoint if the proxy fails. pub enable_fallback: bool, } @@ -102,6 +125,7 @@ impl ProxyConfig { pub struct LlmProxyClient { client: Client, configs: HashMap, + /// Name of the provider used when no explicit provider is specified. 
pub default_provider: String, } diff --git a/crates/terraphim_service/src/logging.rs b/crates/terraphim_service/src/logging.rs index 6fa5487f0..49a1d66e5 100644 --- a/crates/terraphim_service/src/logging.rs +++ b/crates/terraphim_service/src/logging.rs @@ -47,7 +47,10 @@ pub enum LoggingConfig { /// Integration test logging (INFO level, reduced noise) IntegrationTest, /// Custom logging level - Custom { level: log::LevelFilter }, + Custom { + /// The log level filter to apply. + level: log::LevelFilter, + }, } /// Initialize logging based on configuration preset diff --git a/crates/terraphim_service/src/summarization_queue.rs b/crates/terraphim_service/src/summarization_queue.rs index 43773c680..43cb34e91 100644 --- a/crates/terraphim_service/src/summarization_queue.rs +++ b/crates/terraphim_service/src/summarization_queue.rs @@ -21,6 +21,7 @@ impl Default for TaskId { } impl TaskId { + /// Create a new randomly-generated task identifier. pub fn new() -> Self { Self(Uuid::new_v4()) } @@ -51,35 +52,49 @@ pub enum Priority { pub enum TaskStatus { /// Task is queued and waiting to be processed Pending { + /// Timestamp when the task entered the queue. queued_at: DateTime, + /// Current position of this task in the queue, if known. position_in_queue: Option, }, /// Task is currently being processed Processing { + /// Timestamp when processing started. started_at: DateTime, + /// Optional progress fraction in the range `[0.0, 1.0]`. progress: Option, }, /// Task completed successfully Completed { + /// The generated summary text. summary: String, + /// Timestamp when the task finished. completed_at: DateTime, + /// Total time spent processing this task, in seconds. processing_duration_seconds: u64, }, /// Task failed with error Failed { + /// Human-readable description of the failure. error: String, + /// Timestamp when the failure was recorded. failed_at: DateTime, + /// Number of retry attempts made so far. 
retry_count: u32, + /// Scheduled time for the next retry attempt, if any. next_retry_at: Option>, }, /// Task was cancelled Cancelled { + /// Timestamp when the task was cancelled. cancelled_at: DateTime, + /// Human-readable reason for cancellation. reason: String, }, } impl TaskStatus { + /// Return `true` if the task has reached a terminal state (completed, failed, or cancelled). pub fn is_terminal(&self) -> bool { matches!( self, @@ -87,10 +102,12 @@ impl TaskStatus { ) } + /// Return `true` if the task is currently being processed. pub fn is_processing(&self) -> bool { matches!(self, TaskStatus::Processing { .. }) } + /// Return `true` if the task is waiting in the queue. pub fn is_pending(&self) -> bool { matches!(self, TaskStatus::Pending { .. }) } @@ -124,6 +141,7 @@ pub struct SummarizationTask { } impl SummarizationTask { + /// Create a new summarisation task for the given document and role. pub fn new(document: Document, role: Role) -> Self { Self { id: TaskId::new(), @@ -140,44 +158,53 @@ impl SummarizationTask { } } + /// Set the task priority, returning `self` for builder-style chaining. pub fn with_priority(mut self, priority: Priority) -> Self { self.priority = priority; self } + /// Set the maximum number of retry attempts, returning `self` for chaining. pub fn with_max_retries(mut self, max_retries: u32) -> Self { self.max_retries = max_retries; self } + /// Override the maximum summary character length, returning `self` for chaining. pub fn with_max_summary_length(mut self, length: usize) -> Self { self.max_summary_length = Some(length); self } + /// Set whether an existing summary should be regenerated, returning `self` for chaining. pub fn with_force_regenerate(mut self, force: bool) -> Self { self.force_regenerate = force; self } + /// Set a callback URL to be notified on completion, returning `self` for chaining. 
pub fn with_callback_url(mut self, url: String) -> Self { self.callback_url = Some(url); self } + /// Attach global configuration for provider fallbacks, returning `self` for chaining. pub fn with_config(mut self, config: Config) -> Self { self.config = Some(config); self } + /// Return `true` if the task may be retried again. pub fn can_retry(&self) -> bool { self.retry_count < self.max_retries } + /// Increment the retry counter by one. pub fn increment_retry(&mut self) { self.retry_count += 1; } + /// Return the maximum summary length, falling back to the default of 250 characters. pub fn get_summary_length(&self) -> usize { self.max_summary_length.unwrap_or(250) } @@ -305,8 +332,11 @@ pub struct RateLimiterStatus { pub enum SubmitResult { /// Task was successfully queued Queued { + /// Identifier assigned to the newly-queued task. task_id: TaskId, + /// Position of this task in the queue at submission time. position_in_queue: usize, + /// Estimated number of seconds until the task begins processing. 
estimated_wait_time_seconds: Option, }, /// Task was rejected due to queue being full From 53061347402dd2956b7d8d5aaca8d92ee89d4c7d Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:33:38 +0200 Subject: [PATCH 05/11] docs(#2035): add missing doc comments to terraphim_agent (119 items) Refs #2035 --- crates/terraphim_agent/src/lib.rs | 1 + crates/terraphim_agent/src/repl/commands.rs | 154 +++++++++++++++++--- crates/terraphim_agent/src/repl/handler.rs | 3 + crates/terraphim_agent/src/robot/budget.rs | 9 ++ crates/terraphim_agent/src/robot/docs.rs | 26 ++++ crates/terraphim_agent/src/robot/mod.rs | 1 + crates/terraphim_agent/src/robot/schema.rs | 8 + crates/terraphim_agent/src/service.rs | 11 ++ 8 files changed, 192 insertions(+), 21 deletions(-) diff --git a/crates/terraphim_agent/src/lib.rs b/crates/terraphim_agent/src/lib.rs index a0b39eeda..0372c8d45 100644 --- a/crates/terraphim_agent/src/lib.rs +++ b/crates/terraphim_agent/src/lib.rs @@ -6,6 +6,7 @@ #[cfg(feature = "server")] pub mod client; pub mod onboarding; +/// Service layer providing the `TuiService` wrapper around `TerraphimService`. pub mod service; #[cfg(feature = "shared-learning")] pub mod shared_learning; diff --git a/crates/terraphim_agent/src/repl/commands.rs b/crates/terraphim_agent/src/repl/commands.rs index 0bcad2834..f335d5bf4 100644 --- a/crates/terraphim_agent/src/repl/commands.rs +++ b/crates/terraphim_agent/src/repl/commands.rs @@ -3,117 +3,167 @@ use anyhow::{Result, anyhow}; use std::str::FromStr; +/// All commands available in the interactive REPL, parsed from `/command` strings. #[derive(Debug, Clone, PartialEq)] pub enum ReplCommand { - // Base commands (always available with 'repl' feature) + /// Search documents in the knowledge graph. Search { + /// The search query string. query: String, + /// Optional role override for this search. role: Option, + /// Maximum number of results to return. limit: Option, + /// Whether to use semantic (embedding-based) search. 
semantic: bool, + /// Whether to expand the query using knowledge-graph concepts. concepts: bool, }, + /// Manage agent configuration. Config { + /// The config sub-operation to perform. subcommand: ConfigSubcommand, }, + /// Manage and switch between roles. Role { + /// The role sub-operation to perform. subcommand: RoleSubcommand, }, + /// Display knowledge-graph statistics. Graph { + /// Number of top nodes to show (all if `None`). top_k: Option, }, // Chat commands (requires 'llm' feature) #[cfg(feature = "llm")] + /// Start an AI chat session. Chat { + /// Optional opening message; opens interactive mode when `None`. message: Option, }, #[cfg(feature = "llm")] + /// Summarise a document or piece of text. Summarize { + /// Document ID or raw text to summarise. target: String, }, // MCP commands (requires 'repl-mcp' feature) #[cfg(feature = "repl-mcp")] + /// Autocomplete a partial term using the loaded thesaurus. Autocomplete { + /// Partial query to complete. query: String, + /// Maximum number of suggestions to return. limit: Option, }, #[cfg(feature = "repl-mcp")] + /// Extract paragraphs from text that start at matched terms. Extract { + /// The input text to search within. text: String, + /// When `true`, omit the matched term itself from each result. exclude_term: bool, }, #[cfg(feature = "repl-mcp")] + /// Find automaton matches within a block of text. Find { + /// The input text to search within. text: String, }, #[cfg(feature = "repl-mcp")] + /// Replace matched terms in text using knowledge-graph substitutions. Replace { + /// The input text to transform. text: String, + /// Output format override (e.g. `markdown`). format: Option, }, #[cfg(feature = "repl-mcp")] + /// Display thesaurus entries for the active or specified role. Thesaurus { + /// Role whose thesaurus to display; uses current role if `None`. role: Option, }, // File commands (requires 'repl-file' feature) #[cfg(feature = "repl-file")] + /// Perform local file operations. 
File { + /// The file sub-operation to perform. subcommand: FileSubcommand, }, // Web commands (requires 'repl-web' feature) #[cfg(feature = "repl-web")] + /// Perform web fetch or scraping operations. Web { + /// The web sub-operation to perform. subcommand: WebSubcommand, }, // VM commands (requires 'firecracker' feature) #[cfg(feature = "firecracker")] + /// Manage Firecracker microVMs. Vm { + /// The VM sub-operation to perform. subcommand: VmSubcommand, }, - // Robot mode commands (for AI agents) + /// Access robot-mode self-documentation for AI agent integration. Robot { + /// The robot sub-operation to perform. subcommand: RobotSubcommand, }, // Session commands (requires 'repl-sessions' feature) #[cfg(feature = "repl-sessions")] + /// Browse and search AI coding session history. Sessions { + /// The session sub-operation to perform. subcommand: SessionsSubcommand, }, - // Update management commands (always available) + /// Manage agent binary updates. Update { + /// The update sub-operation to perform. subcommand: UpdateSubcommand, }, - // Utility commands + /// Show help information for available commands. Help { + /// Optional specific command to describe; shows all commands if `None`. command: Option, }, + /// Exit the REPL (alias for `Exit`). Quit, + /// Exit the REPL. Exit, + /// Clear the terminal screen. Clear, } +/// Sub-operations for the `/robot` command used by AI agents. #[derive(Debug, Clone, PartialEq)] pub enum RobotSubcommand { /// Get capabilities summary Capabilities, /// Get schema for a command (or all commands) - Schemas { command: Option }, + Schemas { + /// Command name to fetch schema for; fetches all schemas if `None`. + command: Option, + }, /// Get examples for a command - Examples { command: Option }, + Examples { + /// Command name to fetch examples for; fetches all examples if `None`. 
+ command: Option, + }, /// List exit codes ExitCodes, } @@ -126,31 +176,59 @@ pub enum UpdateSubcommand { /// Install available updates Install, /// Rollback to a previous version - Rollback { version: String }, + Rollback { + /// The version string to roll back to. + version: String, + }, /// List available backup versions List, } +/// Sub-operations for the `/config` command. #[derive(Debug, Clone, PartialEq)] pub enum ConfigSubcommand { + /// Display the current configuration. Show, - Set { key: String, value: String }, + /// Set a configuration key to a new value. + Set { + /// The configuration key to update. + key: String, + /// The new value to assign. + value: String, + }, } +/// Sub-operations for the `/role` command. #[derive(Debug, Clone, PartialEq)] pub enum RoleSubcommand { + /// List all available roles. List, - Select { name: String }, + /// Switch to the named role. + Select { + /// Name of the role to activate. + name: String, + }, } +/// Sub-operations for the `/file` command. #[derive(Debug, Clone, PartialEq)] #[cfg(feature = "repl-file")] pub enum FileSubcommand { - Search { query: String }, + /// Search for files matching the given query. + Search { + /// Query string to match against file names and content. + query: String, + }, + /// List all files visible to the current role. List, - Info { path: String }, + /// Show metadata for a specific file. + Info { + /// Path of the file to inspect. + path: String, + }, } +/// Sub-operations for the `/sessions` command. #[derive(Debug, Clone, PartialEq)] #[cfg(feature = "repl-sessions")] pub enum SessionsSubcommand { @@ -158,41 +236,75 @@ pub enum SessionsSubcommand { Sources, /// List imported sessions (auto-imports if cache is empty) List { + /// Filter results to sessions from this source only. source: Option, + /// Maximum number of sessions to show. limit: Option, }, /// Search sessions by query - Search { query: String }, + Search { + /// Full-text query to search session content. 
+ query: String, + }, /// Show session statistics Stats, /// Show details of a specific session - Show { session_id: String }, + Show { + /// Unique identifier of the session to display. + session_id: String, + }, /// Search sessions by concept (Phase 3 - requires enrichment) - Concepts { concept: String }, + Concepts { + /// Knowledge-graph concept name to search for. + concept: String, + }, /// Find sessions related to a given session Related { + /// Unique identifier of the reference session. session_id: String, + /// Minimum number of shared concepts required. min_shared: Option, }, /// Show session timeline grouped by period Timeline { - group_by: Option, // day, week, month + /// Grouping period: `day`, `week`, or `month`. + group_by: Option, + /// Maximum number of period buckets to show. limit: Option, }, /// Export sessions to file Export { - format: Option, // json, markdown - output: Option, // file path + /// Output format: `json` or `markdown`. + format: Option, + /// Destination file path for the export. + output: Option, + /// Export only this specific session when set. session_id: Option, }, /// Enrich sessions with concepts (Phase 3) - Enrich { session_id: Option }, + Enrich { + /// Session to enrich; enriches all sessions if `None`. + session_id: Option, + }, /// List files accessed by a session - Files { session_id: String, json: bool }, + Files { + /// Unique identifier of the session whose files to list. + session_id: String, + /// Output machine-readable JSON instead of text. + json: bool, + }, /// Find sessions by file path - ByFile { file_path: String, json: bool }, + ByFile { + /// File path to search for across all sessions. + file_path: String, + /// Output machine-readable JSON instead of text. + json: bool, + }, /// Build search index and show index statistics - Index { verbose: bool }, + Index { + /// Show extended statistics when `true`. 
+ verbose: bool, + }, /// Cluster sessions by concept similarity (Spec F5.2) Cluster { /// Maximum number of clusters (auto-detect if None) diff --git a/crates/terraphim_agent/src/repl/handler.rs b/crates/terraphim_agent/src/repl/handler.rs index ce610d6d7..354351e6b 100644 --- a/crates/terraphim_agent/src/repl/handler.rs +++ b/crates/terraphim_agent/src/repl/handler.rs @@ -25,6 +25,7 @@ use rustyline::Editor; #[cfg(feature = "repl")] use colored::Colorize; +/// Drives the interactive REPL loop, dispatching parsed commands to the underlying service. pub struct ReplHandler { service: Option, #[cfg(feature = "server")] @@ -35,6 +36,7 @@ pub struct ReplHandler { } impl ReplHandler { + /// Creates a `ReplHandler` that operates directly against a local `TuiService` (no HTTP). pub fn new_offline(service: TuiService) -> Self { #[cfg(feature = "repl-mcp")] let mcp_handler = { @@ -63,6 +65,7 @@ impl ReplHandler { } } + /// Starts the interactive REPL loop, reading and executing commands until the user exits. #[cfg(feature = "repl")] pub async fn run(&mut self) -> Result<()> { use rustyline::completion::{Completer, Pair}; diff --git a/crates/terraphim_agent/src/robot/budget.rs b/crates/terraphim_agent/src/robot/budget.rs index 371465908..402b59709 100644 --- a/crates/terraphim_agent/src/robot/budget.rs +++ b/crates/terraphim_agent/src/robot/budget.rs @@ -3,19 +3,26 @@ use serde::{Deserialize, Serialize}; use super::output::{FieldMode, RobotConfig, RobotFormatter}; use super::schema::{Pagination, SearchResultItem, TokenBudget}; +/// Search results after budget constraints (token and count limits) have been applied. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BudgetedResults { + /// The filtered and potentially truncated search result items. pub results: Vec, + /// Pagination metadata describing totals and offsets. pub pagination: Pagination, + /// Token budget metadata when a token limit was configured. 
pub token_budget: Option, } +/// Errors that can occur when applying budget constraints to search results. #[derive(Debug, thiserror::Error)] pub enum BudgetError { + /// Failed to serialise a search result item to JSON. #[error("serialization error: {0}")] Serialization(#[from] serde_json::Error), } +/// Applies configurable field, result-count, and token-count budgets to search results. pub struct BudgetEngine { config: RobotConfig, formatter: RobotFormatter, @@ -34,11 +41,13 @@ const KNOWN_FIELDS: &[&str] = &[ ]; impl BudgetEngine { + /// Creates a new `BudgetEngine` from the given robot configuration. pub fn new(config: RobotConfig) -> Self { let formatter = RobotFormatter::new(config.clone()); Self { config, formatter } } + /// Applies all configured budget constraints to the given search results and returns the budgeted output. pub fn apply(&self, results: &[SearchResultItem]) -> Result { let total = results.len(); diff --git a/crates/terraphim_agent/src/robot/docs.rs b/crates/terraphim_agent/src/robot/docs.rs index 1dafb1cd0..c4cdc80f4 100644 --- a/crates/terraphim_agent/src/robot/docs.rs +++ b/crates/terraphim_agent/src/robot/docs.rs @@ -586,34 +586,52 @@ impl Default for SelfDocumentation { /// Capabilities summary #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Capabilities { + /// Human-readable name of this agent or tool. pub name: String, + /// Semantic version string of the current build. pub version: String, + /// Brief description of what this agent does. pub description: String, + /// Feature flags indicating which subsystems are compiled in. pub features: FeatureFlags, + /// List of top-level command names supported by this agent. pub commands: Vec, + /// Output formats this agent can produce (e.g. `json`, `text`). pub supported_formats: Vec, } /// Documentation for a single command #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CommandDoc { + /// Primary name of the command as used in the REPL. 
pub name: String, + /// Alternative names that also invoke this command. pub aliases: Vec, + /// Short description of what the command does. pub description: String, + /// Positional arguments accepted by this command. pub arguments: Vec, + /// Named flags accepted by this command. pub flags: Vec, + /// Usage examples demonstrating typical invocations. pub examples: Vec, + /// JSON schema describing the command's response structure. pub response_schema: serde_json::Value, } /// Documentation for a command argument #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ArgumentDoc { + /// Name of the argument as shown in usage strings. pub name: String, + /// Data type of the argument (e.g. `string`, `usize`). #[serde(rename = "type")] pub arg_type: String, + /// Whether the argument must be supplied by the caller. pub required: bool, + /// Description of what this argument controls. pub description: String, + /// Default value used when the argument is omitted. #[serde(skip_serializing_if = "Option::is_none")] pub default: Option, } @@ -621,21 +639,29 @@ pub struct ArgumentDoc { /// Documentation for a command flag #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FlagDoc { + /// Long name of the flag (without leading `--`). pub name: String, + /// Optional single-character short alias (without leading `-`). #[serde(skip_serializing_if = "Option::is_none")] pub short: Option, + /// Data type expected after the flag (e.g. `bool`, `usize`). #[serde(rename = "type")] pub flag_type: String, + /// Default value used when the flag is not provided. #[serde(skip_serializing_if = "Option::is_none")] pub default: Option, + /// Description of what this flag controls. pub description: String, } /// Documentation for a command example #[derive(Debug, Clone, Serialize, Deserialize)] pub struct ExampleDoc { + /// Short explanation of what this example demonstrates. pub description: String, + /// The exact command string to run, including the leading `/`. 
pub command: String, + /// Expected or sample output produced by the command. #[serde(skip_serializing_if = "Option::is_none")] pub output: Option, } diff --git a/crates/terraphim_agent/src/robot/mod.rs b/crates/terraphim_agent/src/robot/mod.rs index c7ea23fc9..75e30d0b7 100644 --- a/crates/terraphim_agent/src/robot/mod.rs +++ b/crates/terraphim_agent/src/robot/mod.rs @@ -3,6 +3,7 @@ //! This module provides structured JSON output and self-documentation //! capabilities for integration with AI agents and automation tools. +/// Budget engine that applies token and result-count limits to search output. #[allow(dead_code)] pub mod budget; #[allow(dead_code)] diff --git a/crates/terraphim_agent/src/robot/schema.rs b/crates/terraphim_agent/src/robot/schema.rs index 2cebef6f8..9f345d891 100644 --- a/crates/terraphim_agent/src/robot/schema.rs +++ b/crates/terraphim_agent/src/robot/schema.rs @@ -348,13 +348,21 @@ pub struct CapabilitiesData { /// Feature flags #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FeatureFlags { + /// Whether document search is available. pub search: bool, + /// Whether AI chat is available (requires `repl-chat` feature). pub chat: bool, + /// Whether MCP tool integration is available (requires `repl-mcp` feature). pub mcp_tools: bool, + /// Whether local file operations are available (requires `repl-file` feature). pub file_operations: bool, + /// Whether web fetch operations are available (requires `repl-web` feature). pub web_operations: bool, + /// Whether Firecracker VM execution is available. pub vm_execution: bool, + /// Whether AI coding session search is available (requires `repl-sessions` feature). pub session_search: bool, + /// Whether knowledge-graph query operations are available. 
pub knowledge_graph: bool, } diff --git a/crates/terraphim_agent/src/service.rs b/crates/terraphim_agent/src/service.rs index 1c61ae55f..a79826067 100644 --- a/crates/terraphim_agent/src/service.rs +++ b/crates/terraphim_agent/src/service.rs @@ -10,6 +10,7 @@ use terraphim_settings::{DeviceSettings, Error as DeviceSettingsError}; use terraphim_types::{Document, Layer, NormalizedTermValue, RoleName, SearchQuery, Thesaurus}; use tokio::sync::Mutex; +/// Service wrapper that combines configuration state and search service for TUI use. #[derive(Clone)] pub struct TuiService { config_state: ConfigState, @@ -898,24 +899,34 @@ impl TuiService { /// Result of connectivity check #[derive(Debug, Clone, serde::Serialize)] pub struct ConnectivityResult { + /// Whether all terms are connected via a single graph path. pub connected: bool, + /// The terms that were matched by the automaton. pub matched_terms: Vec, + /// Human-readable description of the connectivity result. pub message: String, } /// Fuzzy suggestion result #[derive(Debug, Clone, serde::Serialize)] pub struct FuzzySuggestion { + /// The candidate term suggested as a correction. pub term: String, + /// Jaro-Winkler similarity score between the query and this term. pub similarity: f64, } /// Checklist validation result #[derive(Debug, Clone, serde::Serialize)] pub struct ChecklistResult { + /// Name of the checklist that was evaluated. pub checklist_name: String, + /// Whether every checklist item was satisfied. pub passed: bool, + /// Total number of items in the checklist. pub total_items: usize, + /// Items from the checklist that were satisfied. pub satisfied: Vec, + /// Items from the checklist that were not satisfied. 
pub missing: Vec, } From 9b31b49645f9fc718fcb523b4c914a654a6099fa Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:35:27 +0200 Subject: [PATCH 06/11] docs(#2035): partial doc comments for terraphim_orchestrator (6 of 44 files) Refs #2035 --- .../src/adf_commands.rs | 20 +++++- .../src/agent_registry.rs | 11 ++++ .../src/agent_run_command.rs | 33 ++++++++++ .../src/agent_run_record.rs | 3 + .../src/agent_runner.rs | 63 +++++++++++++++++++ crates/terraphim_orchestrator/src/config.rs | 28 ++++++++- 6 files changed, 154 insertions(+), 4 deletions(-) diff --git a/crates/terraphim_orchestrator/src/adf_commands.rs b/crates/terraphim_orchestrator/src/adf_commands.rs index 3a85fc11a..c76c64c59 100644 --- a/crates/terraphim_orchestrator/src/adf_commands.rs +++ b/crates/terraphim_orchestrator/src/adf_commands.rs @@ -11,23 +11,39 @@ use terraphim_types::{NormalizedTerm, NormalizedTermValue, Thesaurus}; #[derive(Debug, Clone, PartialEq)] pub enum AdfCommand { /// Trigger a compound review - CompoundReview { issue_number: u64, comment_id: u64 }, + CompoundReview { + /// Issue number where the command was found. + issue_number: u64, + /// Comment ID that triggered the command. + comment_id: u64, + }, /// Spawn a specific agent SpawnAgent { + /// Name of the agent to spawn. agent_name: String, + /// Issue number where the command was found. issue_number: u64, + /// Comment ID that triggered the command. comment_id: u64, + /// Contextual text following the command. context: String, }, /// Trigger a persona-based agent SpawnPersona { + /// Name of the persona to activate. persona_name: String, + /// Issue number where the command was found. issue_number: u64, + /// Comment ID that triggered the command. comment_id: u64, + /// Contextual text following the command. context: String, }, /// Unknown command - Unknown { raw: String }, + Unknown { + /// Raw text of the unrecognised command. 
+ raw: String, + }, } /// Parser for ADF commands using terraphim-automata diff --git a/crates/terraphim_orchestrator/src/agent_registry.rs b/crates/terraphim_orchestrator/src/agent_registry.rs index 6482a9eb4..112152ba4 100644 --- a/crates/terraphim_orchestrator/src/agent_registry.rs +++ b/crates/terraphim_orchestrator/src/agent_registry.rs @@ -39,11 +39,14 @@ impl AgentScope { /// Stable key for a registered agent definition. #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct AgentKey { + /// Scope (legacy or project-specific) that this agent belongs to. pub scope: AgentScope, + /// Name of the agent within its scope. pub name: String, } impl AgentKey { + /// Create a new agent key from a scope and name. pub fn new(scope: AgentScope, name: impl Into) -> Self { Self { scope, @@ -51,10 +54,12 @@ impl AgentKey { } } + /// Create a project-scoped agent key. pub fn project(project: impl Into, name: impl Into) -> Self { Self::new(AgentScope::Project(project.into()), name) } + /// Create a legacy (single-project) agent key. pub fn legacy(name: impl Into) -> Self { Self::new(AgentScope::Legacy, name) } @@ -76,16 +81,21 @@ pub enum AgentSource { /// Registry entry for an agent definition. #[derive(Debug, Clone)] pub struct RegisteredAgent { + /// Unique key identifying this agent within its scope. pub key: AgentKey, + /// Full agent definition from the merged config. pub definition: AgentDefinition, + /// Where this agent entry originated. pub source: AgentSource, } impl RegisteredAgent { + /// Return the project ID this agent is scoped to, if any. pub fn project_id(&self) -> Option<&str> { self.definition.project.as_deref() } + /// Return whether this agent should only be triggered by events, not scheduled runs. pub fn event_only(&self) -> bool { self.definition.event_only } @@ -138,6 +148,7 @@ impl AgentRegistry { self.by_key.len() } + /// Return true if the registry contains no agents. 
pub fn is_empty(&self) -> bool { self.by_key.is_empty() } diff --git a/crates/terraphim_orchestrator/src/agent_run_command.rs b/crates/terraphim_orchestrator/src/agent_run_command.rs index 18d2493d4..976846efd 100644 --- a/crates/terraphim_orchestrator/src/agent_run_command.rs +++ b/crates/terraphim_orchestrator/src/agent_run_command.rs @@ -31,6 +31,7 @@ fn parse_cron(expr: &str) -> Result { .map_err(|e| OrchestratorError::Config(format!("invalid cron '{}': {}", expr, e))) } +/// Return the list of trigger modes applicable to the given agent definition. pub fn applicable_modes(agent: &AgentDefinition) -> Vec { let mut modes = vec![TriggerMode::Local]; if agent.schedule.is_some() { @@ -46,6 +47,7 @@ pub fn applicable_modes(agent: &AgentDefinition) -> Vec { modes } +/// Return the cron schedule string for the named agent, if configured. pub fn schedule_for_agent(config: &OrchestratorConfig, agent_name: &str) -> Option { config .agents @@ -54,6 +56,7 @@ pub fn schedule_for_agent(config: &OrchestratorConfig, agent_name: &str) -> Opti .and_then(|a| a.schedule.clone()) } +/// Return true if the cron expression can be parsed into a valid schedule. pub fn is_cron_schedule_valid(expr: &str) -> bool { parse_cron(expr).is_ok() } @@ -119,6 +122,7 @@ fn validate_agent_mode( } } +/// Validate an agent definition against all applicable trigger modes. pub fn validate_agent_all_modes( config: &OrchestratorConfig, agent: &AgentDefinition, @@ -169,30 +173,48 @@ pub fn validate_agent_all_modes( (report, mode_results) } +/// Parsed agent CLI subcommand. #[derive(Debug, Clone, PartialEq, Eq)] pub enum AgentSubcommand { + /// Validate one or all agents from the loaded config. Validate { + /// Optional name of a specific agent to validate. agent_name: Option, + /// Optional project scope for the agent. project: Option, + /// Output format for the validation report. format: OutputFormat, + /// Skip probing whether the model is available. 
skip_model_probe: bool, }, + /// Validate all agents from an explicit config file. ValidateAll { + /// Path to the config file to validate. config: PathBuf, + /// Output format for the validation report. format: OutputFormat, + /// Skip probing whether the model is available. skip_model_probe: bool, }, + /// Run an agent with a synthetic event for local testing. RunSynthetic { + /// Name of the agent to run. agent_name: String, + /// Optional project scope for the agent. project: Option, + /// Synthetic event to inject. event: SyntheticEvent, + /// Output format for the run report. format: OutputFormat, }, } +/// Output format for validation and run reports. #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)] pub enum OutputFormat { + /// Human-readable text output. Human, + /// Machine-readable JSON output. #[default] Json, } @@ -208,16 +230,24 @@ impl std::str::FromStr for OutputFormat { } } +/// Aggregated validation report for all agents in the config. #[derive(Debug, Clone, Serialize)] pub struct AgentValidateAllReport { + /// Per-agent runtime validation reports keyed by agent name. pub agents: HashMap, + /// Per-agent mode validation results keyed by agent name. pub mode_results: HashMap>, + /// Total number of agents validated. pub total: usize, + /// Number of agents that are runnable in all modes. pub runnable: usize, + /// Number of agents that failed validation in at least one mode. pub failed: usize, + /// True when every agent is runnable across all modes. pub all_modes_runnable: bool, } +/// Parse CLI arguments into an `AgentSubcommand`. pub fn parse_agent_args(args: &[String]) -> Result { let mut iter = args.iter(); let mut subcommand: Option = None; @@ -329,6 +359,7 @@ pub fn parse_agent_args(args: &[String]) -> Result { } } +/// Run the validate subcommand and return an exit code. 
pub fn run_validate( config: &OrchestratorConfig, agent_name: Option, @@ -388,6 +419,7 @@ pub fn run_validate( } } +/// Run the validate-all subcommand loading config from the given path and return an exit code. pub fn run_validate_all( config: PathBuf, format: OutputFormat, @@ -431,6 +463,7 @@ pub fn run_validate_all( } } +/// Run an agent with a synthetic event and return an exit code. pub fn run_synthetic( _config: &OrchestratorConfig, agent_name: &str, diff --git a/crates/terraphim_orchestrator/src/agent_run_record.rs b/crates/terraphim_orchestrator/src/agent_run_record.rs index 2436547d2..5faa7d2f9 100644 --- a/crates/terraphim_orchestrator/src/agent_run_record.rs +++ b/crates/terraphim_orchestrator/src/agent_run_record.rs @@ -578,12 +578,15 @@ pub trait RunRecordPersistence: Send + Sync { /// Errors for run record persistence. #[derive(Debug, thiserror::Error)] pub enum RunRecordError { + /// A backend storage operation failed. #[error("storage error: {0}")] Storage(String), + /// JSON serialisation or deserialisation failed. #[error("serialization error: {0}")] Serialization(#[from] serde_json::Error), + /// An underlying I/O operation failed. #[error("IO error: {0}")] Io(#[from] std::io::Error), } diff --git a/crates/terraphim_orchestrator/src/agent_runner.rs b/crates/terraphim_orchestrator/src/agent_runner.rs index fc5168f40..545d0b054 100644 --- a/crates/terraphim_orchestrator/src/agent_runner.rs +++ b/crates/terraphim_orchestrator/src/agent_runner.rs @@ -7,13 +7,17 @@ use crate::{AgentOrchestrator, OrchestratorError}; const LEGACY_PROJECT: &str = ""; +/// Request to run or validate a specific agent. #[derive(Debug, Clone)] pub struct AgentRunRequest { + /// Name of the agent to run. pub agent_name: String, + /// Optional project scope constraining which agent definition is selected. pub project: Option, } impl AgentRunRequest { + /// Create a new run request for the named agent. 
pub fn new(agent_name: impl Into) -> Self { Self { agent_name: agent_name.into(), @@ -21,38 +25,58 @@ impl AgentRunRequest { } } + /// Restrict this request to agents in the given project. pub fn with_project(mut self, project: impl Into) -> Self { self.project = Some(project.into()); self } } +/// Gitea repository target information extracted from agent config. #[derive(Debug, Clone, Serialize, PartialEq, Eq)] pub struct GiteaTargetReport { + /// Base URL of the Gitea instance. pub base_url: String, + /// Repository owner name. pub owner: String, + /// Repository name. pub repo: String, + /// Optional issue number linked to this agent. pub issue: Option, } +/// How an agent can be triggered to run. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] #[serde(rename_all = "snake_case")] pub enum TriggerMode { + /// Agent is triggered on a cron schedule. Cron, + /// Agent is triggered by an @mention in an issue or PR comment. Mention, + /// Agent is triggered by a git push event. Push, + /// Agent is triggered by a pull-request event. PullRequest, + /// Agent is run directly from the local CLI. Local, + /// Agent is triggered via an incoming webhook call. Webhook, } +/// Validation result for a single trigger mode. #[derive(Debug, Clone, Serialize, Eq)] pub struct ModeResult { + /// The trigger mode this result applies to. pub trigger_mode: TriggerMode, + /// Whether the agent is considered runnable in this mode. pub runnable: bool, + /// Result of probing whether the CLI tool is available, if applicable. pub cli_tool_probe: Option, + /// Result of probing whether the configured model is available, if applicable. pub model_probe: Option, + /// Whether a synthetic event was accepted, if tested. pub synthetic_event_ok: Option, + /// Non-fatal warnings accumulated during validation. pub warnings: Vec, } @@ -65,32 +89,50 @@ impl PartialEq for ModeResult { } } +/// Validation report for a single agent across all trigger modes. 
#[derive(Debug, Clone, Serialize, PartialEq, Eq)] pub struct AgentValidationReport { + /// Name of the validated agent. pub agent_name: String, + /// Project scope the agent belongs to. pub project: String, + /// Per-mode validation results. pub mode_results: HashMap, + /// True when the agent is runnable across every applicable trigger mode. pub all_modes_runnable: bool, } +/// A synthetic event used for local testing of event-driven agents. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub enum SyntheticEvent { + /// Simulates a pull-request open or update event. PullRequest { + /// Pull request number. number: u64, + /// HEAD commit SHA of the PR branch. head_sha: String, + /// Login of the PR author. author: String, + /// Title of the pull request. title: String, + /// Approximate lines-of-change count. diff_loc: usize, }, + /// Simulates a git push event. Push { + /// SHA of the pushed commit. sha: String, + /// Git ref that was pushed (e.g. `refs/heads/main`). ref_name: String, + /// Login of the person who pushed. pusher: String, + /// List of changed file paths. files: Vec, }, } impl SyntheticEvent { + /// Convert the event into environment variables for the agent process. pub fn env_vars(&self) -> HashMap { let mut vars = HashMap::new(); match self { @@ -125,26 +167,43 @@ impl SyntheticEvent { } } +/// Full runtime validation report for a single agent definition. #[derive(Debug, Clone, Serialize, PartialEq, Eq)] pub struct AgentRuntimeValidationReport { + /// Name of the agent that was validated. pub agent_name: String, + /// Project scope, or `` for global agents. pub project: String, + /// Agent layer classification as a debug string. pub layer: String, + /// Configured cron schedule, if any. pub schedule: Option, + /// CLI tool used to invoke the agent. pub cli_tool: String, + /// Configured model identifier, if any. pub model: Option, + /// Resolved working directory for the agent. 
pub working_dir: String, + /// Whether the working directory exists on disk. pub repo_ok: bool, + /// Gitea repository target extracted from config, if configured. pub gitea_target: Option, + /// Whether the agent has evolution mode requested. pub evolution_requested: bool, + /// Whether evolution mode is both requested and globally enabled. pub evolution_available: bool, + /// Whether the agent is fully runnable. pub runnable: bool, + /// Result of probing whether the CLI tool exists and is executable. pub cli_tool_probe: Option, + /// Result of probing whether the configured model is available. pub model_probe: Option, + /// Non-fatal warnings accumulated during validation. pub warnings: Vec, } impl AgentOrchestrator { + /// Validate the runtime environment for the agent identified by `request`. pub fn validate_agent_runtime( &self, request: &AgentRunRequest, @@ -153,6 +212,7 @@ impl AgentOrchestrator { } } +/// Validate the runtime environment for the agent described by `request`. pub fn validate_agent_runtime( config: &OrchestratorConfig, request: &AgentRunRequest, @@ -241,6 +301,7 @@ pub fn validate_agent_runtime( }) } +/// Check whether the CLI tool named by `cli_tool` is available and executable. pub fn probe_cli_tool(cli_tool: &str) -> Result { if cli_tool.trim().is_empty() { return Ok(false); @@ -273,6 +334,7 @@ pub fn probe_cli_tool(cli_tool: &str) -> Result { } } +/// Check whether the given model identifier is considered available on the current system. pub fn probe_model_available( model: &str, _provider: Option<&str>, @@ -291,6 +353,7 @@ pub fn probe_model_available( } } +/// Run an agent with a synthetic event, returning a mode result. 
pub fn run_agent_synthetic( _config: &OrchestratorConfig, _request: &AgentRunRequest, diff --git a/crates/terraphim_orchestrator/src/config.rs b/crates/terraphim_orchestrator/src/config.rs index 9c8dc685e..c8ff8ed5b 100644 --- a/crates/terraphim_orchestrator/src/config.rs +++ b/crates/terraphim_orchestrator/src/config.rs @@ -11,15 +11,23 @@ pub enum PreCheckStrategy { Always, /// Check git diff between last recorded commit and HEAD. /// Only spawn if changed files match watch_paths prefixes. - GitDiff { watch_paths: Vec }, + GitDiff { + /// File path prefixes to watch for changes. + watch_paths: Vec, + }, /// Query latest comments on a Gitea issue. Skip if PASS verdict /// and no new commits since. - GiteaIssue { issue_number: u64 }, + GiteaIssue { + /// Issue number to query for the latest verdict. + issue_number: u64, + }, /// Run an arbitrary shell command via sh -c. /// Exit 0 + non-empty stdout = Findings; Exit 0 + empty stdout = NoFindings; /// Non-zero exit or timeout = Failed (fail-open). Shell { + /// Shell script to execute via `sh -c`. script: String, + /// Maximum seconds to wait for the script to complete. #[serde(default = "default_pre_check_timeout")] timeout_secs: u64, }, @@ -349,16 +357,22 @@ impl PrDispatchConfig { /// prompts at spawn time and records exit outcomes as validation evidence. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct LearningConfig { + /// Whether the shared learning system is active. #[serde(default)] pub enabled: bool, + /// Minimum trust level required for a learning to be injected. #[serde(default = "default_learning_min_trust")] pub min_trust: String, + /// Maximum token budget for injected learnings per prompt. #[serde(default = "default_learning_max_tokens")] pub max_tokens: usize, + /// Maximum number of learning entries to inject per prompt. #[serde(default = "default_learning_max_entries")] pub max_entries: usize, + /// Days before a learning entry is archived. 
#[serde(default = "default_learning_archive_days")] pub archive_days: u32, + /// Number of reconciliation ticks between consolidation passes. #[serde(default = "default_learning_consolidation_ticks")] pub consolidation_ticks: u64, } @@ -396,14 +410,19 @@ impl Default for LearningConfig { } } +/// Configuration for the agent evolution system. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct EvolutionConfig { + /// Whether agent evolution is globally enabled. #[serde(default)] pub enabled: bool, + /// Maximum token budget for injected memory per evolved agent prompt. #[serde(default = "default_evolution_max_memory_tokens")] pub max_memory_tokens: usize, + /// Maximum number of evolution snapshots retained per agent. #[serde(default = "default_evolution_max_snapshots")] pub max_snapshots_per_agent: usize, + /// Number of reconciliation ticks between evolution consolidation passes. #[serde(default = "default_evolution_consolidation_ticks")] pub consolidation_interval_ticks: u64, } @@ -517,10 +536,13 @@ fn default_true_routing() -> bool { /// Configuration for posting agent output to Gitea issues. #[derive(Clone, Serialize, Deserialize)] pub struct GiteaOutputConfig { + /// Base URL of the Gitea instance (e.g. `https://git.example.com`). pub base_url: String, /// Gitea API token. Redacted in `Debug` output. pub token: String, + /// Repository owner login. pub owner: String, + /// Repository name. pub repo: String, /// Path to JSON file mapping agent names to Gitea API tokens. /// When present, agents post comments under their own Gitea user. @@ -708,7 +730,9 @@ fn default_quickwit_use_es_bulk() -> bool { /// Lightweight reference to an SFIA skill code and level. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct SfiaSkillRef { + /// SFIA skill code (e.g. `PROG`, `TEST`). pub code: String, + /// Required skill level (1–7). 
pub level: u8, } From 9b247207a2cf6213054dcf3a441a37cd553ef9a5 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:44:58 +0200 Subject: [PATCH 07/11] docs(#2035): add doc comments to orchestrator project_adf + learning Refs #2035 Co-Authored-By: Claude Sonnet 4.6 --- crates/terraphim_orchestrator/src/learning.rs | 35 +++++++++++++++++ .../terraphim_orchestrator/src/project_adf.rs | 38 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/crates/terraphim_orchestrator/src/learning.rs b/crates/terraphim_orchestrator/src/learning.rs index efc4219f7..8688b26f5 100644 --- a/crates/terraphim_orchestrator/src/learning.rs +++ b/crates/terraphim_orchestrator/src/learning.rs @@ -38,18 +38,23 @@ use uuid::Uuid; /// Errors for the shared learning store. #[derive(Debug, Error)] pub enum LearningError { + /// A storage backend operation failed with the given message. #[error("storage error: {0}")] Storage(String), + /// JSON serialisation or deserialisation failed. #[error("serialization error: {0}")] Serialization(#[from] serde_json::Error), + /// The supplied trust level string is not a valid `TrustLevel` variant. #[error("invalid trust level: {0}")] InvalidTrustLevel(String), + /// No learning with the given identifier exists. #[error("learning not found: {0}")] NotFound(String), + /// An underlying IO error occurred. #[error("IO error: {0}")] Io(#[from] std::io::Error), } @@ -100,12 +105,19 @@ impl std::str::FromStr for TrustLevel { #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum LearningCategory { + /// An LLM model returned an error or unexpected response. ModelError, + /// An agent step failed during execution. StepFailure, + /// A tool or external dependency was found to be unhealthy. ToolHealth, + /// A general behavioural or process pattern observed across runs. Pattern, + /// A practical tip or best-practice hint from a peer agent. 
Tip, + /// An operation took significantly longer or shorter than expected. TimingAnomaly, + /// A pattern that has been observed in multiple independent runs. RecurringPattern, } @@ -145,37 +157,54 @@ impl std::str::FromStr for LearningCategory { /// A shared learning extracted from agent runs. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Learning { + /// Unique UUID for this learning entry. pub id: String, + /// Name of the agent that originally produced this learning. pub source_agent: String, + /// Category classifying the nature of this learning. pub category: LearningCategory, + /// Short human-readable summary of what was learned. pub summary: String, + /// Optional extended description or code snippet. pub details: Option, /// Agents this learning applies to. Empty means all agents. pub applicable_agents: Vec, + /// Current trust level; higher means more verified. pub trust_level: TrustLevel, /// Shell command that must exit 0 for this learning to remain valid. pub verify_pattern: Option, + /// Total number of times this learning has been applied. pub applied_count: u32, + /// Number of times applying this learning produced a positive outcome. pub effective_count: u32, /// Distinct agent names that have applied or confirmed this learning. #[serde(default)] pub agent_names: Vec, + /// Timestamp when this learning was first recorded. pub created_at: DateTime, + /// Timestamp of the most recent update to this learning. pub updated_at: DateTime, + /// Set when the learning has been archived and is no longer active. pub archived_at: Option>, } /// Input for creating a new learning (no id/timestamps). #[derive(Debug, Clone, Serialize, Deserialize)] pub struct NewLearning { + /// Name of the agent submitting this learning. pub source_agent: String, + /// Category classifying the nature of this learning. pub category: LearningCategory, + /// Short human-readable summary of what was learned. 
pub summary: String, #[serde(default)] + /// Optional extended description or code snippet. pub details: Option, #[serde(default)] + /// Agents this learning applies to; empty means all agents. pub applicable_agents: Vec, #[serde(default)] + /// Optional shell command that must exit 0 for the learning to remain valid. pub verify_pattern: Option, } @@ -228,6 +257,7 @@ pub struct InMemoryLearningPersistence { } impl InMemoryLearningPersistence { + /// Create a new empty in-memory learning persistence store. pub fn new() -> Self { Self { data: std::sync::RwLock::new(HashMap::new()), @@ -691,24 +721,29 @@ impl SharedLearningStore { self.persistence.insert(learning).await } + /// Retrieve a learning by its unique identifier. pub async fn get(&self, id: &str) -> Result, LearningError> { self.persistence.get(id).await } + /// Return learnings relevant to `agent_name` at the configured minimum trust level. pub async fn query_relevant(&self, agent_name: &str) -> Result, LearningError> { self.persistence .query_relevant(agent_name, self.min_trust) .await } + /// Increment the applied count for the learning with the given id. pub async fn record_applied(&self, id: &str, applied_by: &str) -> Result<(), LearningError> { self.persistence.record_applied(id, applied_by).await } + /// Increment the effective count and auto-promote trust level for the given learning. pub async fn record_effective(&self, id: &str, applied_by: &str) -> Result<(), LearningError> { self.persistence.record_effective(id, applied_by).await } + /// Archive L0 learnings that have not been updated within `max_age_days` days. 
pub async fn archive_stale(&self, max_age_days: u32) -> Result { self.persistence.archive_stale(max_age_days).await } diff --git a/crates/terraphim_orchestrator/src/project_adf.rs b/crates/terraphim_orchestrator/src/project_adf.rs index 7e32690a0..02e8f4dc5 100644 --- a/crates/terraphim_orchestrator/src/project_adf.rs +++ b/crates/terraphim_orchestrator/src/project_adf.rs @@ -22,68 +22,103 @@ impl FromStr for AgentLayer { } } +/// Raw TOML representation of a project ADF configuration file. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TomlProjectAdfConfig { + /// Unique identifier for the project. pub project_id: String, + /// Human-readable project name. pub name: String, #[serde(default)] + /// List of agent definitions for this project. pub agents: Vec, #[serde(default)] + /// Optional list of PR dispatch entries for on-PR-open routing. pub pr_dispatch: Option>, } +/// Raw TOML representation of a single agent definition within a project ADF config. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TomlAdfAgent { + /// Unique agent name within the project. pub name: String, + /// Orchestration layer: Safety, Core, or Growth. pub layer: String, + /// CLI binary or command used to invoke this agent. pub cli_tool: String, + /// Task description passed to the agent. pub task: String, #[serde(default)] + /// Optional model identifier override for this agent. pub model: Option, #[serde(default)] + /// Optional cron schedule for periodic execution. pub schedule: Option, #[serde(default)] + /// List of capability tags describing what this agent can do. pub capabilities: Vec, #[serde(default)] + /// Monthly spend budget in cents for this agent. pub budget_monthly_cents: Option, #[serde(default)] + /// LLM provider name for this agent. pub provider: Option, #[serde(default)] + /// Persona identifier used to load agent-specific prompts. pub persona: Option, #[serde(default)] + /// Ordered list of skill names to execute in a chain. 
pub skill_chain: Vec, #[serde(default)] + /// Fallback LLM provider if the primary provider is unavailable. pub fallback_provider: Option, #[serde(default)] + /// Fallback model identifier used when the primary model fails. pub fallback_model: Option, #[serde(default)] + /// Grace period in seconds before forcibly terminating the agent. pub grace_period_secs: Option, #[serde(default)] + /// Maximum CPU seconds this agent may consume before being killed. pub max_cpu_seconds: Option, #[serde(default)] + /// Strategy for pre-run health or gate checks. pub pre_check: Option, #[serde(default)] + /// Gitea issue number this agent is responsible for resolving. pub gitea_issue: Option, #[serde(default)] + /// When true, the agent only runs in response to external events. pub event_only: bool, #[serde(default)] + /// When true, the agent participates in the evolution feedback loop. pub evolution_enabled: bool, #[serde(default)] + /// When true, knowledge-graph RLM routing is enabled for this agent. pub rlm_enabled: Option, } +/// Raw TOML entry describing which agent to dispatch when a PR is opened. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct TomlPrDispatchEntry { + /// Name of the agent to dispatch on PR open. pub name: String, + /// Context string passed to the agent (e.g. a required CI check name). pub context: String, } +/// Validated, runtime-ready project ADF configuration produced from a TOML file. #[derive(Debug, Clone)] pub struct ProjectAdfConfig { + /// Unique identifier for the project. pub project_id: String, + /// Human-readable project name. pub name: String, + /// Parsed agent definitions for this project. pub agents: Vec, + /// Optional PR dispatch configuration derived from the TOML entries. pub pr_dispatch: Option, + /// Absolute path to the `adf.toml` file that was loaded. 
pub discovered_path: PathBuf, } @@ -161,6 +196,7 @@ impl TomlProjectAdfConfig { } impl ProjectAdfConfig { + /// Returns the project root directory (two levels above the `adf.toml` file). pub fn project_root(&self) -> PathBuf { self.discovered_path .parent() @@ -169,6 +205,7 @@ impl ProjectAdfConfig { .unwrap_or_else(|| self.discovered_path.clone()) } + /// Returns the `.terraphim/skills` directory inside the project root. pub fn skills_dir(&self) -> PathBuf { self.project_root().join(".terraphim/skills") } @@ -189,6 +226,7 @@ impl ProjectAdfConfig { None } + /// Walk up from `cwd` to find a `.terraphim/adf.toml` file and parse it. pub fn discover_and_load(cwd: &Path) -> Result, OrchestratorError> { let terraphim_dir = match Self::discover_terraphim_dir(cwd) { Some(d) => d, From 941af50da35f440bdc5474ddf09d8dff9880ea56 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:45:06 +0200 Subject: [PATCH 08/11] docs(#2035): add doc comments to orchestrator lib + flow/state + pr_poller Refs #2035 Co-Authored-By: Terraphim AI --- .../terraphim_orchestrator/src/flow/state.rs | 21 +++++++++++++ crates/terraphim_orchestrator/src/lib.rs | 25 +++++++++++++++ .../terraphim_orchestrator/src/pr_poller.rs | 31 +++++++++++++++++-- 3 files changed, 74 insertions(+), 3 deletions(-) diff --git a/crates/terraphim_orchestrator/src/flow/state.rs b/crates/terraphim_orchestrator/src/flow/state.rs index 48c039bd8..9b91f60ea 100644 --- a/crates/terraphim_orchestrator/src/flow/state.rs +++ b/crates/terraphim_orchestrator/src/flow/state.rs @@ -6,22 +6,31 @@ use uuid::Uuid; use super::envelope::{MatrixResult, StepEnvelope}; +/// Persisted state for a single flow run. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FlowRunState { + /// Name of the flow being run. pub flow_name: String, + /// Unique correlation ID for this run instance. pub correlation_id: Uuid, + /// Current execution status. pub status: FlowRunStatus, + /// Timestamp when this run started. 
pub started_at: DateTime, + /// Timestamp when this run finished, if complete. pub finished_at: Option>, + /// Index of the next step to execute on resume. pub next_step_index: usize, /// Optional issue id supplied by local flow context. #[serde(default)] pub issue: Option, + /// Ordered envelopes from completed steps. pub step_envelopes: Vec, /// Results from matrix-expanded steps. Key is step name; value is the /// ordered list of sub-execution envelopes (one per matrix params row). #[serde(default)] pub matrix_envelopes: HashMap>, + /// Error message if the run failed. #[serde(default)] pub error: Option, /// Current iteration count for re-iteration loops. @@ -30,17 +39,24 @@ pub struct FlowRunState { pub iteration_count: u32, } +/// Current status of a flow run. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum FlowRunStatus { + /// The flow is actively executing. Running, + /// The flow is paused at a checkpoint. Paused, + /// The flow completed all steps successfully. Completed, + /// The flow terminated with an error. Failed, + /// The flow was forcibly aborted. Aborted, } impl FlowRunState { + /// Create a new flow run state in the `Running` status. pub fn new(flow_name: &str) -> Self { Self { flow_name: flow_name.to_string(), @@ -57,11 +73,13 @@ impl FlowRunState { } } + /// Attach an issue ID to this flow run and return the updated state. pub fn with_issue(mut self, issue: String) -> Self { self.issue = Some(issue); self } + /// Create a new flow run state pre-populated as `Failed` with the given reason. pub fn failed(flow_name: &str, reason: &str) -> Self { let mut state = Self::new(flow_name); state.status = FlowRunStatus::Failed; @@ -70,6 +88,7 @@ impl FlowRunState { state } + /// Return the envelope for the named step if it has already completed. 
pub fn step_output(&self, step_name: &str) -> Option<&StepEnvelope> { self.step_envelopes .iter() @@ -84,6 +103,7 @@ impl FlowRunState { .map(|envelopes| MatrixResult::from_envelopes(envelopes)) } + /// Atomically write the flow run state to a JSON file in `dir`. pub fn save_to_file(&self, dir: &Path) -> std::io::Result { std::fs::create_dir_all(dir)?; let filename = format!("flow-{}-{}.json", self.flow_name, self.correlation_id); @@ -95,6 +115,7 @@ impl FlowRunState { Ok(path) } + /// Load a flow run state from a JSON file at `path`. pub fn load_from_file(path: &Path) -> std::io::Result { let json = std::fs::read_to_string(path)?; serde_json::from_str(&json) diff --git a/crates/terraphim_orchestrator/src/lib.rs b/crates/terraphim_orchestrator/src/lib.rs index 95302471d..afdd0fdd3 100644 --- a/crates/terraphim_orchestrator/src/lib.rs +++ b/crates/terraphim_orchestrator/src/lib.rs @@ -31,40 +31,54 @@ pub mod adf_commands; pub mod agent_registry; +/// Command-line argument definitions and validation for per-agent invocation. pub mod agent_run_command; pub mod agent_run_record; +/// Utilities for spawning and supervising agent processes. pub mod agent_runner; +/// Compound multi-agent review swarm with persona-based specialisation. pub mod compound; pub mod concurrency; +/// Static and runtime configuration types for the orchestrator. pub mod config; pub mod control_plane; +/// Budget enforcement and per-agent spending tracking. pub mod cost_tracker; #[cfg(unix)] pub mod direct_dispatch; pub mod dispatcher; pub mod dual_mode; +/// Error types used across the orchestrator. pub mod error; pub mod error_signatures; +/// Agent evolution, replay, and adaptation mechanisms. pub mod evolution; +/// Multi-step flow execution with state persistence and matrix expansion. pub mod flow; pub mod gitea_skill_loader; +/// Inter-agent state handoff with TTL management. pub mod handoff; pub mod kg_router; pub mod learning; +/// Skill discovery from local filesystem paths. 
pub mod local_skills; pub mod mention; pub mod mention_chain; pub mod meta_coordinator; pub mod metrics_persistence; pub mod mode; +/// Drift detection, rate limiting, and dual-panel certificate validation. pub mod nightwatch; pub mod output_poster; +/// Persona registry and metaprompt rendering for specialised agent roles. pub mod persona; pub mod post_merge_gate; pub mod pr_dispatch; pub mod pr_gate; +/// PR polling and auto-merge evaluation for the rate-of-change pipeline. pub mod pr_poller; pub mod pr_review; +/// ADF project configuration and per-project orchestration settings. pub mod project_adf; pub mod project_control; pub mod provider_budget; @@ -74,7 +88,9 @@ pub mod quickwit; #[cfg(feature = "quickwit")] pub mod quickwit_bulk; pub mod rate_limiter; +/// Time-based and event-driven task scheduling. pub mod scheduler; +/// Scope definitions for agent concurrency boundaries. pub mod scope; pub mod webhook; pub mod worktree_guard; @@ -163,12 +179,19 @@ pub enum PreCheckResult { /// Status of a single agent in the fleet. #[derive(Debug, Clone)] pub struct AgentStatus { + /// Unique name identifying this agent. pub name: String, + /// Architectural layer this agent belongs to. pub layer: AgentLayer, + /// Whether the agent process is currently running. pub running: bool, + /// Last observed health state from the circuit breaker. pub health: HealthStatus, + /// Drift score from the nightwatch monitor, if available. pub drift_score: Option, + /// Wall-clock duration since the agent was last started. pub uptime: Duration, + /// Number of times this agent has been restarted. pub restart_count: u32, /// API calls remaining per provider (None if no limit known). pub api_calls_remaining: HashMap>, @@ -1695,11 +1718,13 @@ impl AgentOrchestrator { } #[cfg(feature = "quickwit")] + /// Attach a pre-built Quickwit fleet sink for log shipping. 
pub fn set_quickwit_sink(&mut self, sink: quickwit::QuickwitFleetSink) { self.quickwit_sink = Some(sink); } #[cfg(feature = "quickwit")] + /// Return the top-level Quickwit configuration, if any. pub fn quickwit_config(&self) -> Option<&QuickwitConfig> { self.config.quickwit.as_ref() } diff --git a/crates/terraphim_orchestrator/src/pr_poller.rs b/crates/terraphim_orchestrator/src/pr_poller.rs index 29d9df79a..ae0f1538b 100644 --- a/crates/terraphim_orchestrator/src/pr_poller.rs +++ b/crates/terraphim_orchestrator/src/pr_poller.rs @@ -50,10 +50,15 @@ pub const PR_POLL_MIN_INTERVAL: Duration = Duration::from_secs(60); /// that tests can construct it without a live Gitea server. #[derive(Debug, Clone, PartialEq, Eq)] pub struct PrSummary { + /// Gitea PR number. pub number: u64, + /// Login of the PR author. pub author_login: String, + /// SHA of the head commit at the time the summary was fetched. pub head_sha: String, + /// Target branch name (e.g. `main`). pub base_ref: String, + /// Approximate lines of code changed in the diff. pub diff_loc: u32, } @@ -61,8 +66,11 @@ pub struct PrSummary { /// parsing are captured; the full Gitea payload is deliberately not mirrored. #[derive(Debug, Clone, PartialEq, Eq)] pub struct PrComment { + /// Gitea comment ID, used as a tie-break when timestamps are equal. pub id: u64, + /// Login of the comment author. pub user_login: String, + /// Full comment body text. pub body: String, /// RFC3339-ish `updated_at` string from the Gitea API. Used only for /// ordering; comments without a timestamp sort as the earliest. @@ -74,7 +82,9 @@ pub struct PrComment { /// PR N carry?". Kept minimal so the test impl stays trivial. #[async_trait] pub trait PrTracker: Send + Sync { + /// Return all currently open pull requests for the tracked project. async fn list_open_prs(&self) -> Result, String>; + /// Return all comments on the given PR, ordered by creation time. 
async fn fetch_pr_comments(&self, pr_number: u64) -> Result, String>; } @@ -84,8 +94,11 @@ pub trait PrTracker: Send + Sync { /// concrete types into test code. #[derive(Debug, Clone, PartialEq, Eq)] pub struct MergeOutcome { + /// PR number that was merged. pub pr_number: u64, + /// SHA of the merge commit created by Gitea. pub merge_commit_sha: String, + /// PR title at the time of merge. pub title: String, } @@ -121,6 +134,7 @@ pub struct GiteaPrTracker { } impl GiteaPrTracker { + /// Wrap an existing [`terraphim_tracker::GiteaTracker`] instance. pub fn new(inner: terraphim_tracker::GiteaTracker) -> Self { Self { inner } } @@ -196,14 +210,23 @@ impl AutoMergeExecutor for GiteaPrTracker { #[derive(Debug, Clone, PartialEq, Eq)] pub enum EvaluationOutcome { /// Every gate cleared; the caller should enqueue [`crate::dispatcher::DispatchTask::AutoMerge`]. - Merge { head_sha: String }, + Merge { + /// Head SHA that was evaluated; must be re-confirmed before merging. + head_sha: String, + }, /// At least one gate failed. The reason is a short human-readable string /// suitable for logging or posting back to the PR. - HumanReviewNeeded { reason: String }, + HumanReviewNeeded { + /// Human-readable explanation of which gate was not satisfied. + reason: String, + }, /// No pr-reviewer comment found yet — nothing to evaluate this tick. NoReviewerComment, /// A reviewer comment exists but did not parse as a structural verdict. - ParseError { reason: String }, + ParseError { + /// Description of the parse failure. + reason: String, + }, } /// Return `true` when `comment.user_login == PR_REVIEWER_LOGIN` **or** the @@ -316,6 +339,7 @@ pub struct PrPollRateLimiter { } impl PrPollRateLimiter { + /// Create a new rate limiter with the given minimum polling interval. pub fn new(min_interval: Duration) -> Self { Self { last_poll: HashMap::new(), @@ -346,6 +370,7 @@ pub struct AutoMergeDedupeSet { } impl AutoMergeDedupeSet { + /// Create an empty dedupe set. 
pub fn new() -> Self { Self::default() } From ee58d25f22e029bc5c6f24cca14ab6a4732f960e Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:46:01 +0200 Subject: [PATCH 09/11] docs(#2035): add doc comments to orchestrator (error, webhook, mention, quickwit, pr_review, provider_probe, and more) Refs #2035 --- crates/terraphim_orchestrator/src/bin/adf.rs | 1 + .../src/control_plane/events.rs | 6 ++ .../src/control_plane/policy.rs | 4 + .../src/control_plane/routing.rs | 39 ++++++++ .../src/control_plane/telemetry.rs | 17 ++++ .../src/cost_tracker.rs | 18 +++- .../src/direct_dispatch.rs | 7 ++ .../terraphim_orchestrator/src/dual_mode.rs | 12 ++- crates/terraphim_orchestrator/src/error.rs | 96 +++++++++++++++++-- .../src/error_signatures.rs | 3 + .../terraphim_orchestrator/src/evolution.rs | 24 +++++ .../terraphim_orchestrator/src/flow/config.rs | 18 ++++ .../src/flow/envelope.rs | 12 +++ .../src/flow/executor.rs | 8 ++ crates/terraphim_orchestrator/src/flow/mod.rs | 6 ++ .../src/flow/token_parser.rs | 6 ++ .../terraphim_orchestrator/src/kg_router.rs | 3 + .../src/local_skills.rs | 8 ++ crates/terraphim_orchestrator/src/mention.rs | 19 +++- .../src/mention_chain.rs | 18 +++- .../terraphim_orchestrator/src/pr_review.rs | 15 +++ .../src/provider_probe.rs | 14 +++ crates/terraphim_orchestrator/src/quickwit.rs | 18 ++++ crates/terraphim_orchestrator/src/webhook.rs | 56 +++++++++++ 24 files changed, 413 insertions(+), 15 deletions(-) diff --git a/crates/terraphim_orchestrator/src/bin/adf.rs b/crates/terraphim_orchestrator/src/bin/adf.rs index 60054d8d5..bfeac9538 100644 --- a/crates/terraphim_orchestrator/src/bin/adf.rs +++ b/crates/terraphim_orchestrator/src/bin/adf.rs @@ -1,3 +1,4 @@ +//! ADF orchestrator binary — parses config, registers providers and runs agents. 
use std::io::Write; use std::path::PathBuf; use std::process::ExitCode; diff --git a/crates/terraphim_orchestrator/src/control_plane/events.rs b/crates/terraphim_orchestrator/src/control_plane/events.rs index 6f895eeac..3895dd845 100644 --- a/crates/terraphim_orchestrator/src/control_plane/events.rs +++ b/crates/terraphim_orchestrator/src/control_plane/events.rs @@ -210,11 +210,17 @@ pub fn normalize_polled_command( /// to fully populate a NormalizedAgentEvent. #[derive(Debug, Clone)] pub struct WebhookContext { + /// Full repository name in `owner/repo` format. pub repo_full_name: String, + /// Title of the issue or pull request. pub issue_title: String, + /// Current state of the issue (e.g. `open` or `closed`). pub issue_state: String, + /// ISO 8601 timestamp when the triggering comment was created. pub comment_created_at: String, + /// Login of the comment author. pub comment_author: String, + /// Full body text of the triggering comment. pub comment_body: String, } diff --git a/crates/terraphim_orchestrator/src/control_plane/policy.rs b/crates/terraphim_orchestrator/src/control_plane/policy.rs index 3c504c0cf..069ef30f4 100644 --- a/crates/terraphim_orchestrator/src/control_plane/policy.rs +++ b/crates/terraphim_orchestrator/src/control_plane/policy.rs @@ -22,8 +22,11 @@ pub struct PolicyResult { /// A candidate with its policy score. #[derive(Debug, Clone)] pub struct ScoredCandidate { + /// The route candidate being scored. pub candidate: RouteCandidate, + /// Composite score computed by the policy (higher is better). pub score: f64, + /// Per-factor score contributions for human-readable rationale. pub rationale_breakdown: Vec<(String, f64)>, } @@ -55,6 +58,7 @@ impl Default for PolicyConfig { } impl PolicyConfig { + /// Create a new policy config with default weight values. 
pub fn new() -> Self { Self::default() } diff --git a/crates/terraphim_orchestrator/src/control_plane/routing.rs b/crates/terraphim_orchestrator/src/control_plane/routing.rs index 6141ca988..97e7d3ad8 100644 --- a/crates/terraphim_orchestrator/src/control_plane/routing.rs +++ b/crates/terraphim_orchestrator/src/control_plane/routing.rs @@ -13,12 +13,18 @@ use std::path::PathBuf; use std::sync::Arc; use terraphim_types::capability::{CostLevel, Latency, Provider, ProviderType}; +/// The signal that produced a route candidate. #[derive(Debug, Clone, PartialEq)] pub enum RouteSource { + /// Route selected via knowledge-graph path scoring. KnowledgeGraph, + /// Route selected via keyword-based provider matching. KeywordRouting, + /// Route taken from the static `model` field in the agent definition. StaticConfig, + /// Route selected by combining KG and keyword signals. CombinedKgKeyword, + /// Default CLI model used as a last-resort fallback. CliDefault, } @@ -34,14 +40,19 @@ impl std::fmt::Display for RouteSource { } } +/// Current budget pressure level for a provider. #[derive(Debug, Clone, Copy, PartialEq)] pub enum BudgetPressure { + /// No budget concerns; full provider access. NoPressure, + /// Budget is approaching its limit; cheaper models preferred. NearExhaustion, + /// Budget is fully exhausted; provider should be avoided. Exhausted, } impl BudgetPressure { + /// Derive budget pressure from a budget verdict. pub fn from_verdict(verdict: &BudgetVerdict) -> Self { match verdict { BudgetVerdict::Exhausted { .. } => BudgetPressure::Exhausted, @@ -50,6 +61,7 @@ impl BudgetPressure { } } + /// Return the fractional score penalty to apply based on pressure level and cost tier. pub fn cost_penalty(&self, cost_level: &CostLevel) -> f64 { match self { BudgetPressure::NoPressure => 0.0, @@ -67,37 +79,59 @@ impl BudgetPressure { } } +/// Context used by the routing engine to select a model for an agent dispatch. 
#[derive(Debug, Clone)] pub struct DispatchContext { + /// Name of the agent being dispatched. pub agent_name: String, + /// Task description used for KG and keyword matching. pub task: String, + /// Statically configured model override from the agent definition. pub static_model: Option, + /// CLI tool used to invoke the selected model. pub cli_tool: String, + /// Agent layer classification used to adjust routing priorities. pub layer: crate::config::AgentLayer, + /// Optional session ID for telemetry grouping. pub session_id: Option, /// Default KG tier concept for this agent (e.g., "review_tier"). /// Passed through to KG router for tier-biased routing. pub default_tier: Option, } +/// A single candidate model selected by one routing signal. #[derive(Debug, Clone)] pub struct RouteCandidate { + /// Provider descriptor for this candidate. pub provider: Provider, + /// Model identifier to pass to the CLI tool. pub model: String, + /// CLI tool used to invoke this candidate. pub cli_tool: String, + /// Signal that produced this candidate. pub source: RouteSource, + /// Confidence score from the originating signal (0.0–1.0). pub confidence: f64, } +/// The final routing decision returned after evaluating all candidates. #[derive(Debug, Clone)] pub struct RoutingDecision { + /// The winning candidate that will be used for dispatch. pub candidate: RouteCandidate, + /// Human-readable rationale explaining the selection. pub rationale: String, + /// All candidates that were evaluated, for diagnostics. pub all_candidates: Vec, + /// Whether the primary (highest-confidence) candidate is available. pub primary_available: bool, + /// Which routing signal dominated the final selection. pub dominant_signal: RouteSource, + /// Current budget pressure at the time of the decision. pub budget_pressure: BudgetPressure, + /// Whether budget pressure altered the selected candidate. pub budget_influenced: bool, + /// Whether telemetry data influenced the final ranking. 
pub telemetry_influenced: bool, } @@ -128,6 +162,7 @@ struct CollectedCandidates { static_model: Option, } +/// Engine that combines multiple routing signals to select the best model for a dispatch. pub struct RoutingDecisionEngine { kg_router: Option>, /// Snapshot of unhealthy provider names at construction time. @@ -143,6 +178,7 @@ pub struct RoutingDecisionEngine { } impl RoutingDecisionEngine { + /// Create a new routing engine without budget tracking and using the `Fastest` strategy. pub fn new( kg_router: Option>, unhealthy_providers: Vec, @@ -159,6 +195,7 @@ impl RoutingDecisionEngine { ) } + /// Create a new routing engine with budget tracking and using the `Fastest` strategy. pub fn with_provider_budget( kg_router: Option>, unhealthy_providers: Vec, @@ -176,6 +213,7 @@ impl RoutingDecisionEngine { ) } + /// Create a routing engine with explicit budget tracker and route selection strategy. pub fn with_provider_budget_and_strategy( kg_router: Option>, unhealthy_providers: Vec, @@ -311,6 +349,7 @@ impl RoutingDecisionEngine { base * (1.0 - penalty) } + /// Select the best model for the given dispatch context. pub async fn decide_route( &self, ctx: &DispatchContext, diff --git a/crates/terraphim_orchestrator/src/control_plane/telemetry.rs b/crates/terraphim_orchestrator/src/control_plane/telemetry.rs index d446f8c9a..1c2cd9b52 100644 --- a/crates/terraphim_orchestrator/src/control_plane/telemetry.rs +++ b/crates/terraphim_orchestrator/src/control_plane/telemetry.rs @@ -45,11 +45,17 @@ pub struct CompletionEvent { /// Token breakdown from a completion event. #[derive(Debug, Clone, Serialize, Deserialize, Default, PartialEq)] pub struct TokenBreakdown { + /// Total tokens consumed (input + output). pub total: u64, + /// Input (prompt) token count. pub input: u64, + /// Output (completion) token count. pub output: u64, + /// Tokens used for extended reasoning, if applicable. pub reasoning: u64, + /// Tokens read from the prompt cache. 
pub cache_read: u64, + /// Tokens written to the prompt cache. pub cache_write: u64, } @@ -83,6 +89,7 @@ pub struct ModelPerformanceSnapshot { } impl ModelPerformanceSnapshot { + /// Create an empty snapshot for the given model with no recorded events. pub fn empty(model: &str, window_secs: u64) -> Self { Self { model: model.to_string(), @@ -100,6 +107,7 @@ impl ModelPerformanceSnapshot { } } + /// Return true when the snapshot's data is older than `max_staleness_secs`. pub fn is_stale(&self, max_staleness_secs: u64) -> bool { match self.last_event_at { None => true, @@ -110,6 +118,7 @@ impl ModelPerformanceSnapshot { } } + /// Return true when this model's subscription limit is currently active. pub fn is_subscription_limited(&self) -> bool { if !self.subscription_limit_reached { return false; @@ -140,14 +149,20 @@ pub struct UsageSnapshot { /// Per-model usage totals. #[derive(Debug, Clone, Serialize, Deserialize, Default)] pub struct ModelUsage { + /// Total input (prompt) tokens consumed. pub input_tokens: u64, + /// Total output (completion) tokens generated. pub output_tokens: u64, + /// Combined input and output token count. pub total_tokens: u64, + /// Estimated cost in USD. pub cost_usd: f64, + /// Number of messages in this window. pub message_count: u64, } impl ModelUsage { + /// Accumulate totals from another `ModelUsage` into this one. pub fn merge(&mut self, other: &ModelUsage) { self.input_tokens += other.input_tokens; self.output_tokens += other.output_tokens; @@ -293,6 +308,7 @@ struct TelemetryStoreInner { } impl TelemetryStore { + /// Create a new telemetry store with the given rolling window duration. pub fn new(window_secs: u64) -> Self { Self { inner: Arc::new(RwLock::new(TelemetryStoreInner { @@ -304,6 +320,7 @@ impl TelemetryStore { } } + /// Set a custom TTL for subscription-limit flags and return the updated store. 
pub fn with_subscription_limit_ttl(self, ttl_secs: u64) -> Self { let window_secs = self.inner.blocking_read().window_secs; Self { diff --git a/crates/terraphim_orchestrator/src/cost_tracker.rs b/crates/terraphim_orchestrator/src/cost_tracker.rs index 9eec92eb8..a9eef37e7 100644 --- a/crates/terraphim_orchestrator/src/cost_tracker.rs +++ b/crates/terraphim_orchestrator/src/cost_tracker.rs @@ -15,9 +15,19 @@ pub enum BudgetVerdict { /// Spend is within normal budget range. WithinBudget, /// Spend has reached warning threshold (80%). - NearExhaustion { spent_cents: u64, budget_cents: u64 }, + NearExhaustion { + /// Amount spent in hundredths of a cent. + spent_cents: u64, + /// Monthly budget cap in hundredths of a cent. + budget_cents: u64, + }, /// Spend has reached or exceeded 100% of budget. - Exhausted { spent_cents: u64, budget_cents: u64 }, + Exhausted { + /// Amount spent in hundredths of a cent. + spent_cents: u64, + /// Monthly budget cap in hundredths of a cent. + budget_cents: u64, + }, } impl BudgetVerdict { @@ -316,9 +326,13 @@ impl AgentCost { /// Snapshot of an agent's cost status (for serialization). #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CostSnapshot { + /// Name of the agent this snapshot belongs to. pub agent_name: String, + /// Total amount spent this month in USD. pub spent_usd: f64, + /// Monthly budget cap in hundredths of a cent, if set. pub budget_cents: Option, + /// Human-readable budget verdict string. pub verdict: String, } diff --git a/crates/terraphim_orchestrator/src/direct_dispatch.rs b/crates/terraphim_orchestrator/src/direct_dispatch.rs index 6ee0361ea..f06a77632 100644 --- a/crates/terraphim_orchestrator/src/direct_dispatch.rs +++ b/crates/terraphim_orchestrator/src/direct_dispatch.rs @@ -43,6 +43,7 @@ pub struct DirectDispatchAgentIndex { } impl DirectDispatchAgentIndex { + /// Build an agent index from a slice of agent definitions. 
pub fn from_agents(agents: &[crate::config::AgentDefinition]) -> Self { let bare_names: HashSet = agents .iter() @@ -59,6 +60,7 @@ impl DirectDispatchAgentIndex { } } + /// Return true if the given (project, agent) combination is registered. pub fn is_valid(&self, project: Option<&str>, agent: &str) -> bool { match project { Some(p) => self @@ -72,12 +74,15 @@ impl DirectDispatchAgentIndex { /// JSON response written back to adf-ctl. #[derive(Debug, serde::Serialize)] pub struct DispatchResponse { + /// Outcome of the dispatch: `"ok"` or `"error"`. pub status: String, + /// Optional human-readable detail message. #[serde(skip_serializing_if = "Option::is_none")] pub message: Option, } impl DispatchResponse { + /// Create a successful dispatch response. pub fn ok() -> Self { Self { status: "ok".to_string(), @@ -85,6 +90,7 @@ impl DispatchResponse { } } + /// Create an error dispatch response with a message. pub fn error(msg: &str) -> Self { Self { status: "error".to_string(), @@ -123,6 +129,7 @@ fn remove_stale_socket_if_present(socket_path: &std::path::Path) -> std::io::Res } } +/// Start the Unix domain socket listener for direct dispatch. pub fn start_direct_dispatch_listener( socket_path: PathBuf, dispatch_tx: tokio::sync::mpsc::Sender, diff --git a/crates/terraphim_orchestrator/src/dual_mode.rs b/crates/terraphim_orchestrator/src/dual_mode.rs index 463a0af5f..048317f5e 100644 --- a/crates/terraphim_orchestrator/src/dual_mode.rs +++ b/crates/terraphim_orchestrator/src/dual_mode.rs @@ -71,9 +71,17 @@ impl std::fmt::Display for ExecutionMode { #[derive(Debug, Clone)] pub enum SpawnTask { /// Time-driven agent task. - TimeTask { agent: Box }, + TimeTask { + /// The agent definition to spawn. + agent: Box, + }, /// Issue-driven agent task. - IssueTask { issue_id: String, title: String }, + IssueTask { + /// Unique identifier of the issue that triggered this task. + issue_id: String, + /// Title of the triggering issue. 
+ title: String, + }, } /// Full dual-mode orchestrator. diff --git a/crates/terraphim_orchestrator/src/error.rs b/crates/terraphim_orchestrator/src/error.rs index 26d23f723..1422be7ee 100644 --- a/crates/terraphim_orchestrator/src/error.rs +++ b/crates/terraphim_orchestrator/src/error.rs @@ -4,106 +4,186 @@ use terraphim_spawner::SpawnerError; /// Errors that can occur during orchestrator operation. #[derive(Debug, thiserror::Error)] pub enum OrchestratorError { + /// A configuration value is invalid or missing. #[error("configuration error: {0}")] Config(String), + /// An agent failed to spawn. #[error("agent spawn failed for '{agent}': {reason}")] - SpawnFailed { agent: String, reason: String }, + SpawnFailed { + /// Name of the agent that failed to spawn. + agent: String, + /// Human-readable reason for the failure. + reason: String, + }, + /// Creating a git worktree for an agent failed. #[error("agent worktree creation failed for '{agent}' in '{repo}': {reason}")] WorktreeCreationFailed { + /// Name of the agent whose worktree could not be created. agent: String, + /// Repository path where the worktree should have been created. repo: String, + /// Human-readable reason for the failure. reason: String, }, + /// The requested agent does not exist in the registry. #[error("agent '{0}' not found")] AgentNotFound(String), + /// The scheduler encountered an error. #[error("scheduler error: {0}")] SchedulerError(String), + /// A compound (multi-agent) review operation failed. #[error("compound review failed: {0}")] CompoundReviewFailed(String), + /// The provided agent name contains invalid characters. #[error( "invalid agent name '{0}': must contain only alphanumeric, dash, or underscore characters" )] InvalidAgentName(String), + /// An agent handoff (delegation of control) failed. #[error("handoff failed from '{from}' to '{to}': {reason}")] HandoffFailed { + /// Agent that initiated the handoff. from: String, + /// Agent that was the intended recipient. 
to: String, + /// Human-readable reason for the failure. reason: String, }, + /// A spawner-level error propagated up from the spawner crate. #[error(transparent)] Spawner(#[from] SpawnerError), + /// A routing-level error propagated up from the router crate. #[error(transparent)] Routing(#[from] RoutingError), + /// An I/O error occurred. #[error(transparent)] Io(#[from] std::io::Error), + /// Pre-flight configuration validation failed for an agent. #[error("pre-check configuration error for agent '{agent}': {reason}")] - PreCheckConfig { agent: String, reason: String }, + PreCheckConfig { + /// Name of the agent whose pre-check failed. + agent: String, + /// Human-readable reason for the failure. + reason: String, + }, + /// A flow step or the entire flow failed. #[error("flow '{flow_name}' failed: {reason}")] - FlowFailed { flow_name: String, reason: String }, + FlowFailed { + /// Name of the flow that failed. + flow_name: String, + /// Human-readable reason for the failure. + reason: String, + }, + /// A flow gate step rejected the PR or payload. #[error("flow '{flow_name}' gate '{step_name}' rejected: {condition}")] FlowGateRejected { + /// Name of the flow containing the gate. flow_name: String, + /// Name of the gate step that rejected. step_name: String, + /// The condition or rule that caused the rejection. condition: String, }, + /// A flow template could not be expanded or rendered. #[error("flow template error: {0}")] FlowTemplateError(String), + /// Two projects share the same identifier, which must be unique. #[error( "duplicate project id '{0}' (project ids must be unique across base + included configs)" )] DuplicateProjectId(String), + /// An agent references a project that is not declared in the projects list. 
#[error( "agent '{agent}' references unknown project '{project}' (must match a Project.id in projects list)" )] - UnknownAgentProject { agent: String, project: String }, + UnknownAgentProject { + /// Name of the agent with the unknown project reference. + agent: String, + /// The project identifier that could not be resolved. + project: String, + }, + /// A flow references a project that is not declared in the projects list. #[error( "flow '{flow}' references unknown project '{project}' (must match a Project.id in projects list)" )] - UnknownFlowProject { flow: String, project: String }, + UnknownFlowProject { + /// Name of the flow with the unknown project reference. + flow: String, + /// The project identifier that could not be resolved. + project: String, + }, + /// An agent or flow specifies a disallowed LLM provider. #[error( "banned LLM provider '{provider}' in {field} for agent '{agent}' (allowed: claude-code, opencode-go, kimi-for-coding, minimax-coding-plan, zai-coding-plan)" )] BannedProvider { + /// Name of the agent or flow with the banned provider. agent: String, + /// The banned provider identifier. provider: String, + /// The config field (e.g. `model`) that contains the banned provider. field: String, }, + /// Some agents or flows lack a project assignment in mixed-mode configs. #[error( "mixed project mode: projects are defined but {kind} '{name}' has no project set; every agent and flow must declare a project" )] - MixedProjectMode { kind: &'static str, name: String }, + MixedProjectMode { + /// Whether the offending item is an `"agent"` or a `"flow"`. + kind: &'static str, + /// Name of the agent or flow missing a project assignment. + name: String, + }, + /// An include glob pattern is syntactically invalid. #[error("include glob '{pattern}' is invalid: {reason}")] - InvalidIncludeGlob { pattern: String, reason: String }, + InvalidIncludeGlob { + /// The glob pattern that could not be parsed. 
+ pattern: String, + /// Human-readable reason the pattern is invalid. + reason: String, + }, + /// A numeric agent field is outside the allowed range. #[error("agent '{agent}' {field} value {value}s is outside allowed range [{min}s, {max}s]")] AgentFieldOutOfRange { + /// Name of the agent with the out-of-range field. agent: String, + /// Name of the field that is out of range. field: String, + /// The provided value (in seconds). value: u64, + /// Minimum allowed value (in seconds). min: u64, + /// Maximum allowed value (in seconds). max: u64, }, + /// The nightwatch probe TTL is too short for rate-limit protection. #[error("nightwatch probe_ttl_secs {value}s is below minimum {min}s (rate-limit protection)")] - ProbeTtlTooShort { value: u64, min: u64 }, + ProbeTtlTooShort { + /// The provided TTL value (in seconds). + value: u64, + /// The minimum acceptable TTL (in seconds). + min: u64, + }, } diff --git a/crates/terraphim_orchestrator/src/error_signatures.rs b/crates/terraphim_orchestrator/src/error_signatures.rs index 557bbcaf7..0dbe72d29 100644 --- a/crates/terraphim_orchestrator/src/error_signatures.rs +++ b/crates/terraphim_orchestrator/src/error_signatures.rs @@ -63,8 +63,11 @@ pub struct ProviderErrorSignatures { /// Compile error building per-provider regex patterns. #[derive(Debug)] pub struct CompileError { + /// Name of the provider whose pattern failed to compile. pub provider: String, + /// The regex pattern string that is invalid. pub pattern: String, + /// The underlying regex compilation error. pub source: regex::Error, } diff --git a/crates/terraphim_orchestrator/src/evolution.rs b/crates/terraphim_orchestrator/src/evolution.rs index 02946aca5..0fd817ddb 100644 --- a/crates/terraphim_orchestrator/src/evolution.rs +++ b/crates/terraphim_orchestrator/src/evolution.rs @@ -8,6 +8,7 @@ use terraphim_agent_evolution::{ TaskId, }; +/// Manages per-agent evolution systems, routing output events to memory storage. 
#[derive(Debug)] pub struct EvolutionManager { #[cfg(feature = "evolution")] @@ -17,15 +18,21 @@ pub struct EvolutionManager { enabled: bool, } +/// An agent output event to be recorded into the evolution memory store. #[derive(Debug)] pub struct EvolutionOutput { + /// Name of the agent that produced this output. pub agent_id: String, + /// The content of the output event. pub content: String, + /// Type of event (e.g. `stdout`, `stderr`, `lesson`). pub event_type: String, + /// Importance level string (e.g. `critical`, `high`, `medium`, `low`). pub importance: String, } impl EvolutionManager { + /// Create a new evolution manager from the given config. pub fn new(config: EvolutionConfig) -> Self { let enabled = config.enabled; Self { @@ -36,11 +43,13 @@ impl EvolutionManager { } } + /// Return whether evolution mode is active. pub fn is_enabled(&self) -> bool { self.enabled } #[cfg(feature = "evolution")] + /// Ensure an evolution system exists for the given agent, creating one if necessary. pub fn ensure_agent(&mut self, agent_id: &str) { if !self.enabled { return; @@ -51,9 +60,11 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Ensure an evolution system exists for the given agent (no-op without the `evolution` feature). pub fn ensure_agent(&mut self, _agent_id: &str) {} #[cfg(feature = "evolution")] + /// Record an agent output event into the evolution memory store. pub fn record_output(&mut self, output: EvolutionOutput) -> Result<(), String> { if !self.enabled { return Ok(()); @@ -95,11 +106,13 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Record an agent output event (no-op without the `evolution` feature). pub fn record_output(&mut self, _output: EvolutionOutput) -> Result<(), String> { Ok(()) } #[cfg(feature = "evolution")] + /// Record a new task start for the given agent and return its task ID. 
pub fn record_task_start(&mut self, agent_id: &str, task_content: &str) -> Option { if !self.enabled { return None; @@ -115,11 +128,13 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Record a task start (no-op without the `evolution` feature). pub fn record_task_start(&mut self, _agent_id: &str, _task_content: &str) -> Option { None } #[cfg(feature = "evolution")] + /// Record the completion of a task by its ID. pub fn record_task_complete( &mut self, agent_id: &str, @@ -142,6 +157,7 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Record task completion (no-op without the `evolution` feature). pub fn record_task_complete( &mut self, _agent_id: &str, @@ -152,6 +168,7 @@ impl EvolutionManager { } #[cfg(feature = "evolution")] + /// Record a lesson learned by the given agent. pub fn record_lesson( &mut self, agent_id: &str, @@ -180,6 +197,7 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Record a lesson (no-op without the `evolution` feature). pub fn record_lesson( &mut self, _agent_id: &str, @@ -192,6 +210,7 @@ impl EvolutionManager { } #[cfg(feature = "evolution")] + /// Snapshot the agent's evolution state on exit and return the storage path if successful. pub fn snapshot_on_exit(&mut self, agent_id: &str) -> Option { if !self.enabled { return None; @@ -216,11 +235,13 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Snapshot the agent's evolution state on exit (no-op without the `evolution` feature). pub fn snapshot_on_exit(&mut self, _agent_id: &str) -> Option { None } #[cfg(feature = "evolution")] + /// Render the evolution memory context for the agent as a markdown string. pub fn render_context(&self, agent_id: &str) -> String { if !self.enabled { return String::new(); @@ -281,11 +302,13 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Render the evolution memory context (no-op without the `evolution` feature). 
pub fn render_context(&self, _agent_id: &str) -> String { String::new() } #[cfg(feature = "evolution")] + /// Consolidate memories for all agents and return the number of agents consolidated. pub fn consolidate_all(&mut self) -> usize { if !self.enabled { return 0; @@ -304,6 +327,7 @@ impl EvolutionManager { } #[cfg(not(feature = "evolution"))] + /// Consolidate memories (no-op without the `evolution` feature). pub fn consolidate_all(&mut self) -> usize { 0 } diff --git a/crates/terraphim_orchestrator/src/flow/config.rs b/crates/terraphim_orchestrator/src/flow/config.rs index 8692b9627..c1e8ff218 100644 --- a/crates/terraphim_orchestrator/src/flow/config.rs +++ b/crates/terraphim_orchestrator/src/flow/config.rs @@ -47,20 +47,26 @@ fn default_matrix_fail_strategy() -> FailStrategy { FailStrategy::Continue } +/// Top-level definition of a multi-step flow. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct FlowDefinition { + /// Unique name identifying this flow. pub name: String, /// Project this flow belongs to. Required -- flows are per-project only (D14). /// Must match a `Project.id` when projects are defined. pub project: String, + /// Optional cron schedule expression for time-driven execution. #[serde(default)] pub schedule: Option, // cron expression + /// Filesystem path to the repository where this flow runs. pub repo_path: String, + /// Git base branch used as the comparison reference. #[serde(default = "default_base_branch")] pub base_branch: String, /// Global flow timeout in seconds. If the entire flow exceeds this, it is aborted. #[serde(default = "default_flow_timeout")] pub timeout_secs: u64, + /// Ordered list of steps that form this flow. #[serde(default)] pub steps: Vec, } @@ -73,9 +79,12 @@ fn default_flow_timeout() -> u64 { 3600 // 1 hour default } +/// Definition of a single step within a flow. #[derive(Debug, Clone, Default, Serialize, Deserialize)] pub struct FlowStepDef { + /// Unique name for this step within the flow. 
pub name: String, + /// The kind of step (action, agent, gate, or checkpoint). pub kind: StepKind, /// Shell command (for action steps). #[serde(default)] @@ -122,22 +131,31 @@ fn default_timeout() -> u64 { 600 } +/// The kind of execution a flow step performs. #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum StepKind { + /// Execute a shell command. #[default] Action, + /// Spawn an AI agent. Agent, + /// Evaluate a gate condition and optionally block the flow. Gate, + /// Save a checkpoint and optionally loop back to a prior step. Checkpoint, } +/// What the executor should do when a step fails. #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "snake_case")] pub enum FailStrategy { + /// Abort the entire flow immediately on failure. #[default] Abort, + /// Skip the failed step and continue with the next. SkipFailed, + /// Record the failure but continue to the end before deciding. Continue, } diff --git a/crates/terraphim_orchestrator/src/flow/envelope.rs b/crates/terraphim_orchestrator/src/flow/envelope.rs index 34c5e2e2b..5e7aaf705 100644 --- a/crates/terraphim_orchestrator/src/flow/envelope.rs +++ b/crates/terraphim_orchestrator/src/flow/envelope.rs @@ -17,6 +17,7 @@ pub struct MatrixResult { } impl MatrixResult { + /// Aggregate a slice of step envelopes into a matrix result summary. pub fn from_envelopes(envelopes: &[StepEnvelope]) -> Self { let success_count = envelopes.iter().filter(|e| e.exit_code == 0).count(); let failure_count = envelopes.len() - success_count; @@ -33,20 +34,31 @@ impl MatrixResult { } } +/// Output envelope produced by a single flow step execution. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct StepEnvelope { + /// Name of the step that produced this envelope. pub step_name: String, + /// Timestamp when the step started. pub started_at: DateTime, + /// Timestamp when the step finished. 
pub finished_at: DateTime, + /// Process exit code (0 = success). pub exit_code: i32, + /// Captured standard output from the step. pub stdout: String, + /// Captured standard error from the step. pub stderr: String, + /// Estimated cost in USD for this step, if available. #[serde(default)] pub cost_usd: Option, + /// Agent session ID, if this step spawned an agent. #[serde(default)] pub session_id: Option, + /// Number of input (prompt) tokens consumed, if available. #[serde(default)] pub input_tokens: Option, + /// Number of output (completion) tokens generated, if available. #[serde(default)] pub output_tokens: Option, /// Path to temp file containing stdout (for downstream action steps). diff --git a/crates/terraphim_orchestrator/src/flow/executor.rs b/crates/terraphim_orchestrator/src/flow/executor.rs index 8b676fff7..674f07385 100644 --- a/crates/terraphim_orchestrator/src/flow/executor.rs +++ b/crates/terraphim_orchestrator/src/flow/executor.rs @@ -59,14 +59,21 @@ fn resolve_matrix_vars(template: &str, row: &MatrixParams) -> String { /// constructed. #[derive(Debug, Clone, Default)] pub struct ProjectRuntime { + /// Working directory for agents in this project. pub working_dir: PathBuf, + /// Gitea repository owner for status posting, if configured. pub gitea_owner: Option, + /// Gitea repository name for status posting, if configured. pub gitea_repo: Option, } +/// Executor that drives flow steps sequentially or in a matrix fan-out. pub struct FlowExecutor { + /// Top-level working directory used when no per-project override is present. pub working_dir: PathBuf, + /// Spawner used to launch agent processes. pub spawner: AgentSpawner, + /// Directory where flow run state files are persisted. pub flow_state_dir: PathBuf, /// Per-project runtime metadata, keyed by project id. Missing entries /// mean "use the FlowExecutor's top-level working_dir" (legacy mode). 
@@ -74,6 +81,7 @@ pub struct FlowExecutor { } impl FlowExecutor { + /// Create a new executor with the given working directory and state directory. pub fn new(working_dir: PathBuf, flow_state_dir: PathBuf) -> Self { Self { working_dir: working_dir.clone(), diff --git a/crates/terraphim_orchestrator/src/flow/mod.rs b/crates/terraphim_orchestrator/src/flow/mod.rs index c9f221b2a..46aecb501 100644 --- a/crates/terraphim_orchestrator/src/flow/mod.rs +++ b/crates/terraphim_orchestrator/src/flow/mod.rs @@ -1,5 +1,11 @@ +//! Multi-step flow orchestration for ADF agent pipelines. +/// Flow configuration types (step definitions, matrix expansion, strategies). pub mod config; +/// Step result envelopes that carry output, timing, and cost data. pub mod envelope; +/// Flow executor that drives step-by-step execution with timeout and error handling. pub mod executor; +/// Persistent flow state and run-record storage. pub mod state; +/// Parser that extracts token-usage and cost figures from agent CLI output. pub mod token_parser; diff --git a/crates/terraphim_orchestrator/src/flow/token_parser.rs b/crates/terraphim_orchestrator/src/flow/token_parser.rs index 7c0adafbc..f2db77c59 100644 --- a/crates/terraphim_orchestrator/src/flow/token_parser.rs +++ b/crates/terraphim_orchestrator/src/flow/token_parser.rs @@ -9,9 +9,13 @@ use std::sync::LazyLock; /// Token usage data extracted from CLI output. #[derive(Debug, Clone, Default, PartialEq)] pub struct TokenUsage { + /// Number of input (prompt) tokens consumed. pub input_tokens: Option, + /// Number of output (completion) tokens generated. pub output_tokens: Option, + /// Total tokens (input + output). pub total_tokens: Option, + /// Estimated cost in USD for this completion. pub cost_usd: Option, } @@ -104,12 +108,14 @@ pub fn parse_opencode_output(output: &str) -> TokenUsage { parse_token_usage(output) } +/// Parse token usage from Claude CLI output.
pub fn parse_claude_output(output: &str) -> TokenUsage { // Claude CLI may output usage differently // Example: "Input tokens: 1234, Output tokens: 567" parse_token_usage(output) } +/// Parse token usage from Codex/OpenAI CLI output. pub fn parse_codex_output(output: &str) -> TokenUsage { // Codex/OpenAI CLI output parse_token_usage(output) diff --git a/crates/terraphim_orchestrator/src/kg_router.rs b/crates/terraphim_orchestrator/src/kg_router.rs index da65e7146..44102f841 100644 --- a/crates/terraphim_orchestrator/src/kg_router.rs +++ b/crates/terraphim_orchestrator/src/kg_router.rs @@ -398,10 +398,13 @@ impl KgRouter { } } +/// Errors returned by the KG router. #[derive(Debug, thiserror::Error)] pub enum KgRouterError { + /// The taxonomy directory does not exist at the expected path. #[error("taxonomy directory not found: {0}")] TaxonomyNotFound(String), + /// The taxonomy markdown files could not be parsed. #[error("failed to parse taxonomy: {0}")] ParseError(String), } diff --git a/crates/terraphim_orchestrator/src/local_skills.rs b/crates/terraphim_orchestrator/src/local_skills.rs index e4e3b8c2c..ba9235ca4 100644 --- a/crates/terraphim_orchestrator/src/local_skills.rs +++ b/crates/terraphim_orchestrator/src/local_skills.rs @@ -2,17 +2,23 @@ use std::path::{Path, PathBuf}; use terraphim_spawner::SpawnContext; +/// Configuration for local skill discovery in a project directory. #[derive(Debug, Clone, PartialEq, Eq)] pub struct LocalSkillConfig { + /// Path to the `.terraphim/skills` directory containing skill definitions. pub skills_dir: PathBuf, } +/// CLI tools that natively support loading local skills. #[derive(Debug, Copy, Clone, PartialEq, Eq)] pub enum SupportedSkillCli { + /// The `opencode` CLI tool. Opencode, + /// The `claude` or `claude-code` CLI tool. Claude, } +/// Discover a local skill configuration in the given project root, if present. 
pub fn discover_local_skills(project_root: &Path) -> Option { let skills_dir = project_root.join(".terraphim/skills"); skills_dir @@ -20,6 +26,7 @@ pub fn discover_local_skills(project_root: &Path) -> Option { .then_some(LocalSkillConfig { skills_dir }) } +/// Detect which supported skill CLI (if any) matches the given tool name. pub fn detect_skill_cli(cli_tool: &str) -> Option { match cli_name(cli_tool) { "opencode" => Some(SupportedSkillCli::Opencode), @@ -28,6 +35,7 @@ pub fn detect_skill_cli(cli_tool: &str) -> Option { } } +/// Augment a spawn context with local skill loading directives for the given CLI. pub fn prepare_local_skill_loading( cli_tool: &str, project_root: &Path, diff --git a/crates/terraphim_orchestrator/src/mention.rs b/crates/terraphim_orchestrator/src/mention.rs index ad01b677b..52d1e0e81 100644 --- a/crates/terraphim_orchestrator/src/mention.rs +++ b/crates/terraphim_orchestrator/src/mention.rs @@ -28,14 +28,21 @@ static MENTION_RE: LazyLock = LazyLock::new(|| { /// How a mention was resolved. #[derive(Debug, Clone, PartialEq)] pub enum MentionResolution { + /// The mention resolved to a registered agent name. AgentName, - PersonaName { persona: String }, + /// The mention resolved to a persona. + PersonaName { + /// The persona name that was resolved. + persona: String, + }, } /// Parsed tokens of a single `@adf:[project/]name` mention. #[derive(Debug, Clone, PartialEq)] pub struct MentionTokens { + /// Optional project prefix extracted from a qualified mention. pub project: Option, + /// Bare agent name extracted from the mention. pub agent: String, } @@ -61,13 +68,21 @@ pub fn parse_mention_tokens(text: &str) -> Vec { /// A detected and resolved mention. #[derive(Debug, Clone)] pub struct DetectedMention { + /// Gitea issue number containing the mention. pub issue_number: u64, + /// Gitea comment ID containing the mention. pub comment_id: u64, + /// The raw `@adf:name` text as it appeared in the comment. 
pub raw_mention: String, + /// Resolved agent name to dispatch. pub agent_name: String, + /// How the mention was resolved (agent or persona). pub resolution: MentionResolution, + /// Full body of the comment containing the mention. pub comment_body: String, + /// Login of the user who posted the comment. pub mentioner: String, + /// ISO 8601 timestamp of the comment. pub timestamp: String, /// Project id the mention was detected in. /// @@ -162,6 +177,7 @@ impl MentionCursor { format!("adf/mention_cursor/{}", project_id) } + /// Load the cursor from persistence or create a fresh one at the current time. pub async fn load_or_now(project_id: &str) -> Self { let key = Self::cursor_key(project_id); @@ -550,6 +566,7 @@ pub struct MentionTracker { } impl MentionTracker { + /// Create a tracker that enforces the given per-issue dispatch limit. pub fn new(max_dispatches_per_issue: u32) -> Self { Self { max_dispatches_per_issue, diff --git a/crates/terraphim_orchestrator/src/mention_chain.rs b/crates/terraphim_orchestrator/src/mention_chain.rs index d18b73e04..791845ad0 100644 --- a/crates/terraphim_orchestrator/src/mention_chain.rs +++ b/crates/terraphim_orchestrator/src/mention_chain.rs @@ -14,18 +14,32 @@ pub const DEFAULT_MAX_MENTION_DEPTH: u32 = 3; /// Errors from mention chain validation. #[derive(Debug, thiserror::Error)] pub enum MentionChainError { + /// An agent attempted to mention itself. #[error("agent '{agent}' cannot mention itself")] - SelfMention { agent: String }, + SelfMention { + /// The agent that attempted a self-mention. + agent: String, + }, + /// The mention chain has exceeded the configured depth limit. #[error("mention chain depth {depth} exceeds max {max_depth} for agent '{agent}'")] DepthExceeded { + /// Current depth in the chain. depth: u32, + /// Maximum allowed depth from config. max_depth: u32, + /// The agent that would be dispatched next. agent: String, }, + /// A cycle was detected in the mention chain. 
#[error("cycle detected: {from} -> {to} would create a loop")] - CycleDetected { from: String, to: String }, + CycleDetected { + /// The agent that initiated the mention. + from: String, + /// The agent being mentioned (same as a previous chain member). + to: String, + }, } /// Stateless mention chain validation. diff --git a/crates/terraphim_orchestrator/src/pr_review.rs b/crates/terraphim_orchestrator/src/pr_review.rs index 3d0073474..164249fb6 100644 --- a/crates/terraphim_orchestrator/src/pr_review.rs +++ b/crates/terraphim_orchestrator/src/pr_review.rs @@ -51,11 +51,17 @@ pub struct ReviewVerdict { /// policy with a 500 LoC diff cap and an agent-author requirement. #[derive(Debug, Clone, PartialEq, Eq)] pub struct AutoMergeCriteria { + /// Minimum confidence score (1–5) required for auto-merge. pub min_confidence: u8, + /// Maximum number of P0 (critical) findings allowed. pub max_p0: u32, + /// Maximum number of P1 (important) findings allowed. pub max_p1: u32, + /// When true, every criterion must be met, so any single failure blocks the merge. pub require_all_criteria: bool, + /// Maximum combined lines of code changed (additions + deletions). pub max_diff_loc: u32, + /// Whether the PR author must be an agent (not a human). pub require_agent_author: bool, } @@ -75,10 +81,15 @@ impl Default for AutoMergeCriteria { /// Minimum PR metadata required by [`evaluate`]. #[derive(Debug, Clone, PartialEq, Eq)] pub struct PrMetadata { + /// Pull request number. pub pr_number: u64, + /// Login of the PR author. pub author_login: String, + /// Total lines changed (additions + deletions). pub diff_loc: u32, + /// HEAD commit SHA of the PR. pub head_sha: String, + /// Target branch the PR is merging into. pub base_branch: String, } @@ -96,12 +107,16 @@ pub enum AutoMergeDecision { /// `structural-pr-review` template. #[derive(Error, Debug, PartialEq, Eq)] pub enum VerdictParseError { + /// The review comment is missing the confidence score header.
#[error("missing confidence score header in review comment")] MissingConfidence, + /// The confidence score is outside the valid range (1–5). #[error("confidence score out of range: got {0}")] ConfidenceOutOfRange(u8), + /// The inline findings section is absent from the review comment. #[error("missing inline findings section")] MissingFindings, + /// The footer line does not follow the expected format. #[error("malformed footer (expected `Last reviewed commit: `)")] MalformedFooter, } diff --git a/crates/terraphim_orchestrator/src/provider_probe.rs b/crates/terraphim_orchestrator/src/provider_probe.rs index fca34e950..50c7c88d6 100644 --- a/crates/terraphim_orchestrator/src/provider_probe.rs +++ b/crates/terraphim_orchestrator/src/provider_probe.rs @@ -16,12 +16,19 @@ use crate::rate_limiter::RateLimiter; /// Result of probing a single provider+model combination. #[derive(Debug, Clone, serde::Serialize)] pub struct ProbeResult { + /// LLM provider identifier (e.g. `"anthropic"`, `"openai"`). pub provider: String, + /// Model identifier within the provider (e.g. `"claude-sonnet-4-6"`). pub model: String, + /// CLI tool used to invoke this provider (e.g. `"claude"`, `"opencode"`). pub cli_tool: String, + /// Outcome of the probe attempt. pub status: ProbeStatus, + /// Round-trip latency in milliseconds, if the probe succeeded. pub latency_ms: Option, + /// Error message if the probe failed. pub error: Option, + /// ISO 8601 timestamp of the probe. pub timestamp: String, } @@ -29,9 +36,13 @@ pub struct ProbeResult { #[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize)] #[serde(rename_all = "snake_case")] pub enum ProbeStatus { + /// The provider responded successfully within the timeout. Success, + /// The provider returned an error response. Error, + /// The provider did not respond within the timeout. Timeout, + /// The probe was skipped due to an active rate limit. 
RateLimited, } @@ -74,11 +85,13 @@ impl ProviderHealthMap { } } + /// Attach a rate limiter to throttle probe frequency. pub fn with_rate_limiter(mut self, rate_limiter: RateLimiter) -> Self { self.rate_limiter = Some(rate_limiter); self } + /// Return true if the given provider is currently rate-limited. pub fn is_rate_limited(&self, provider: &str) -> bool { self.rate_limited.contains(provider) } @@ -724,6 +737,7 @@ async fn probe_single( } impl ProviderHealthMap { + /// Ship all current probe results to the Quickwit telemetry sink. pub async fn send_to_quickwit( &self, sink: &crate::quickwit::QuickwitFleetSink, diff --git a/crates/terraphim_orchestrator/src/quickwit.rs b/crates/terraphim_orchestrator/src/quickwit.rs index dc09dbc62..6ef2a8eb8 100644 --- a/crates/terraphim_orchestrator/src/quickwit.rs +++ b/crates/terraphim_orchestrator/src/quickwit.rs @@ -124,36 +124,54 @@ impl From> for QuickwitError { pub enum OrchestratorEvent { /// Reviewer agent completed and a verdict was successfully parsed. PrReviewed { + /// Pull request number that was reviewed. pr_number: u64, + /// Project identifier for the reviewed PR. project: String, + /// HEAD commit SHA of the reviewed PR. head_sha: String, + /// Login of the reviewer agent that produced the verdict. reviewer_login: String, /// Confidence score 1-5 from the `structural-pr-review` comment. confidence: u8, + /// Number of P0 (critical) findings. p0_count: u32, + /// Number of P1 (important) findings. p1_count: u32, /// `"GO"` | `"CONDITIONAL"` | `"NO-GO"` verdict: String, }, /// AutoMerge handler merged the PR successfully. PrAutoMerged { + /// Pull request number that was merged. pr_number: u64, + /// Project identifier for the merged PR. project: String, + /// Commit SHA produced by the merge. merge_sha: String, + /// Pull request title. title: String, }, /// Post-merge test gate passed; the merge is stable. PrAutoMergedVerified { + /// Pull request number whose merge was verified. 
pr_number: u64, + /// Project identifier. project: String, + /// Merge commit SHA that was verified. merge_sha: String, + /// Wall-clock time in seconds taken by the verification run. wall_time_secs: f64, }, /// Post-merge test gate failed and the merge was reverted. PrAutoReverted { + /// Pull request number whose merge was reverted. pr_number: u64, + /// Project identifier. project: String, + /// The failed merge commit SHA. merge_sha: String, + /// The revert commit SHA. revert_sha: String, /// Classified failure kind (e.g. `"TestFailure"`, `"Timeout"`). reason: String, diff --git a/crates/terraphim_orchestrator/src/webhook.rs b/crates/terraphim_orchestrator/src/webhook.rs index 4800e1886..f4bde60b0 100644 --- a/crates/terraphim_orchestrator/src/webhook.rs +++ b/crates/terraphim_orchestrator/src/webhook.rs @@ -33,8 +33,10 @@ struct GiteaComment { created_at: String, } +/// A Gitea user object included in webhook payloads. #[derive(Debug, Deserialize)] pub struct GiteaUser { + /// Gitea username (login handle) of this user. pub login: String, } @@ -45,69 +47,106 @@ struct GiteaIssue { state: String, } +/// A Gitea repository object included in webhook payloads. #[derive(Debug, Deserialize)] pub struct GiteaRepository { + /// Full repository name in `owner/repo` format. pub full_name: String, } /// Gitea webhook payload for pull_request events. #[derive(Debug, Deserialize)] pub struct GiteaPullRequestPayload { + /// The action that triggered this event (e.g. `opened`, `closed`, `synchronized`). pub action: String, + /// PR number within the repository. pub number: u64, + /// Detailed pull request fields. pub pull_request: PullRequestFields, + /// Repository in which the PR was created. pub repository: GiteaRepository, } +/// Detailed fields of a Gitea pull request. #[derive(Debug, Deserialize)] pub struct PullRequestFields { + /// The head (source) branch reference. pub head: PrRef, + /// The base (target) branch reference. 
pub base: PrRef, + /// The user who opened the pull request. pub user: GiteaUser, + /// Pull request title. pub title: String, + /// Whether the pull request is a draft. pub draft: bool, + /// Number of lines added. pub additions: u32, + /// Number of lines deleted. pub deletions: u32, } +/// A git ref (branch/tag) in a pull request head or base. #[derive(Debug, Deserialize)] pub struct PrRef { + /// Full SHA of the commit at the tip of this ref. pub sha: String, #[serde(rename = "ref")] + /// The branch or tag name (e.g. `main`, `task/123-fix`). pub ref_name: String, } /// A dispatch request sent from the webhook handler to the orchestrator. #[derive(Debug, serde::Serialize, serde::Deserialize)] pub enum WebhookDispatch { + /// Spawn a named agent in response to an @adf: mention. SpawnAgent { + /// Resolved agent name to spawn. agent_name: String, /// Project extracted from a qualified `@adf:project/name` mention, or /// `None` for unqualified `@adf:name` mentions. detected_project: Option, + /// Gitea issue number the comment was posted on. issue_number: u64, + /// Gitea comment ID that triggered the mention. comment_id: u64, + /// Full text of the comment used as agent context. context: String, /// Optional synthetic event for direct dispatch of event-only agents. #[serde(default)] synthetic_event: Option, }, + /// Spawn a persona agent in response to an @adf: persona mention. SpawnPersona { + /// Name of the persona to instantiate. persona_name: String, + /// Gitea issue number the comment was posted on. issue_number: u64, + /// Gitea comment ID that triggered the mention. comment_id: u64, + /// Full text of the comment used as context. context: String, }, + /// Trigger a compound (multi-agent) review. CompoundReview { + /// Gitea issue number to review. issue_number: u64, + /// Gitea comment ID that requested the review. comment_id: u64, }, + /// Trigger a single-agent PR review. ReviewPr { + /// Pull request number to review. 
pr_number: u64, + /// Project identifier resolved from the repository name. project: String, + /// HEAD commit SHA of the pull request. head_sha: String, + /// GitHub/Gitea login of the PR author. author_login: String, + /// Pull request title. title: String, + /// Total lines of code changed (additions + deletions). diff_loc: u32, }, /// Push event dispatch — triggers the deterministic `build-runner` agent @@ -184,39 +223,56 @@ where #[derive(Debug, Deserialize)] pub struct GiteaPushPayload { #[serde(rename = "ref")] + /// Full git ref that was pushed (e.g. `refs/heads/main`). pub ref_name: String, + /// Parent commit SHA before the push (all zeros for branch creation). pub before: String, + /// New tip commit SHA after the push. pub after: String, #[serde(default, deserialize_with = "deserialize_null_default")] + /// The user who performed the push. pub pusher: GiteaPusher, + /// Repository that received the push. pub repository: GiteaRepository, #[serde(default, deserialize_with = "deserialize_null_default_vec")] + /// List of commits included in the push. pub commits: Vec, } +/// The user who performed a git push. #[derive(Debug, Default, Deserialize)] pub struct GiteaPusher { #[serde(default)] + /// Gitea username of the pusher. pub login: String, } +/// File-change lists from a single commit in a push event. #[derive(Debug, Deserialize)] pub struct GiteaPushCommit { #[serde(default, deserialize_with = "deserialize_null_default_vec")] + /// Paths of files added by this commit. pub added: Vec, #[serde(default, deserialize_with = "deserialize_null_default_vec")] + /// Paths of files removed by this commit. pub removed: Vec, #[serde(default, deserialize_with = "deserialize_null_default_vec")] + /// Paths of files modified by this commit. pub modified: Vec, } /// Shared state for the webhook handler. #[derive(Clone)] pub struct WebhookState { + /// Names of all registered agents, used for group alias expansion. 
pub agent_names: Vec, + /// Registry of persona agents that can be summoned by name. pub persona_registry: std::sync::Arc, + /// Channel for forwarding parsed dispatch requests to the orchestrator. pub dispatch_tx: tokio::sync::mpsc::Sender, + /// Optional HMAC secret used to verify Gitea webhook signatures. pub secret: Option, + /// Map from repository full name (`owner/repo`) to project identifier. pub project_by_repo: std::collections::HashMap, } From f8611223228855f62c5e8138485d3353441442d7 Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:49:22 +0200 Subject: [PATCH 10/11] docs(#2035): complete doc comments for terraphim_orchestrator (0 warnings) Refs #2035 --- .../src/meta_coordinator.rs | 12 ++++++-- .../src/metrics_persistence.rs | 3 ++ .../terraphim_orchestrator/src/nightwatch.rs | 8 +++++ crates/terraphim_orchestrator/src/persona.rs | 3 ++ .../src/post_merge_gate.rs | 9 ++++++ .../terraphim_orchestrator/src/pr_dispatch.rs | 6 ++++ crates/terraphim_orchestrator/src/pr_gate.rs | 29 ++++++++++++++++--- .../src/provider_budget.rs | 4 +++ .../terraphim_orchestrator/src/scheduler.rs | 6 +++- 9 files changed, 73 insertions(+), 7 deletions(-) diff --git a/crates/terraphim_orchestrator/src/meta_coordinator.rs b/crates/terraphim_orchestrator/src/meta_coordinator.rs index 9c1e6a1a0..8b390459e 100644 --- a/crates/terraphim_orchestrator/src/meta_coordinator.rs +++ b/crates/terraphim_orchestrator/src/meta_coordinator.rs @@ -108,13 +108,21 @@ pub struct CandidateIssue { #[derive(Debug, Clone, PartialEq, Eq)] pub enum DispatchResult { /// Successfully dispatched. - Dispatched { agent: String, issue_id: String }, + Dispatched { + /// Name of the agent that was dispatched. + agent: String, + /// ID of the issue that was claimed. + issue_id: String, + }, /// No ready issues found. NoIssues, /// Issue was already dispatched recently. AlreadyDispatched, /// Failed to claim the issue. 
- ClaimFailed { reason: String }, + ClaimFailed { + /// Human-readable reason the claim failed. + reason: String, + }, /// No matching agent for the issue. NoMatchingAgent, } diff --git a/crates/terraphim_orchestrator/src/metrics_persistence.rs b/crates/terraphim_orchestrator/src/metrics_persistence.rs index fecf0e7e3..b98347fb4 100644 --- a/crates/terraphim_orchestrator/src/metrics_persistence.rs +++ b/crates/terraphim_orchestrator/src/metrics_persistence.rs @@ -87,12 +87,15 @@ pub trait MetricsPersistence: Send + Sync { /// Errors that can occur during metrics persistence operations. #[derive(Debug, thiserror::Error)] pub enum MetricsPersistenceError { + /// A storage backend operation failed. #[error("storage error: {0}")] Storage(String), + /// JSON serialisation or deserialisation failed. #[error("serialization error: {0}")] Serialization(#[from] serde_json::Error), + /// No metrics exist for the requested agent. #[error("agent not found: {0}")] NotFound(String), } diff --git a/crates/terraphim_orchestrator/src/nightwatch.rs b/crates/terraphim_orchestrator/src/nightwatch.rs index 76787730c..507e59ba3 100644 --- a/crates/terraphim_orchestrator/src/nightwatch.rs +++ b/crates/terraphim_orchestrator/src/nightwatch.rs @@ -220,9 +220,13 @@ pub struct DriftMetrics { /// Drift score combining all metrics into a single 0.0-1.0 value. #[derive(Debug, Clone)] pub struct DriftScore { + /// Name of the agent that was scored. pub agent_name: String, + /// Composite drift score in the range 0.0 (none) to 1.0 (maximum). pub score: f64, + /// Raw drift metrics that produced this score. pub metrics: DriftMetrics, + /// Correction level derived from the score. pub level: CorrectionLevel, } @@ -244,8 +248,11 @@ pub enum CorrectionLevel { /// Alert emitted by NightwatchMonitor when drift exceeds threshold. #[derive(Debug, Clone)] pub struct DriftAlert { + /// Name of the agent that triggered the alert. pub agent_name: String, + /// The drift score that triggered this alert. 
pub drift_score: DriftScore, + /// Recommended corrective action for the orchestrator to take. pub recommended_action: CorrectionAction, } @@ -437,6 +444,7 @@ impl NightwatchMonitor { .expect("alert channel should never close while monitor exists") } + /// Take the alert receiver, leaving a placeholder channel whose sender is already dropped; always returns `Some`. pub fn take_alert_rx(&mut self) -> Option> { let (_, rx) = mpsc::channel(1); Some(std::mem::replace(&mut self.alert_rx, rx)) diff --git a/crates/terraphim_orchestrator/src/persona.rs b/crates/terraphim_orchestrator/src/persona.rs index d385f56ce..33a33940d 100644 --- a/crates/terraphim_orchestrator/src/persona.rs +++ b/crates/terraphim_orchestrator/src/persona.rs @@ -110,10 +110,13 @@ const TEMPLATE_NAME: &str = "metaprompt"; /// Error type for metaprompt rendering operations. #[derive(Debug, thiserror::Error)] pub enum MetapromptRenderError { + /// An underlying I/O error occurred while reading a template file. #[error("IO error: {0}")] Io(#[from] std::io::Error), + /// The Handlebars template could not be compiled. #[error("Template compilation error: {0}")] Template(String), + /// Rendering the template failed. #[error("Template render error: {0}")] Render(String), } diff --git a/crates/terraphim_orchestrator/src/post_merge_gate.rs b/crates/terraphim_orchestrator/src/post_merge_gate.rs index 43cea135f..6ebed6455 100644 --- a/crates/terraphim_orchestrator/src/post_merge_gate.rs +++ b/crates/terraphim_orchestrator/src/post_merge_gate.rs @@ -161,8 +161,10 @@ pub struct FailureClassification { /// Errors produced while running or reverting on the gate. #[derive(Debug, thiserror::Error)] pub enum GateError { + /// A command executed by the gate runner failed. #[error("command error: {0}")] Command(#[from] CommandError), + /// The automatic revert commit or push failed.
#[error("revert failed: {0}")] Revert(String), } @@ -429,8 +431,11 @@ async fn tail_stream(reader: R, max_lines: usize) -> Strin /// assert the handler invoked the expected commands in the expected order. #[derive(Debug, Clone, PartialEq, Eq)] pub struct CallRecord { + /// The command (binary) that was invoked. pub cmd: String, + /// Arguments passed to the command. pub args: Vec, + /// Working directory in which the command was run. pub cwd: PathBuf, } @@ -444,10 +449,12 @@ pub struct ScriptedRunner { } impl ScriptedRunner { + /// Create a new scripted runner with an empty response queue. pub fn new() -> Self { Self::default() } + /// Enqueue a successful command response with the given exit code and output. pub fn push_ok(&self, code: i32, stdout: &str, stderr: &str) { self.responses.lock().unwrap().push_back(Ok(CommandOutput { exit_code: Some(code), @@ -457,10 +464,12 @@ impl ScriptedRunner { })); } + /// Enqueue an error response for the next command call. pub fn push_err(&self, err: CommandError) { self.responses.lock().unwrap().push_back(Err(err)); } + /// Return all recorded command calls in order. pub fn calls(&self) -> Vec { self.calls.lock().unwrap().clone() } diff --git a/crates/terraphim_orchestrator/src/pr_dispatch.rs b/crates/terraphim_orchestrator/src/pr_dispatch.rs index 5774b915f..dee2d6c51 100644 --- a/crates/terraphim_orchestrator/src/pr_dispatch.rs +++ b/crates/terraphim_orchestrator/src/pr_dispatch.rs @@ -21,11 +21,17 @@ use crate::config::{AgentDefinition, OrchestratorConfig}; /// have to know about the dispatcher enum variant shape. #[derive(Debug, Clone, PartialEq, Eq)] pub struct ReviewPrRequest { + /// Pull request number. pub pr_number: u64, + /// Project ID this PR belongs to. pub project: String, + /// HEAD commit SHA of the PR branch. pub head_sha: String, + /// Login of the PR author. pub author_login: String, + /// Title of the pull request. pub title: String, + /// Approximate lines-of-change count. 
pub diff_loc: u32, } diff --git a/crates/terraphim_orchestrator/src/pr_gate.rs b/crates/terraphim_orchestrator/src/pr_gate.rs index 31f1b1e9d..e2dc79a59 100644 --- a/crates/terraphim_orchestrator/src/pr_gate.rs +++ b/crates/terraphim_orchestrator/src/pr_gate.rs @@ -9,9 +9,13 @@ /// Terminal state of a single commit status context. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum CommitStatusState { + /// Status has been posted but not yet resolved. Pending, + /// The check completed successfully. Success, + /// The check completed with a failure. Failure, + /// The check encountered an error (distinct from an explicit failure). Error, } @@ -35,7 +39,9 @@ impl CommitStatusState { /// One commit status entry posted against a SHA. #[derive(Debug, Clone, PartialEq, Eq)] pub struct CommitStatusSummary { + /// The context name of this status (e.g. `"adf/build"`). pub context: String, + /// Current state of this status context. pub state: CommitStatusState, /// Unix timestamp (seconds) when the status was created, if available. pub created_at_unix: Option, @@ -47,8 +53,11 @@ pub const STALE_PENDING_TIMEOUT_SECS: i64 = 3600; /// Snapshot of everything the reconciler needs to classify a PR head. #[derive(Debug, Clone, PartialEq, Eq)] pub struct PrGateSnapshot { + /// Pull request number. pub pr_number: u64, + /// HEAD commit SHA of the pull request. pub head_sha: String, + /// Target branch the PR is merging into. pub base_branch: String, /// Context names required by branch protection (e.g. `["adf/build", "adf/pr-reviewer"]`). pub required_contexts: Vec, @@ -64,13 +73,25 @@ pub enum PrGateDecision { /// All required contexts green; proceed to auto-merge policy evaluation. ReadyForPolicy, /// Required contexts not yet posted; enqueue the responsible agents. - EnqueueMissingChecks { missing: Vec }, + EnqueueMissingChecks { + /// Context names that have not yet been posted. 
+ missing: Vec, + }, /// Required contexts posted but still pending; wait for next reconcile tick. - AwaitingChecks { pending: Vec }, + AwaitingChecks { + /// Context names that are still in the pending state. + pending: Vec, + }, /// At least one required context failed; open remediation issue. - BlockedByFailedChecks { failed: Vec<(String, String)> }, + BlockedByFailedChecks { + /// Pairs of `(context_name, failure_reason)` for each failed context. + failed: Vec<(String, String)>, + }, /// Status API or branch protection API failure; service fault. - FactoryFault { error: String }, + FactoryFault { + /// Human-readable error from the upstream API. + error: String, + }, } /// Reconcile the PR gate state from a snapshot. Pure function. diff --git a/crates/terraphim_orchestrator/src/provider_budget.rs b/crates/terraphim_orchestrator/src/provider_budget.rs index da4a44f4d..db809c838 100644 --- a/crates/terraphim_orchestrator/src/provider_budget.rs +++ b/crates/terraphim_orchestrator/src/provider_budget.rs @@ -55,13 +55,16 @@ pub struct WindowState { /// Serialisable snapshot of a single provider's two windows. #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] pub struct ProviderSnapshotEntry { + /// Rolling-hour usage window state. pub hour: WindowState, + /// Rolling-day usage window state. pub day: WindowState, } /// Serialisable snapshot of the whole tracker. #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq)] pub struct ProviderBudgetSnapshot { + /// Usage snapshots keyed by provider identifier. pub providers: HashMap, } @@ -261,6 +264,7 @@ impl ProviderBudgetTracker { self.configs.keys().map(|s| s.as_str()) } + /// Return true if no provider configurations are registered. 
pub fn is_empty(&self) -> bool { self.configs.is_empty() } diff --git a/crates/terraphim_orchestrator/src/scheduler.rs b/crates/terraphim_orchestrator/src/scheduler.rs index 6673d597e..e94f2862d 100644 --- a/crates/terraphim_orchestrator/src/scheduler.rs +++ b/crates/terraphim_orchestrator/src/scheduler.rs @@ -13,7 +13,10 @@ pub enum ScheduleEvent { /// Time to spawn this agent. Spawn(Box), /// Time to stop this agent. - Stop { agent_name: String }, + Stop { + /// Name of the agent that should be stopped. + agent_name: String, + }, /// Time to run compound review. CompoundReview, /// Time to run a flow. @@ -81,6 +84,7 @@ impl TimeScheduler { }) } + /// Take the event receiver, leaving a placeholder channel whose sender is already dropped; always returns `Some`. pub fn take_event_rx(&mut self) -> Option> { let (_, rx) = mpsc::channel(1); Some(std::mem::replace(&mut self.event_rx, rx)) From d834cd36bbcafdc066f634a22e7a21ff756560dd Mon Sep 17 00:00:00 2001 From: Test User Date: Wed, 3 Jun 2026 18:51:11 +0200 Subject: [PATCH 11/11] docs(#2035): suppress macro-generated missing_docs in terraphim_settings; all 18 crates at 0 warnings Refs #2035 --- crates/terraphim_settings/src/lib.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/terraphim_settings/src/lib.rs b/crates/terraphim_settings/src/lib.rs index 76cab8bd1..bfdafa9f5 100644 --- a/crates/terraphim_settings/src/lib.rs +++ b/crates/terraphim_settings/src/lib.rs @@ -3,6 +3,9 @@ //! Loads configuration from TOML files, environment variables, and optional //! 1Password secrets via the `onepassword` feature flag. Settings follow a //! layered precedence: defaults < file < environment < 1Password. +// The `twelf::config` macro generates a `with_layers` constructor that the +// compiler flags as missing-docs. NOTE: this allow is crate-wide, so it silences the lint for every item in the crate. +#![allow(missing_docs)] use directories::ProjectDirs; use serde::de::{self, Deserializer}; use serde::{Deserialize, Serialize};