Merge remote-tracking branch 'upstream/main' into dev

github-actions[bot] · github-actions[bot] · commit ef0856b56640 · 2026-04-17T00:20:24.000Z
diff --git a/prd.json b/prd.json
@@ -160,6 +160,182 @@
       ],
       "passes": true,
       "priority": "P2"
+    },
+    {
+      "id": "US-012",
+      "title": "Trust prompt resolver with allowlist auto-trust",
+      "description": "Add allowlisted auto-trust behavior for known repos/worktrees. Trust prompts currently block TUI startup and require manual intervention. Implement automatic trust resolution for pre-approved repositories.",
+      "acceptanceCriteria": [
+        "TrustAllowlist config structure with repo patterns",
+        "Auto-trust behavior for allowlisted repos/worktrees",
+        "trust_required event emitted when trust prompt detected",
+        "trust_resolved event emitted when trust is granted",
+        "Non-allowlisted repos remain gated (manual trust required)",
+        "Integration with worker boot lifecycle",
+        "Tests for allowlist matching and event emission"
+      ],
+      "passes": true,
+      "priority": "P1"
+    },
+    {
+      "id": "US-013",
+      "title": "Phase 2 - Session event ordering + terminal-state reconciliation",
+      "description": "When the same session emits contradictory lifecycle events (idle, error, completed, transport/server-down) in close succession, expose deterministic final truth. Attach monotonic sequence/causal ordering metadata, classify terminal vs advisory events, reconcile duplicate/out-of-order terminal events into one canonical lane outcome.",
+      "acceptanceCriteria": [
+        "Monotonic sequence / causal ordering metadata attached to session lifecycle events",
+        "Terminal vs advisory event classification implemented",
+        "Reconcile duplicate or out-of-order terminal events into one canonical outcome",
+        "Distinguish 'session terminal state unknown because transport died' from real 'completed'",
+        "Tests verify reconciliation behavior with out-of-order event bursts"
+      ],
+      "passes": true,
+      "priority": "P1"
+    },
+    {
+      "id": "US-014",
+      "title": "Phase 2 - Event provenance / environment labeling",
+      "description": "Every emitted event should declare its source (live_lane, test, healthcheck, replay, transport) so claws do not mistake test noise for production truth. Include environment/channel label, emitter identity, and confidence/trust level.",
+      "acceptanceCriteria": [
+        "EventProvenance enum with live_lane, test, healthcheck, replay, transport variants",
+        "Environment/channel label attached to all events",
+        "Emitter identity field on events",
+        "Confidence/trust level field for downstream automation",
+        "Tests verify provenance labeling and filtering"
+      ],
+      "passes": true,
+      "priority": "P1"
+    },
+    {
+      "id": "US-015",
+      "title": "Phase 2 - Session identity completeness at creation time",
+      "description": "A newly created session should emit a stable title, workspace/worktree path, and lane/session purpose at creation time. If any field is not yet known, emit an explicit typed placeholder reason rather than a bare 'unknown' string.",
+      "acceptanceCriteria": [
+        "Session creation emits stable title, workspace/worktree path, purpose immediately",
+        "Explicit typed placeholder when fields unknown (not bare 'unknown' strings)",
+        "Later-enriched metadata reconciles onto same session identity without ambiguity",
+        "Tests verify session identity completeness and placeholder handling"
+      ],
+      "passes": true,
+      "priority": "P1"
+    },
+    {
+      "id": "US-016",
+      "title": "Phase 2 - Duplicate terminal-event suppression",
+      "description": "When the same session emits repeated completed/failed/terminal notifications, collapse duplicates before they trigger repeated downstream reactions. Attach canonical terminal-event fingerprint per lane/session outcome.",
+      "acceptanceCriteria": [
+        "Canonical terminal-event fingerprint attached per lane/session outcome",
+        "Suppress/coalesce repeated terminal notifications within reconciliation window",
+        "Preserve raw event history for audit while exposing one actionable outcome downstream",
+        "Surface when later duplicate materially differs from original terminal payload",
+        "Tests verify deduplication and material difference detection"
+      ],
+      "passes": true,
+      "priority": "P2"
+    },
+    {
+      "id": "US-017",
+      "title": "Phase 2 - Lane ownership / scope binding",
+      "description": "Each session and lane event should declare who owns it and what workflow scope it belongs to. Attach owner/assignee identity, workflow scope (claw-code-dogfood, external-git-maintenance, infra-health, manual-operator), and mark whether watcher is expected to act, observe only, or ignore.",
+      "acceptanceCriteria": [
+        "Owner/assignee identity attached to sessions and lane events",
+        "Workflow scope field (claw-code-dogfood, external-git-maintenance, etc.)",
+        "Watcher action expectation field (act, observe-only, ignore)",
+        "Preserve scope through session restarts, resumes, and late terminal events",
+        "Tests verify ownership and scope binding"
+      ],
+      "passes": true,
+      "priority": "P2"
+    },
+    {
+      "id": "US-018",
+      "title": "Phase 2 - Nudge acknowledgment / dedupe contract",
+      "description": "Periodic clawhip nudges should carry nudge id/cycle id and delivery timestamp. Expose whether claw has already acknowledged or responded for that cycle. Distinguish new nudge, retry nudge, and stale duplicate.",
+      "acceptanceCriteria": [
+        "Nudge id / cycle id and delivery timestamp attached",
+        "Acknowledgment state exposed (already acknowledged or not)",
+        "Distinguish new nudge vs retry nudge vs stale duplicate",
+        "Allow downstream summaries to bind reported pinpoint back to triggering nudge id",
+        "Tests verify nudge deduplication and acknowledgment tracking"
+      ],
+      "passes": true,
+      "priority": "P2"
+    },
+    {
+      "id": "US-019",
+      "title": "Phase 2 - Stable roadmap-id assignment for newly filed pinpoints",
+      "description": "When a claw records a new pinpoint/follow-up, assign or expose a stable tracking id immediately. Expose that id in structured event/report payload and preserve across edits, reorderings, and summary compression.",
+      "acceptanceCriteria": [
+        "Canonical roadmap id assigned at filing time",
+        "Roadmap id exposed in structured event/report payload",
+        "Same id preserved across edits, reorderings, summary compression",
+        "Distinguish 'new roadmap filing' from 'update to existing roadmap item'",
+        "Tests verify stable id assignment and update detection"
+      ],
+      "passes": true,
+      "priority": "P2"
+    },
+    {
+      "id": "US-020",
+      "title": "Phase 2 - Roadmap item lifecycle state contract",
+      "description": "Each roadmap pinpoint should carry machine-readable lifecycle state (filed, acknowledged, in_progress, blocked, done, superseded). Attach last state-change timestamp and preserve lineage when one pinpoint supersedes or merges into another.",
+      "acceptanceCriteria": [
+        "Lifecycle state enum with filed, acknowledged, in_progress, blocked, done, superseded",
+        "Last state-change timestamp attached",
+        "New report can declare first filing, status update, or closure",
+        "Preserve lineage when one pinpoint supersedes or merges into another",
+        "Tests verify lifecycle state transitions"
+      ],
+      "passes": true,
+      "priority": "P2"
+    },
+    {
+      "id": "US-021",
+      "title": "Request body size pre-flight check for OpenAI-compatible provider",
+      "description": "Implement pre-flight request body size estimation to prevent 400 Bad Request errors from API gateways with size limits. Dogfood testing with kimi-k2.5 found that the DashScope API has a 6MB request body limit, which large system prompts exceeded.",
+      "acceptanceCriteria": [
+        "Pre-flight size estimation before sending requests to OpenAI-compatible providers",
+        "Clear error message when request exceeds provider-specific size limit",
+        "Configuration for different provider limits (6MB DashScope, 100MB OpenAI, etc.)",
+        "Unit tests for size estimation and limit checking",
+        "Integration with existing error handling for actionable user messages"
+      ],
+      "passes": true,
+      "priority": "P1"
+    },
+    {
+      "id": "US-022",
+      "title": "Enhanced error context for API failures",
+      "description": "Add structured error context to API failures including request ID tracking across retries, provider-specific error code mapping, and suggested user actions based on error type (e.g., 'Reduce prompt size' for 413, 'Check API key' for 401).",
+      "acceptanceCriteria": [
+        "Request ID tracking across retries with full context in error messages",
+        "Provider-specific error code mapping with actionable suggestions",
+        "Suggested user actions for common error types (401, 403, 413, 429, 500, 502-504)",
+        "Unit tests for error context extraction",
+        "All existing tests pass and clippy is clean"
+      ],
+      "passes": true,
+      "priority": "P1"
+    },
+    {
+      "id": "US-023",
+      "title": "Add automatic routing for kimi models to DashScope",
+      "description": "Dogfood testing with kimi-k2.5 showed that users must manually type the prefixed form dashscope/kimi-k2.5 instead of just kimi-k2.5. Add automatic routing for kimi/ and kimi- prefixed models to DashScope (similar to qwen models), and add a 'kimi' alias to the model registry.",
+      "acceptanceCriteria": [
+        "kimi/ and kimi- prefix routing to DashScope in metadata_for_model()",
+        "'kimi' alias in MODEL_REGISTRY that resolves to 'kimi-k2.5'",
+        "resolve_model_alias() handles the kimi alias correctly",
+        "Unit tests for kimi routing (similar to qwen routing tests)",
+        "All tests pass and clippy is clean"
+      ],
+      "passes": true,
+      "priority": "P1"
     }
-  ]
+  ],
+  "metadata": {
+    "lastUpdated": "2026-04-16",
+    "completedStories": ["US-001", "US-002", "US-003", "US-004", "US-005", "US-006", "US-007", "US-008", "US-009", "US-010", "US-011", "US-012", "US-013", "US-014", "US-015", "US-016", "US-017", "US-018", "US-019", "US-020", "US-021", "US-022", "US-023"],
+    "inProgressStories": [],
+    "totalStories": 23,
+    "status": "completed"
+  }
 }
diff --git a/rust/crates/api/src/error.rs b/rust/crates/api/src/error.rs
@@ -53,6 +53,8 @@ pub enum ApiError {
         request_id: Option<String>,
         body: String,
         retryable: bool,
+        /// Suggested user action based on error type (e.g., "Reduce prompt size" for 413)
+        suggested_action: Option<String>,
     },
     RetriesExhausted {
         attempts: u32,
@@ -239,6 +241,7 @@ impl ApiError {
 }
 
 impl Display for ApiError {
+    #[allow(clippy::too_many_lines)]
     fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
         match self {
             Self::MissingCredentials {
@@ -340,9 +343,7 @@ impl Display for ApiError {
                 provider,
             } => write!(
                 f,
-                "request body size ({} bytes) exceeds {provider} limit ({} bytes); reduce prompt length or context before retrying",
-                estimated_bytes,
-                max_bytes
+                "request body size ({estimated_bytes} bytes) exceeds {provider} limit ({max_bytes} bytes); reduce prompt length or context before retrying"
             ),
         }
     }
@@ -489,6 +490,7 @@ mod tests {
             request_id: Some("req_jobdori_123".to_string()),
             body: String::new(),
             retryable: true,
+            suggested_action: None,
         };
 
         assert!(error.is_generic_fatal_wrapper());
@@ -511,6 +513,7 @@ mod tests {
                 request_id: Some("req_nested_456".to_string()),
                 body: String::new(),
                 retryable: true,
+                suggested_action: None,
             }),
         };
 
@@ -531,6 +534,7 @@ mod tests {
             request_id: Some("req_ctx_123".to_string()),
             body: String::new(),
             retryable: false,
+            suggested_action: None,
         };
 
         assert!(error.is_context_window_failure());
diff --git a/rust/crates/api/src/providers/anthropic.rs b/rust/crates/api/src/providers/anthropic.rs
@@ -885,6 +885,7 @@ async fn expect_success(response: reqwest::Response) -> Result<reqwest::Response
         request_id,
         body,
         retryable,
+        suggested_action: None,
     })
 }
 
@@ -909,6 +910,7 @@ fn enrich_bearer_auth_error(error: ApiError, auth: &AuthSource) -> ApiError {
         request_id,
         body,
         retryable,
+        suggested_action,
     } = error
     else {
         return error;
@@ -921,6 +923,7 @@ fn enrich_bearer_auth_error(error: ApiError, auth: &AuthSource) -> ApiError {
             request_id,
             body,
             retryable,
+            suggested_action,
         };
     }
     let Some(bearer_token) = auth.bearer_token() else {
@@ -931,6 +934,7 @@ fn enrich_bearer_auth_error(error: ApiError, auth: &AuthSource) -> ApiError {
             request_id,
             body,
             retryable,
+            suggested_action,
         };
     };
     if !bearer_token.starts_with("sk-ant-") {
@@ -941,6 +945,7 @@ fn enrich_bearer_auth_error(error: ApiError, auth: &AuthSource) -> ApiError {
             request_id,
             body,
             retryable,
+            suggested_action,
         };
     }
     // Only append the hint when the AuthSource is pure BearerToken. If both
@@ -955,6 +960,7 @@ fn enrich_bearer_auth_error(error: ApiError, auth: &AuthSource) -> ApiError {
             request_id,
             body,
             retryable,
+            suggested_action,
         };
     }
     let enriched_message = match message {
@@ -968,6 +974,7 @@ fn enrich_bearer_auth_error(error: ApiError, auth: &AuthSource) -> ApiError {
         request_id,
         body,
         retryable,
+        suggested_action,
     }
 }
 
@@ -1555,6 +1562,7 @@ mod tests {
             request_id: Some("req_varleg_001".to_string()),
             body: String::new(),
             retryable: false,
+            suggested_action: None,
         };
 
         // when
@@ -1595,6 +1603,7 @@ mod tests {
             request_id: None,
             body: String::new(),
             retryable: true,
+            suggested_action: None,
         };
 
         // when
@@ -1623,6 +1632,7 @@ mod tests {
             request_id: None,
             body: String::new(),
             retryable: false,
+            suggested_action: None,
         };
 
         // when
@@ -1650,6 +1660,7 @@ mod tests {
             request_id: None,
             body: String::new(),
             retryable: false,
+            suggested_action: None,
         };
 
         // when
@@ -1674,6 +1685,7 @@ mod tests {
             request_id: None,
             body: String::new(),
             retryable: false,
+            suggested_action: None,
         };
 
         // when
diff --git a/rust/crates/api/src/providers/mod.rs b/rust/crates/api/src/providers/mod.rs
@@ -123,6 +123,15 @@ const MODEL_REGISTRY: &[(&str, ProviderMetadata)] = &[
             default_base_url: openai_compat::DEFAULT_XAI_BASE_URL,
         },
     ),
+    (
+        "kimi",
+        ProviderMetadata {
+            provider: ProviderKind::OpenAi,
+            auth_env: "DASHSCOPE_API_KEY",
+            base_url_env: "DASHSCOPE_BASE_URL",
+            default_base_url: openai_compat::DEFAULT_DASHSCOPE_BASE_URL,
+        },
+    ),
 ];
 
 #[must_use]
@@ -570,6 +579,34 @@ mod tests {
         );
     }
 
+    #[test]
+    fn kimi_prefix_routes_to_dashscope() {
+        // Kimi models via DashScope (kimi-k2.5, kimi-k1.5, etc.)
+        let meta = super::metadata_for_model("kimi-k2.5")
+            .expect("kimi-k2.5 must resolve to DashScope metadata");
+        assert_eq!(meta.auth_env, "DASHSCOPE_API_KEY");
+        assert_eq!(meta.base_url_env, "DASHSCOPE_BASE_URL");
+        assert!(meta.default_base_url.contains("dashscope.aliyuncs.com"));
+        assert_eq!(meta.provider, ProviderKind::OpenAi);
+
+        // With provider prefix
+        let meta2 = super::metadata_for_model("kimi/kimi-k2.5")
+            .expect("kimi/kimi-k2.5 must resolve to DashScope metadata");
+        assert_eq!(meta2.auth_env, "DASHSCOPE_API_KEY");
+        assert_eq!(meta2.provider, ProviderKind::OpenAi);
+
+        // Different kimi variants
+        let meta3 = super::metadata_for_model("kimi-k1.5")
+            .expect("kimi-k1.5 must resolve to DashScope metadata");
+        assert_eq!(meta3.auth_env, "DASHSCOPE_API_KEY");
+    }
+
+    #[test]
+    fn kimi_alias_resolves_to_kimi_k2_5() {
+        assert_eq!(super::resolve_model_alias("kimi"), "kimi-k2.5");
+        assert_eq!(super::resolve_model_alias("KIMI"), "kimi-k2.5"); // case insensitive
+    }
+
     #[test]
     fn keeps_existing_max_token_heuristic() {
         assert_eq!(max_tokens_for_model("opus"), 32_000);
diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs