US-021: Add request body size pre-flight check for OpenAI-compatible provider

Yeachan-Heo · Yeachan-Heo · commit 5e65b3304299 · 2026-04-16T17:41:57.000Z
diff --git a/rust/crates/api/src/error.rs b/rust/crates/api/src/error.rs
@@ -63,6 +63,11 @@ pub enum ApiError {
         attempt: u32,
         base_delay: Duration,
     },
+    RequestBodySizeExceeded {
+        estimated_bytes: usize,
+        max_bytes: usize,
+        provider: &'static str,
+    },
 }
 
 impl ApiError {
@@ -129,7 +134,8 @@ impl ApiError {
             | Self::Io(_)
             | Self::Json { .. }
             | Self::InvalidSseFrame(_)
-            | Self::BackoffOverflow { .. } => false,
+            | Self::BackoffOverflow { .. }
+            | Self::RequestBodySizeExceeded { .. } => false,
         }
     }
 
@@ -147,7 +153,8 @@ impl ApiError {
             | Self::Io(_)
             | Self::Json { .. }
             | Self::InvalidSseFrame(_)
-            | Self::BackoffOverflow { .. } => None,
+            | Self::BackoffOverflow { .. }
+            | Self::RequestBodySizeExceeded { .. } => None,
         }
     }
 
@@ -172,6 +179,7 @@ impl ApiError {
                 "provider_transport"
             }
             Self::InvalidApiKeyEnv(_) | Self::Io(_) | Self::Json { .. } => "runtime_io",
+            Self::RequestBodySizeExceeded { .. } => "request_size",
         }
     }
 
@@ -194,7 +202,8 @@ impl ApiError {
             | Self::Io(_)
             | Self::Json { .. }
             | Self::InvalidSseFrame(_)
-            | Self::BackoffOverflow { .. } => false,
+            | Self::BackoffOverflow { .. }
+            | Self::RequestBodySizeExceeded { .. } => false,
         }
     }
 
@@ -223,7 +232,8 @@ impl ApiError {
             | Self::Io(_)
             | Self::Json { .. }
             | Self::InvalidSseFrame(_)
-            | Self::BackoffOverflow { .. } => false,
+            | Self::BackoffOverflow { .. }
+            | Self::RequestBodySizeExceeded { .. } => false,
         }
     }
 }
@@ -324,6 +334,16 @@ impl Display for ApiError {
                 f,
                 "retry backoff overflowed on attempt {attempt} with base delay {base_delay:?}"
             ),
+            Self::RequestBodySizeExceeded {
+                estimated_bytes,
+                max_bytes,
+                provider,
+            } => write!(
+                f,
+                "request body size ({} bytes) exceeds {provider} limit ({} bytes); reduce prompt length or context before retrying",
+                estimated_bytes,
+                max_bytes
+            ),
         }
     }
 }
diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
@@ -31,12 +31,22 @@ pub struct OpenAiCompatConfig {
     pub api_key_env: &'static str,
     pub base_url_env: &'static str,
     pub default_base_url: &'static str,
+    /// Maximum request body size in bytes. Provider-specific limits:
+    /// - DashScope: 6MB (6_291_456 bytes) - observed in dogfood testing
+    /// - OpenAI: 100MB (104_857_600 bytes)
+    /// - xAI: 50MB (52_428_800 bytes)
+    pub max_request_body_bytes: usize,
 }
 
 const XAI_ENV_VARS: &[&str] = &["XAI_API_KEY"];
 const OPENAI_ENV_VARS: &[&str] = &["OPENAI_API_KEY"];
 const DASHSCOPE_ENV_VARS: &[&str] = &["DASHSCOPE_API_KEY"];
 
+// Provider-specific request body size limits in bytes ("MB" below means 1_048_576 bytes)
+const XAI_MAX_REQUEST_BODY_BYTES: usize = 52_428_800; // 50MB
+const OPENAI_MAX_REQUEST_BODY_BYTES: usize = 104_857_600; // 100MB
+const DASHSCOPE_MAX_REQUEST_BODY_BYTES: usize = 6_291_456; // 6MB (observed limit in dogfood)
+
 impl OpenAiCompatConfig {
     #[must_use]
     pub const fn xai() -> Self {
@@ -45,6 +55,7 @@ impl OpenAiCompatConfig {
             api_key_env: "XAI_API_KEY",
             base_url_env: "XAI_BASE_URL",
             default_base_url: DEFAULT_XAI_BASE_URL,
+            max_request_body_bytes: XAI_MAX_REQUEST_BODY_BYTES,
         }
     }
 
@@ -55,6 +66,7 @@ impl OpenAiCompatConfig {
             api_key_env: "OPENAI_API_KEY",
             base_url_env: "OPENAI_BASE_URL",
             default_base_url: DEFAULT_OPENAI_BASE_URL,
+            max_request_body_bytes: OPENAI_MAX_REQUEST_BODY_BYTES,
         }
     }
 
@@ -69,6 +81,7 @@ impl OpenAiCompatConfig {
             api_key_env: "DASHSCOPE_API_KEY",
             base_url_env: "DASHSCOPE_BASE_URL",
             default_base_url: DEFAULT_DASHSCOPE_BASE_URL,
+            max_request_body_bytes: DASHSCOPE_MAX_REQUEST_BODY_BYTES,
         }
     }
 
@@ -249,6 +262,9 @@ impl OpenAiCompatClient {
         &self,
         request: &MessageRequest,
     ) -> Result<reqwest::Response, ApiError> {
+        // Pre-flight check: verify request body size against provider limits
+        check_request_body_size(request, self.config())?;
+
         let request_url = chat_completions_endpoint(&self.base_url);
         self.http
             .post(&request_url)
@@ -791,9 +807,41 @@ fn strip_routing_prefix(model: &str) -> &str {
     }
 }
 
+/// Measures the JSON-serialized size, in bytes, of the chat completion payload built from `request`.
+/// Returns 0 if serialization fails; `check_request_body_size` compares the result to the limit.
+pub fn estimate_request_body_size(request: &MessageRequest, config: OpenAiCompatConfig) -> usize {
+    let payload = build_chat_completion_request(request, config);
+    // serde_json::to_vec gives us the exact byte size of the serialized JSON
+    serde_json::to_vec(&payload).map_or(0, |v| v.len())
+}
+
+/// Pre-flight check for request body size against provider limits.
+/// Returns `Ok(())` when the measured size is at most `config.max_request_body_bytes`;
+/// otherwise `ApiError::RequestBodySizeExceeded` with both sizes and the provider name.
+pub fn check_request_body_size(
+    request: &MessageRequest,
+    config: OpenAiCompatConfig,
+) -> Result<(), ApiError> {
+    let estimated_bytes = estimate_request_body_size(request, config);
+    let max_bytes = config.max_request_body_bytes;
+
+    if estimated_bytes > max_bytes {
+        Err(ApiError::RequestBodySizeExceeded {
+            estimated_bytes,
+            max_bytes,
+            provider: config.provider_name,
+        })
+    } else {
+        Ok(())
+    }
+}
+
 /// Builds a chat completion request payload from a `MessageRequest`.
 /// Public for benchmarking purposes.
-pub fn build_chat_completion_request(request: &MessageRequest, config: OpenAiCompatConfig) -> Value {
+pub fn build_chat_completion_request(
+    request: &MessageRequest,
+    config: OpenAiCompatConfig,
+) -> Value {
     let mut messages = Vec::new();
     if let Some(system) = request.system.as_ref().filter(|value| !value.is_empty()) {
         messages.push(json!({
@@ -2031,4 +2079,102 @@ mod tests {
         assert_eq!(tool_msg_gpt["content"], json!("file contents"));
         assert_eq!(tool_msg_kimi["content"], json!("file contents"));
     }
+
+    // ============================================================================
+    // US-021: Request body size pre-flight check tests
+    // ============================================================================
+
+    #[test]
+    fn estimate_request_body_size_returns_reasonable_estimate() {
+        let request = MessageRequest {
+            model: "gpt-4o".to_string(),
+            max_tokens: 100,
+            messages: vec![InputMessage::user_text("Hello world".to_string())],
+            stream: false,
+            ..Default::default()
+        };
+
+        let size = super::estimate_request_body_size(&request, OpenAiCompatConfig::openai());
+        // 0 is the estimator's fallback when serialization fails, so require a small non-zero size
+        assert!(size > 0, "estimated size should be positive");
+        assert!(size < 10_000, "small request should be under 10KB");
+    }
+
+    #[test]
+    fn check_request_body_size_passes_for_small_requests() {
+        let request = MessageRequest {
+            model: "gpt-4o".to_string(),
+            max_tokens: 100,
+            messages: vec![InputMessage::user_text("Hello".to_string())],
+            stream: false,
+            ..Default::default()
+        };
+
+        // A tiny payload sits far below every configured limit, so all three providers accept it
+        assert!(super::check_request_body_size(&request, OpenAiCompatConfig::openai()).is_ok());
+        assert!(super::check_request_body_size(&request, OpenAiCompatConfig::xai()).is_ok());
+        assert!(super::check_request_body_size(&request, OpenAiCompatConfig::dashscope()).is_ok());
+    }
+
+    #[test]
+    fn check_request_body_size_fails_for_dashscope_when_exceeds_6mb() {
+        // Build a request whose content alone is larger than DashScope's 6_291_456-byte limit
+        let large_content = "x".repeat(7_000_000); // 7_000_000 bytes (~6.7 MiB) of content
+        let request = MessageRequest {
+            model: "qwen-plus".to_string(),
+            max_tokens: 100,
+            messages: vec![InputMessage::user_text(large_content)],
+            stream: false,
+            ..Default::default()
+        };
+
+        let result = super::check_request_body_size(&request, OpenAiCompatConfig::dashscope());
+        assert!(result.is_err(), "should fail for 7MB request to DashScope");
+
+        let err = result.unwrap_err();
+        match err {
+            crate::error::ApiError::RequestBodySizeExceeded {
+                estimated_bytes,
+                max_bytes,
+                provider,
+            } => {
+                assert_eq!(provider, "DashScope");
+                assert_eq!(max_bytes, 6_291_456); // 6 * 1024 * 1024
+                assert!(estimated_bytes > max_bytes);
+            }
+            _ => panic!("expected RequestBodySizeExceeded error, got {:?}", err),
+        }
+    }
+
+    #[test]
+    fn check_request_body_size_allows_large_requests_for_openai() {
+        // 10_000_000 bytes of content: over DashScope's 6_291_456 limit, under OpenAI's 104_857_600
+        let large_content = "x".repeat(10_000_000); // 10_000_000 bytes (~9.5 MiB) of content
+        let request = MessageRequest {
+            model: "gpt-4o".to_string(),
+            max_tokens: 100,
+            messages: vec![InputMessage::user_text(large_content)],
+            stream: false,
+            ..Default::default()
+        };
+
+        // OpenAI's limit (104_857_600 bytes) comfortably covers this payload
+        assert!(
+            super::check_request_body_size(&request, OpenAiCompatConfig::openai()).is_ok(),
+            "10MB request should pass for OpenAI's 100MB limit"
+        );
+
+        // DashScope's limit (6_291_456 bytes) does not
+        assert!(
+            super::check_request_body_size(&request, OpenAiCompatConfig::dashscope()).is_err(),
+            "10MB request should fail for DashScope's 6MB limit"
+        );
+    }
+
+    #[test]
+    fn provider_specific_size_limits_are_correct() {
+        assert_eq!(OpenAiCompatConfig::dashscope().max_request_body_bytes, 6_291_456); // 6 * 1024 * 1024
+        assert_eq!(OpenAiCompatConfig::openai().max_request_body_bytes, 104_857_600); // 100 * 1024 * 1024
+        assert_eq!(OpenAiCompatConfig::xai().max_request_body_bytes, 52_428_800); // 50 * 1024 * 1024
+    }
 }

Original file line number	Diff line number	Diff line change
`@@ -63,6 +63,11 @@ pub enum ApiError {`
`63`	`63`	`attempt: u32,`
`64`	`64`	`base_delay: Duration,`
`65`	`65`	`},`
	`66`	`+ RequestBodySizeExceeded {`
	`67`	`+ estimated_bytes: usize,`
	`68`	`+ max_bytes: usize,`
	`69`	`+ provider: &'static str,`
	`70`	`+ },`
`66`	`71`	`}`
`67`	`72`
`68`	`73`	`impl ApiError {`
`@@ -129,7 +134,8 @@ impl ApiError {`
`129`	`134`	`\| Self::Io(_)`
`130`	`135`	`\| Self::Json { .. }`
`131`	`136`	`\| Self::InvalidSseFrame(_)`
`132`		`- \| Self::BackoffOverflow { .. } => false,`
	`137`	`+ \| Self::BackoffOverflow { .. }`
	`138`	`+ \| Self::RequestBodySizeExceeded { .. } => false,`
`133`	`139`	`}`
`134`	`140`	`}`
`135`	`141`
`@@ -147,7 +153,8 @@ impl ApiError {`
`147`	`153`	`\| Self::Io(_)`
`148`	`154`	`\| Self::Json { .. }`
`149`	`155`	`\| Self::InvalidSseFrame(_)`
`150`		`- \| Self::BackoffOverflow { .. } => None,`
	`156`	`+ \| Self::BackoffOverflow { .. }`
	`157`	`+ \| Self::RequestBodySizeExceeded { .. } => None,`
`151`	`158`	`}`
`152`	`159`	`}`
`153`	`160`
`@@ -172,6 +179,7 @@ impl ApiError {`
`172`	`179`	`"provider_transport"`
`173`	`180`	`}`
`174`	`181`	`Self::InvalidApiKeyEnv(_) \| Self::Io(_) \| Self::Json { .. } => "runtime_io",`
	`182`	`+ Self::RequestBodySizeExceeded { .. } => "request_size",`
`175`	`183`	`}`
`176`	`184`	`}`
`177`	`185`
`@@ -194,7 +202,8 @@ impl ApiError {`
`194`	`202`	`\| Self::Io(_)`
`195`	`203`	`\| Self::Json { .. }`
`196`	`204`	`\| Self::InvalidSseFrame(_)`
`197`		`- \| Self::BackoffOverflow { .. } => false,`
	`205`	`+ \| Self::BackoffOverflow { .. }`
	`206`	`+ \| Self::RequestBodySizeExceeded { .. } => false,`
`198`	`207`	`}`
`199`	`208`	`}`
`200`	`209`
`@@ -223,7 +232,8 @@ impl ApiError {`
`223`	`232`	`\| Self::Io(_)`
`224`	`233`	`\| Self::Json { .. }`
`225`	`234`	`\| Self::InvalidSseFrame(_)`
`226`		`- \| Self::BackoffOverflow { .. } => false,`
	`235`	`+ \| Self::BackoffOverflow { .. }`
	`236`	`+ \| Self::RequestBodySizeExceeded { .. } => false,`
`227`	`237`	`}`
`228`	`238`	`}`
`229`	`239`	`}`
`@@ -324,6 +334,16 @@ impl Display for ApiError {`
`324`	`334`	`f,`
`325`	`335`	`"retry backoff overflowed on attempt {attempt} with base delay {base_delay:?}"`
`326`	`336`	`),`
	`337`	`+ Self::RequestBodySizeExceeded {`
	`338`	`+ estimated_bytes,`
	`339`	`+ max_bytes,`
	`340`	`+ provider,`
	`341`	`+ } => write!(`
	`342`	`+ f,`
	`343`	`+ "request body size ({} bytes) exceeds {provider} limit ({} bytes); reduce prompt length or context before retrying",`
	`344`	`+ estimated_bytes,`
	`345`	`+ max_bytes`
	`346`	`+ ),`
`327`	`347`	`}`
`328`	`348`	`}`
`329`	`349`	`}`