From 0e7a5a89a474a424f15381f9c61bd03b072a4ccb Mon Sep 17 00:00:00 2001 From: "Niraj Chaudhari (Persistent Systems Inc)" Date: Mon, 18 May 2026 15:59:37 +0530 Subject: [PATCH 1/5] Fix for bug 43627 --- src/App/src/api/httpClient.ts | 2 +- src/App/src/pages/PlanPage.tsx | 28 ++++++++++++++++++----- src/App/src/store/WebSocketService.tsx | 31 ++++++++++++++++++++++++++ src/backend/v4/api/router.py | 23 +++++++++++++------ src/backend/v4/config/settings.py | 6 +++-- 5 files changed, 75 insertions(+), 15 deletions(-) diff --git a/src/App/src/api/httpClient.ts b/src/App/src/api/httpClient.ts index 866709c34..9c30256df 100644 --- a/src/App/src/api/httpClient.ts +++ b/src/App/src/api/httpClient.ts @@ -20,7 +20,7 @@ class HttpClient { private responseInterceptors: ResponseInterceptor[] = []; private timeout: number; - constructor(baseUrl = '', timeout = 30000) { + constructor(baseUrl = '', timeout = 180000) { this.baseUrl = baseUrl; this.timeout = timeout; } diff --git a/src/App/src/pages/PlanPage.tsx b/src/App/src/pages/PlanPage.tsx index 981323e9a..fe9799e71 100644 --- a/src/App/src/pages/PlanPage.tsx +++ b/src/App/src/pages/PlanPage.tsx @@ -182,19 +182,37 @@ const PlanPage: React.FC = () => { if (!planApprovalRequest) return; dispatch(setProcessingApproval(true)); const id = showToast('Submitting Approval', 'progress'); - try { - await apiService.approvePlan({ + + const submitApproval = () => + apiService.approvePlan({ m_plan_id: planApprovalRequest.id, plan_id: planData?.plan?.id ?? '', approved: true, feedback: 'Plan approved by user', }); + + try { + await submitApproval(); dismissToast(id); /* P0: single compound action replaces 3 separate dispatches */ dispatch(planApprovalAccepted()); - } catch { - dismissToast(id); - showToast('Failed to submit approval', 'error'); + } catch (firstError) { + // Approval failed — the backend may have timed out or the WS dropped. + // Reconnect the WebSocket and retry once before giving up. 
+ try { + if (!webSocketService.isConnected() && planData?.plan?.id) { + await webSocketService.connect(planData.plan.id); + } + await submitApproval(); + dismissToast(id); + dispatch(planApprovalAccepted()); + } catch { + dismissToast(id); + showToast( + 'Failed to submit approval. The plan may have timed out — please start a new task and try again.', + 'error', + ); + } } finally { dispatch(setProcessingApproval(false)); } diff --git a/src/App/src/store/WebSocketService.tsx b/src/App/src/store/WebSocketService.tsx index 7fff80a56..9c532b208 100644 --- a/src/App/src/store/WebSocketService.tsx +++ b/src/App/src/store/WebSocketService.tsx @@ -11,6 +11,7 @@ class WebSocketService { private listeners: Map void>> = new Map(); private planSubscriptions: Set = new Set(); private reconnectTimer: ReturnType | null = null; + private keepaliveTimer: ReturnType | null = null; private isConnecting = false; private intentionalDisconnect = false; private lastPlanId: string | undefined; @@ -59,6 +60,7 @@ class WebSocketService { clearTimeout(this.reconnectTimer); this.reconnectTimer = null; } + this.startKeepalive(); this.emit('connection_status', { connected: true }); resolve(); }; @@ -75,6 +77,7 @@ class WebSocketService { this.ws.onclose = (event) => { this.isConnecting = false; this.ws = null; + this.stopKeepalive(); this.emit('connection_status', { connected: false }); /* P1: Only auto-reconnect if not intentional and not a clean close */ if (!this.intentionalDisconnect && event.code !== 1000 && @@ -99,6 +102,7 @@ class WebSocketService { disconnect(): void { this.intentionalDisconnect = true; + this.stopKeepalive(); if (this.reconnectTimer) { clearTimeout(this.reconnectTimer); this.reconnectTimer = null; @@ -197,6 +201,11 @@ class WebSocketService { } private handleMessage(message: StreamMessage): void { + // Ignore keepalive ping/pong messages from the server + const msgType = message.type as string; + if (msgType === 'ping' || msgType === 'pong') { + return; + } switch 
(message.type) { case WebsocketMessageType.PLAN_APPROVAL_REQUEST: { @@ -277,6 +286,28 @@ class WebSocketService { } } + private startKeepalive(): void { + this.stopKeepalive(); + // Send a ping every 30 seconds to prevent idle timeout from + // Azure Container Apps / reverse proxies closing the WebSocket. + this.keepaliveTimer = setInterval(() => { + if (this.ws && this.ws.readyState === WebSocket.OPEN) { + try { + this.ws.send(JSON.stringify({ type: 'pong' })); + } catch { + // If send fails, the onclose handler will trigger reconnect + } + } + }, 30_000); + } + + private stopKeepalive(): void { + if (this.keepaliveTimer) { + clearInterval(this.keepaliveTimer); + this.keepaliveTimer = null; + } + } + private attemptReconnect(): void { if (this.reconnectAttempts >= this.maxReconnectAttempts) { this.emit('error', { error: 'Max reconnection attempts reached' }); diff --git a/src/backend/v4/api/router.py b/src/backend/v4/api/router.py index 2a3d5fd97..e9ce3bf08 100644 --- a/src/backend/v4/api/router.py +++ b/src/backend/v4/api/router.py @@ -93,18 +93,26 @@ async def start_comms( # Keep the connection open - FastAPI will close the connection if this returns try: # Keep the connection open - FastAPI will close the connection if this returns + # Send periodic pings to prevent idle timeout from Azure infra / reverse proxies. + ping_interval = 30 # seconds + last_ping = asyncio.get_event_loop().time() while True: # no expectation that we will receive anything from the client but this keeps # the connection open and does not take cpu cycle try: - message = await websocket.receive_text() + message = await asyncio.wait_for( + websocket.receive_text(), timeout=ping_interval + ) + last_ping = asyncio.get_event_loop().time() logging.debug(f"Received WebSocket message from {user_id}: {message}") except asyncio.TimeoutError: - # Ignore timeouts to keep the WebSocket connection open, but avoid a tight loop. 
- logging.debug( - f"WebSocket receive timeout for user {user_id}, process {process_id}" - ) - await asyncio.sleep(0.1) + # No message received within ping_interval — send a ping to keep alive. + try: + await websocket.send_text('{"type":"ping"}') + last_ping = asyncio.get_event_loop().time() + except Exception: + logging.info(f"Ping failed for {user_id}/{process_id}, connection likely closed") + break except WebSocketDisconnect: dc_props = {"process_id": process_id, "user_id": user_id} if session_id: @@ -537,7 +545,8 @@ async def plan_approval( human_feedback.m_plan_id ) raise HTTPException( - status_code=404, detail="No active plan found for approval" + status_code=404, + detail="No active plan found for approval. The plan may have timed out due to inactivity. Please start a new task.", ) except Exception as e: logging.error(f"Error processing plan approval: {e}") diff --git a/src/backend/v4/config/settings.py b/src/backend/v4/config/settings.py index fa112fcd9..66bec660b 100644 --- a/src/backend/v4/config/settings.py +++ b/src/backend/v4/config/settings.py @@ -97,8 +97,10 @@ def __init__(self): self._approval_events: Dict[str, asyncio.Event] = {} self._clarification_events: Dict[str, asyncio.Event] = {} - # Default timeout (seconds) for waiting operations - self.default_timeout: float = 300.0 + # Default timeout (seconds) for waiting operations. + # Set to 30 minutes to allow users time to review plans before approving. + # Previous value of 300s (5 min) caused approval failures after idle periods. 
+ self.default_timeout: float = 1800.0 def get_current_orchestration(self, user_id: str) -> Any: """Get existing orchestration workflow instance for user_id.""" From 2b02847a0c84b90d63665904cbb3bbb1db2cce50 Mon Sep 17 00:00:00 2001 From: "Niraj Chaudhari (Persistent Systems Inc)" Date: Tue, 19 May 2026 15:39:00 +0530 Subject: [PATCH 2/5] remove httpClient changes --- src/App/src/api/httpClient.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/App/src/api/httpClient.ts b/src/App/src/api/httpClient.ts index 9c30256df..866709c34 100644 --- a/src/App/src/api/httpClient.ts +++ b/src/App/src/api/httpClient.ts @@ -20,7 +20,7 @@ class HttpClient { private responseInterceptors: ResponseInterceptor[] = []; private timeout: number; - constructor(baseUrl = '', timeout = 180000) { + constructor(baseUrl = '', timeout = 30000) { this.baseUrl = baseUrl; this.timeout = timeout; } From 7eefb4ec9b0895966d495d390a08a8f911b367c7 Mon Sep 17 00:00:00 2001 From: "Niraj Chaudhari (Persistent Systems Inc)" Date: Tue, 19 May 2026 15:53:55 +0530 Subject: [PATCH 3/5] Remove Comment --- src/backend/v4/config/settings.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/backend/v4/config/settings.py b/src/backend/v4/config/settings.py index 66bec660b..9c88488a8 100644 --- a/src/backend/v4/config/settings.py +++ b/src/backend/v4/config/settings.py @@ -98,8 +98,6 @@ def __init__(self): self._clarification_events: Dict[str, asyncio.Event] = {} # Default timeout (seconds) for waiting operations. - # Set to 30 minutes to allow users time to review plans before approving. - # Previous value of 300s (5 min) caused approval failures after idle periods. 
self.default_timeout: float = 1800.0 def get_current_orchestration(self, user_id: str) -> Any: From 8f8355e0747f50e1dc0135bcd7bbab7fd5d57d45 Mon Sep 17 00:00:00 2001 From: "Niraj Chaudhari (Persistent Systems Inc)" Date: Tue, 19 May 2026 16:01:59 +0530 Subject: [PATCH 4/5] Resolve pylint issue --- src/backend/v4/api/router.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/backend/v4/api/router.py b/src/backend/v4/api/router.py index e9ce3bf08..6c92776e3 100644 --- a/src/backend/v4/api/router.py +++ b/src/backend/v4/api/router.py @@ -95,7 +95,6 @@ async def start_comms( # Keep the connection open - FastAPI will close the connection if this returns # Send periodic pings to prevent idle timeout from Azure infra / reverse proxies. ping_interval = 30 # seconds - last_ping = asyncio.get_event_loop().time() while True: # no expectation that we will receive anything from the client but this keeps # the connection open and does not take cpu cycle @@ -103,13 +102,11 @@ async def start_comms( message = await asyncio.wait_for( websocket.receive_text(), timeout=ping_interval ) - last_ping = asyncio.get_event_loop().time() logging.debug(f"Received WebSocket message from {user_id}: {message}") except asyncio.TimeoutError: # No message received within ping_interval — send a ping to keep alive. 
try: await websocket.send_text('{"type":"ping"}') - last_ping = asyncio.get_event_loop().time() except Exception: logging.info(f"Ping failed for {user_id}/{process_id}, connection likely closed") break From e74ab69115be3bbfed2670e46cf2965973baabca Mon Sep 17 00:00:00 2001 From: "Niraj Chaudhari (Persistent Systems Inc)" Date: Tue, 19 May 2026 16:08:09 +0530 Subject: [PATCH 5/5] Update test settings.py file as per changes --- src/tests/backend/v4/config/test_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tests/backend/v4/config/test_settings.py b/src/tests/backend/v4/config/test_settings.py index 1c055fe33..3d894c0ae 100644 --- a/src/tests/backend/v4/config/test_settings.py +++ b/src/tests/backend/v4/config/test_settings.py @@ -212,7 +212,7 @@ def test_orchestration_config_creation(self): self.assertEqual(config.max_rounds, 20) self.assertIsInstance(config._approval_events, dict) self.assertIsInstance(config._clarification_events, dict) - self.assertEqual(config.default_timeout, 300.0) + self.assertEqual(config.default_timeout, 1800.0) def test_get_current_orchestration(self): """Test getting current orchestration."""