From 689cc8b0b9eefb4cdee7130a1428ead88f86b0ea Mon Sep 17 00:00:00 2001
From: brown9804
Date: Sun, 1 Feb 2026 18:40:30 -0600
Subject: [PATCH 1/3] backend fix

---
 src/a2a/status_automation.ps1             |  31 +---
 src/app/agents/local_agent_processor.py   | 139 ++++++++++++++++-
 src/app/tools/singleAgentExample.py       |  12 +-
 src/chat_app_multi_agent.py               | 105 ++++++++++++-
 src/prompts/ShopperAgentPrompt.txt        |  12 +-
 src/services/azure_auth.py                |   4 +
 src/services/handoff_service.py           |   6 +
 terraform-infrastructure/main.tf          | 178 +++++++++++++++++++---
 terraform-infrastructure/terraform.tfvars |   2 +-
 9 files changed, 422 insertions(+), 67 deletions(-)

diff --git a/src/a2a/status_automation.ps1 b/src/a2a/status_automation.ps1
index 27db885..fd08fe0 100644
--- a/src/a2a/status_automation.ps1
+++ b/src/a2a/status_automation.ps1
@@ -1,35 +1,18 @@
-param(
-    [string]$WebAppName = $env:WEB_APP_NAME,
-    [string]$StatusUrl = $env:A2A_AUTOMATION_STATUS_URL
-)
-
-# Check A2A Automation Framework Status
+# Check A2A Automation Framework Status
 Write-Host "Checking A2A Automation Framework status..."
-$processes = Get-CimInstance Win32_Process -ErrorAction SilentlyContinue |
-    Where-Object { $_.CommandLine -like "*automated_main*" }
-
-if ($processes) {
+$processes = Get-Process -Name "python" -ErrorAction SilentlyContinue | Where-Object { $_.CommandLine -like "*automated_main*" }
+if ($processes) {
     Write-Host "A2A Automation Framework is RUNNING"
-    Write-Host "Processes: $($processes.Count)"
-    $processes | Select-Object ProcessId,Name,CreationDate | Format-Table -AutoSize
+    Write-Host "Processes: $($processes.Count)"
+    $processes | Format-Table Id,ProcessName,StartTime
 } else {
     Write-Host "A2A Automation Framework is STOPPED"
 }
 
-# Build status URL dynamically
-if (-not $StatusUrl -and $WebAppName) {
-    $StatusUrl = "https://$WebAppName.azurewebsites.net/a2a/automation/status"
-}
-
-if (-not $StatusUrl) {
-    Write-Host "Automation endpoint not accessible (missing WebAppName or StatusUrl)"
-    return
-}
-
 # Check automation endpoint
 try {
-    $status = Invoke-RestMethod -Uri $StatusUrl -TimeoutSec 5
-    Write-Host "Automation Status: $($status | ConvertTo-Json -Compress)"
+    $status = Invoke-RestMethod -Uri "https://zava-8e5461ee-app.azurewebsites.net/a2a/automation/status" -TimeoutSec 5
+    Write-Host "Automation Status: $($status | ConvertTo-Json -Compress)"
 } catch {
     Write-Host "Automation endpoint not accessible"
 }
diff --git a/src/app/agents/local_agent_processor.py b/src/app/agents/local_agent_processor.py
index a54e592..ede9abe 100644
--- a/src/app/agents/local_agent_processor.py
+++ b/src/app/agents/local_agent_processor.py
@@ -1,5 +1,8 @@
 import json
 import os
+import logging
+import traceback
+import uuid
 from typing import List, Dict, Any, Generator
 from azure.ai.inference import ChatCompletionsClient
 from azure.core.credentials import AzureKeyCredential
@@ -34,6 +37,12 @@ class LocalAgentProcessor:
     def __init__(self, agent_id: str, domain: str):
         self.agent_id = agent_id
         self.domain = domain
+        self.logger = logging.getLogger("local_agent_processor")
+        self._debug = os.getenv("A2A_DEBUG", "").lower() in {"1", "true", "yes"}
+        self._last_error_id: str | None = None
+        self._last_error_detail: str | None = None
+        if self._debug:
+            self.logger.setLevel(logging.DEBUG)
 
         # Initialize GPT client (shared across all agents)
         endpoint = (
@@ -58,6 +67,13 @@ def __init__(self, agent_id: str, domain: str):
         self.use_gpt = False
         self.client = None
         self.model = deployment
+        self._inference_endpoint: str | None = None
+        self._using_key_auth: bool = False
+
+        # Default to managed identity / Entra ID auth in Azure.
+ # Only use key-based auth when explicitly enabled. + self._prefer_aad: bool = os.getenv("A2A_PREFER_AAD", "true").lower() in {"1", "true", "yes"} + self._allow_key_auth: bool = os.getenv("A2A_USE_KEY_AUTH", "").lower() in {"1", "true", "yes"} if endpoint and deployment: # Convert endpoint to Foundry format if needed @@ -67,21 +83,67 @@ def __init__(self, agent_id: str, domain: str): if not foundry_endpoint.endswith('/models'): foundry_endpoint = f"{foundry_endpoint.rstrip('/')}/models" + self._inference_endpoint = foundry_endpoint + try: - # Prefer key auth if present; otherwise use token-based auth (Managed Identity in cloud). - if api_key: + # Prefer token-based auth (Managed Identity in cloud). + # Keys are often disabled (disableLocalAuth) and should be opt-in. + if api_key and self._allow_key_auth and not self._prefer_aad: credential = AzureKeyCredential(api_key) + self._using_key_auth = True else: credential = get_inference_credential( api_key=None, default_credential=get_default_credential(), endpoint=foundry_endpoint, ) + self._using_key_auth = False self.client = ChatCompletionsClient(endpoint=foundry_endpoint, credential=credential) self.use_gpt = True except Exception: + self.logger.exception("Failed to initialize ChatCompletionsClient (endpoint=%s, deployment=%s)", foundry_endpoint, deployment) self.use_gpt = False + def _format_exception_detail(self, error_id: str, exc: Exception) -> str: + """Format a detailed, UI-safe error message for troubleshooting.""" + parts: list[str] = [] + parts.append(f"error_id={error_id}") + parts.append(f"agent_domain={self.domain}") + parts.append(f"model={self.model}") + parts.append(f"endpoint={self._inference_endpoint}") + parts.append(f"auth_mode={'key' if self._using_key_auth else 'aad'}") + + # Helpful identity context when running on Azure + azure_client_id = os.getenv("AZURE_CLIENT_ID") + if azure_client_id: + parts.append(f"AZURE_CLIENT_ID={azure_client_id}") + + parts.append(f"exception_type={type(exc).__name__}") + parts.append(f"exception={str(exc)}") + + # Try to extract HTTP response details if present + response = getattr(exc, "response", None) + if response is not None: + status_code = getattr(response, "status_code", None) + if status_code is not None: + parts.append(f"http_status={status_code}") + + headers = getattr(response, "headers", None) or {} + for header_name in ("x-ms-request-id", "x-ms-client-request-id", "x-ms-correlation-request-id"): + header_value = headers.get(header_name) + if header_value: + parts.append(f"{header_name}={header_value}") + + status_code = getattr(exc, "status_code", None) + if status_code is not None and "http_status=" not in "\n".join(parts): + parts.append(f"http_status={status_code}") + + # Include traceback only in debug mode + if self._debug: + parts.append("traceback=\n" + traceback.format_exc()) + + return "\n".join(parts) + def _call_gpt(self, user_message: str, conversation_history: List[Dict[str, str]] | None = None, additional_context: Dict[str, Any] | None = None) -> str: """Call GPT with domain-specific system prompt.""" if not self.use_gpt: @@ -115,10 +177,55 @@ def _call_gpt(self, user_message: str, conversation_history: List[Dict[str, str] temperature=0.7, max_tokens=500 ) - + self._last_error_id = None + self._last_error_detail = None return response.choices[0].message.content except Exception as e: - return f"I'm having trouble connecting right now. 
Error: {str(e)[:100]}" + # If we attempted key auth and the resource has local auth disabled, + # transparently retry once with Entra ID (managed identity) auth. + if self._using_key_auth and self._inference_endpoint: + try: + error_code = getattr(e, "error", None) + if hasattr(e, "response") and getattr(getattr(e, "response", None), "status_code", None) == 403: + # Heuristic: the common case we see is AuthenticationTypeDisabled. + msg = str(e) or "" + if "AuthenticationTypeDisabled" in msg or "Key based authentication is disabled" in msg: + self.logger.warning( + "Key auth disabled for inference endpoint; retrying with AAD (domain=%s, endpoint=%s)", + self.domain, + self._inference_endpoint, + ) + aad_cred = get_inference_credential( + api_key=None, + default_credential=get_default_credential(), + endpoint=self._inference_endpoint, + ) + self.client = ChatCompletionsClient(endpoint=self._inference_endpoint, credential=aad_cred) + self._using_key_auth = False + retry = self.client.complete( + messages=messages, + model=self.model, + temperature=0.7, + max_tokens=500, + ) + self._last_error_id = None + self._last_error_detail = None + return retry.choices[0].message.content + except Exception: + # Fall through to normal error handling + pass + + error_id = uuid.uuid4().hex + self._last_error_id = error_id + self._last_error_detail = self._format_exception_detail(error_id, e) + self.logger.exception( + "GPT call failed (error_id=%s, domain=%s, endpoint=%s, model=%s)", + error_id, + self.domain, + self._inference_endpoint, + self.model, + ) + return f"I'm having trouble connecting right now. (error_id={error_id})" def _interior_design(self, user_message: str, conversation_history: List[Dict[str, str]] | None = None, additional_context: Dict[str, Any] | None = None) -> Dict[str, Any]: # Check if this is an image generation request @@ -154,12 +261,20 @@ def _interior_design(self, user_message: str, conversation_history: List[Dict[st def _inventory(self, user_message: str, conversation_history: List[Dict[str, str]] | None = None, additional_context: Dict[str, Any] | None = None) -> Dict[str, Any]: answer = self._call_gpt(user_message, conversation_history, additional_context) - return {"answer": answer} + result: Dict[str, Any] = {"answer": answer} + if self._last_error_detail: + result["error"] = self._last_error_detail + result["error_id"] = self._last_error_id + return result def _customer_loyalty(self, customer_id: str | None, conversation_history: List[Dict[str, str]] | None = None, additional_context: Dict[str, Any] | None = None) -> Dict[str, Any]: user_message = f"Check loyalty benefits for customer {customer_id or 'current customer'}" answer = self._call_gpt(user_message, conversation_history, additional_context) - return {"answer": answer, "discount_percentage": "10"} + result: Dict[str, Any] = {"answer": answer, "discount_percentage": "10"} + if self._last_error_detail: + result["error"] = self._last_error_detail + result["error_id"] = self._last_error_id + return result def _cart_management(self, user_message: str, conversation_history: List[Dict[str, str]] | None = None, additional_context: Dict[str, Any] | None = None) -> Dict[str, Any]: cart = additional_context.get("cart", []) if additional_context else [] @@ -176,11 +291,19 @@ def _cart_management(self, user_message: str, conversation_history: List[Dict[st # Get GPT response about the cart action answer = self._call_gpt(user_message, conversation_history, {"cart": cart}) - return {"answer": answer, "cart": cart} + result: 
Dict[str, Any] = {"answer": answer, "cart": cart} + if self._last_error_detail: + result["error"] = self._last_error_detail + result["error_id"] = self._last_error_id + return result def _cora(self, user_message: str, conversation_history: List[Dict[str, str]] | None = None, additional_context: Dict[str, Any] | None = None) -> Dict[str, Any]: answer = self._call_gpt(user_message, conversation_history, additional_context) - return {"answer": answer} + result: Dict[str, Any] = {"answer": answer} + if self._last_error_detail: + result["error"] = self._last_error_detail + result["error_id"] = self._last_error_id + return result def run_conversation_with_text_stream( self, diff --git a/src/app/tools/singleAgentExample.py b/src/app/tools/singleAgentExample.py index d072ddb..1bb6f23 100644 --- a/src/app/tools/singleAgentExample.py +++ b/src/app/tools/singleAgentExample.py @@ -1,5 +1,6 @@ import os import time +import logging from azure.ai.inference import ChatCompletionsClient from azure.core.credentials import AzureKeyCredential from azure.identity import DefaultAzureCredential @@ -29,6 +30,9 @@ # Global client instance client = None +logger = logging.getLogger("single_agent_example") +if os.getenv("A2A_DEBUG", "").lower() in {"1", "true", "yes"}: + logging.basicConfig(level=logging.DEBUG) def get_client(): """Lazily initialize and return the MSFT Foundry client""" @@ -115,7 +119,8 @@ def generate_response(text_input): ] # Call MSFT Foundry chat API - response = client.complete( + try: + response = client.complete( model=deployment, messages=messages, max_tokens=10000, @@ -123,7 +128,10 @@ def generate_response(text_input): top_p=1.0, frequency_penalty=0, presence_penalty=0 - ) + ) + except Exception: + logger.exception("singleAgentExample GPT call failed (endpoint=%s, deployment=%s)", getattr(client, "_endpoint", None), deployment) + raise end_sum = time.time() print(f"generate_response Execution Time: {end_sum - start_time} seconds") diff --git a/src/chat_app_multi_agent.py b/src/chat_app_multi_agent.py index b09a4d5..b217475 100644 --- a/src/chat_app_multi_agent.py +++ b/src/chat_app_multi_agent.py @@ -1,6 +1,8 @@ import os import logging import json +import uuid +import traceback from typing import Any, Dict from fastapi import FastAPI, WebSocket, WebSocketDisconnect, Request from fastapi.responses import HTMLResponse @@ -28,6 +30,17 @@ ) logger = logging.getLogger(__name__) + +def _debug_enabled() -> bool: + return os.getenv("A2A_DEBUG", "").lower() in {"1", "true", "yes"} + + +def _format_exception_for_client(error_id: str, exc: Exception) -> str: + parts: list[str] = [f"error_id={error_id}", f"exception_type={type(exc).__name__}", f"exception={str(exc)}"] + if _debug_enabled(): + parts.append("traceback=\n" + traceback.format_exc()) + return "\n".join(parts) + # Initialize FastAPI app app = FastAPI(title="Zava AI Shopping Assistant") @@ -67,8 +80,58 @@ def _extract_plain_answer(raw: str) -> str: return inner.strip() except Exception: pass + + # Support legacy non-JSON "answer: ...\nimage_output: ...\nproducts: ..." blocks. + legacy = _parse_legacy_kv_block(text) + if legacy and isinstance(legacy.get("answer"), str): + return legacy["answer"].strip() return text + +def _parse_legacy_kv_block(text: str) -> Dict[str, Any] | None: + """Parse legacy key-value blocks emitted by older prompts. + + Example input: + answer: hello there + image_output: [] + products: [] + + Returns a dict with keys when recognized, else None. 
+ """ + if not text: + return None + + lines = [ln.strip() for ln in text.splitlines() if ln.strip()] + if not lines: + return None + + # Quick reject: must contain at least an answer line. + has_answer = any(ln.lower().startswith("answer:") for ln in lines) + if not has_answer: + return None + + parsed: Dict[str, Any] = {} + for line in lines: + # Only parse simple "key: value" lines. + if ":" not in line: + continue + key, value = line.split(":", 1) + key = key.strip().lower() + value = value.strip().rstrip(",") + + if key in {"answer", "image_output", "products"}: + if key == "answer": + parsed["answer"] = value + continue + + # Try to JSON-decode arrays/objects, otherwise keep as string. + try: + parsed[key] = json.loads(value) + except Exception: + parsed[key] = value + + return parsed or None + def _flatten_response_json(response_json: Dict[str, Any]) -> str: """Derive a single natural language answer from structured fields.""" base = response_json.get('answer') or '' @@ -242,6 +305,27 @@ async def websocket_endpoint(websocket: WebSocket): parsed_json = None if parsed_json: + # Some prompts return a JSON object as a *string* inside the + # outer {"answer": "..."} wrapper (local agents). Lift it. + if isinstance(parsed_json.get("answer"), str): + inner_text = parsed_json["answer"].strip() + if inner_text.startswith("{") and '"answer"' in inner_text: + try: + inner_obj = json.loads(inner_text) + if isinstance(inner_obj, dict): + # Prefer inner values for these well-known fields. + for key in ("answer", "products", "image_output", "cart", "discount_percentage", "image_url"): + if key in inner_obj and inner_obj[key] is not None: + parsed_json[key] = inner_obj[key] + except Exception: + pass + + # Some older prompts embed structured fields inside the + # answer string (e.g., "answer: ...\nproducts: []"). Normalize. + legacy = _parse_legacy_kv_block(parsed_json["answer"]) + if legacy: + parsed_json.update({k: v for k, v in legacy.items() if v is not None}) + if "cart" in parsed_json and isinstance(parsed_json["cart"], list): persistent_cart = parsed_json["cart"] if "discount_percentage" in parsed_json and parsed_json["discount_percentage"]: @@ -262,6 +346,20 @@ async def websocket_endpoint(websocket: WebSocket): "cart": persistent_cart, "discount": customer_discount } + + # Forward structured fields if present. + if parsed_json: + if "products" in parsed_json: + response_data["products"] = parsed_json.get("products") + if "image_output" in parsed_json: + response_data["image_output"] = parsed_json.get("image_output") + + # If the agent returned diagnostic error details, forward them to the UI. + if parsed_json: + if isinstance(parsed_json.get("error"), str): + response_data["error"] = parsed_json["error"] + if parsed_json.get("error_id") is not None: + response_data["error_id"] = parsed_json.get("error_id") # Include image URL if available if image_url: @@ -285,10 +383,11 @@ async def websocket_endpoint(websocket: WebSocket): logger.info("Response sent successfully from single agent") except Exception as e: - logger.error("Error during response generation", exc_info=True) + error_id = uuid.uuid4().hex + logger.error("Error during response generation (error_id=%s)", error_id, exc_info=True) await websocket.send_text(fast_json_dumps({ - "answer": "I'm sorry, I encountered an error processing your request. Please try again.", - "error": str(e), + "answer": f"I'm sorry, I encountered an error processing your request. Please try again. 
(error_id={error_id})", + "error": _format_exception_for_client(error_id, e), "cart": persistent_cart })) diff --git a/src/prompts/ShopperAgentPrompt.txt b/src/prompts/ShopperAgentPrompt.txt index 1e00cb4..2fd5192 100644 --- a/src/prompts/ShopperAgentPrompt.txt +++ b/src/prompts/ShopperAgentPrompt.txt @@ -2,11 +2,15 @@ Shopper Agent Guidelines ======================================== - You are the public facing assistant of Zava - Greet people and help them as needed -- Return response in following json format (image_output and products empty) +- Return response as a valid JSON object with the following shape: -answer: your answer, -image_output: [] -products: [] +{ + "answer": "", + "image_output": [], + "products": [] +} + +IMPORTANT: Your entire response must be valid JSON only (no extra text). Shopper Agent Tool diff --git a/src/services/azure_auth.py b/src/services/azure_auth.py index 5bc00f2..1d4a7b6 100644 --- a/src/services/azure_auth.py +++ b/src/services/azure_auth.py @@ -2,6 +2,7 @@ import os from dataclasses import dataclass +import logging from typing import Sequence from azure.core.credentials import AccessToken, TokenCredential @@ -11,6 +12,8 @@ COGNITIVE_SERVICES_SCOPE = "https://cognitiveservices.azure.com/.default" AI_FOUNDRY_SCOPE = "https://ai.azure.com/.default" +logger = logging.getLogger("azure_auth") + @dataclass(frozen=True) class FixedScopeTokenCredential(TokenCredential): @@ -49,6 +52,7 @@ def get_inference_credential( scope = COGNITIVE_SERVICES_SCOPE if endpoint and "services.ai.azure.com" in endpoint: scope = AI_FOUNDRY_SCOPE + logger.debug("Using token scope %s for endpoint %s", scope, endpoint) return FixedScopeTokenCredential(default_credential, scope=scope) diff --git a/src/services/handoff_service.py b/src/services/handoff_service.py index 4393e2f..0ed0b47 100644 --- a/src/services/handoff_service.py +++ b/src/services/handoff_service.py @@ -4,6 +4,7 @@ """ import os import json +import logging from typing import Dict, Any, Optional from pydantic import BaseModel from azure.ai.inference import ChatCompletionsClient @@ -37,6 +38,9 @@ class HandoffService: def __init__(self): """Initialize the handoff service with GPT client""" + self.logger = logging.getLogger("handoff_service") + if os.getenv("A2A_DEBUG", "").lower() in {"1", "true", "yes"}: + logging.basicConfig(level=logging.DEBUG) endpoint = ( os.getenv("gpt_endpoint") or os.getenv("AZURE_OPENAI_ENDPOINT") @@ -63,6 +67,7 @@ def __init__(self): foundry_endpoint = foundry_endpoint.replace('.services.azure.com', '.services.ai.azure.com') if not foundry_endpoint.endswith('/models'): foundry_endpoint = f"{foundry_endpoint.rstrip('/')}/models" + self.foundry_endpoint = foundry_endpoint if api_key: credential = AzureKeyCredential(api_key) @@ -177,6 +182,7 @@ def classify_intent( } except Exception as e: + self.logger.exception("Intent classification failed (endpoint=%s, deployment=%s)", self.foundry_endpoint, self.deployment) # Default to cora on error return { "domain": "cora", diff --git a/terraform-infrastructure/main.tf b/terraform-infrastructure/main.tf index 5111645..8c9ac9e 100644 --- a/terraform-infrastructure/main.tf +++ b/terraform-infrastructure/main.tf @@ -13,6 +13,9 @@ resource "random_id" "suffix" { byte_length = 4 } +# Stable GUID for custom role definitions +resource "random_uuid" "maas_inference_role_id" {} + locals { # Use provided user_principal_id or default to current Azure CLI user principal_id = var.user_principal_id != null ? 
var.user_principal_id : data.azurerm_client_config.current.object_id @@ -46,6 +49,30 @@ locals { deploy_to_container_apps = var.deployment_target == "containerapps" } +# Custom role to allow Azure AI Foundry MaaS inference endpoints via AAD. +# Some built-in roles don't include the MaaS dataActions required by the +# /models/chat/completions endpoint. +resource "azurerm_role_definition" "maas_inference_user" { + name = "${var.name_prefix}-${local.suffix}-maas-inference-user" + role_definition_id = random_uuid.maas_inference_role_id.result + scope = "/subscriptions/${data.azurerm_client_config.current.subscription_id}" + description = "Allows calling Azure AI Foundry MaaS chat/embeddings inference endpoints." + + permissions { + actions = [] + data_actions = [ + "Microsoft.CognitiveServices/accounts/MaaS/chat/completions/action", + "Microsoft.CognitiveServices/accounts/MaaS/embeddings/action", + ] + not_actions = [] + not_data_actions = [] + } + + assignable_scopes = [ + "/subscriptions/${data.azurerm_client_config.current.subscription_id}", + ] +} + resource "azurerm_cosmosdb_account" "cosmos" { name = local.cosmos_account_name location = azurerm_resource_group.rg.location @@ -108,7 +135,8 @@ resource "azapi_resource" "storage" { } # AI Foundry account (preview) using AzAPI provider. -# Using managed identity authentication (disableLocalAuth = true for better security) +# Managed identity is used by the app, but local auth must remain enabled for some +# automation steps (e.g., index/vectorizer configuration that requires an API key). resource "azapi_resource" "ai_foundry" { type = "Microsoft.CognitiveServices/accounts@2024-10-01" name = local.ai_foundry_name @@ -122,7 +150,8 @@ resource "azapi_resource" "ai_foundry" { properties = { allowProjectManagement = true customSubDomainName = local.ai_foundry_name - disableLocalAuth = true + disableLocalAuth = false + publicNetworkAccess = "Enabled" } }) } @@ -348,6 +377,15 @@ resource "azurerm_container_app" "app" { value = "8000" } + env { + name = "A2A_DEBUG" + value = "true" + } + env { + name = "APP_BUILD_ID" + value = local.app_source_hash + } + env { name = "USE_MULTI_AGENT" value = var.enable_multi_agent ? "true" : "false" @@ -362,12 +400,12 @@ resource "azurerm_container_app" "app" { value = local.ai_project_name } env { - name = "AZURE_AI_PROJECT_ENDPOINT" - value = "https://${local.ai_foundry_name}.services.ai.azure.com/api/projects/${local.ai_project_name}" + name = "AZURE_AI_PROJECT_ENDPOINT" + secret_name = "agent-endpoint" } env { - name = "AZURE_AI_AGENT_ENDPOINT" - value = "https://${local.ai_foundry_name}.services.ai.azure.com/api/projects/${local.ai_project_name}" + name = "AZURE_AI_AGENT_ENDPOINT" + secret_name = "agent-endpoint" } env { name = "AZURE_AI_AGENT_MODEL_DEPLOYMENT_NAME" @@ -463,6 +501,14 @@ resource "azurerm_container_app" "app" { name = "SEARCH_SERVICE_KEY" secret_name = "search-service-key" } + env { + name = "AZURE_OPENAI_API_KEY" + secret_name = "ai-foundry-key" + } + env { + name = "gpt_api_key" + secret_name = "ai-foundry-key" + } env { name = "STORAGE_CONNECTION_STRING" secret_name = "storage-connection-string" @@ -480,7 +526,7 @@ resource "azurerm_container_app" "app" { for_each = var.enable_cosmos_local_auth ? 
[] : [1] content { name = "COSMOS_DB_KEY" - value = "AAD_AUTH" + value = "" } } @@ -526,6 +572,9 @@ resource "azurerm_container_app" "app" { value = data.external.agents_state.result["agent_cart_manager_id"] } } + + min_replicas = 1 + max_replicas = 2 } ingress { @@ -553,6 +602,16 @@ resource "azurerm_container_app" "app" { key_vault_secret_id = "${azurerm_key_vault.kv.vault_uri}secrets/storage-connection-string" identity = azurerm_user_assigned_identity.containerapp_identity[0].id } + secret { + name = "agent-endpoint" + key_vault_secret_id = "${azurerm_key_vault.kv.vault_uri}secrets/agent-endpoint" + identity = azurerm_user_assigned_identity.containerapp_identity[0].id + } + secret { + name = "ai-foundry-key" + key_vault_secret_id = "${azurerm_key_vault.kv.vault_uri}secrets/ai-foundry-key" + identity = azurerm_user_assigned_identity.containerapp_identity[0].id + } dynamic "secret" { for_each = var.enable_cosmos_local_auth ? [1] : [] content { @@ -567,6 +626,10 @@ resource "azurerm_container_app" "app" { azurerm_user_assigned_identity.containerapp_identity, azurerm_role_assignment.kv_secrets_user_containerapp, azurerm_role_assignment.containerapp_acr_pull, + azapi_resource.containerapp_cosmos_data_contributor, + azurerm_role_assignment.containerapp_foundry_openai_user, + azurerm_role_assignment.containerapp_project_openai_user, + azurerm_role_assignment.containerapp_project_ai_user, null_resource.docker_image_build, null_resource.set_kv_secrets, null_resource.set_agent_kv_secrets @@ -729,6 +792,7 @@ resource "azurerm_linux_web_app" "app" { site_config { always_on = true http2_enabled = true + websockets_enabled = true minimum_tls_version = "1.2" # Ensure App Service waits for container readiness health_check_path = "/health" @@ -746,6 +810,8 @@ resource "azurerm_linux_web_app" "app" { WEBSITES_ENABLE_APP_SERVICE_STORAGE = "false" DOCKER_ENABLE_CI = "true" WEBSITES_PORT = "8000" + A2A_DEBUG = "true" + APP_BUILD_ID = local.app_source_hash # GPT Configuration (using managed identity) gpt_endpoint = "https://${local.ai_foundry_name}.cognitiveservices.azure.com/" @@ -766,7 +832,7 @@ resource "azurerm_linux_web_app" "app" { # External Service Keys via Key Vault SEARCH_SERVICE_KEY = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/search-admin-key)" - COSMOS_DB_KEY = var.enable_cosmos_local_auth ? "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/cosmos-primary-key)" : "AAD_AUTH" + COSMOS_DB_KEY = var.enable_cosmos_local_auth ? "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/cosmos-primary-key)" : "" STORAGE_CONNECTION_STRING = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/storage-connection-string)" # Multi-Agent Configuration - Agent IDs from Key Vault @@ -905,12 +971,43 @@ resource "null_resource" "set_kv_secrets" { command = <<-EOT $kv = "${azurerm_key_vault.kv.name}" Write-Host "Setting Key Vault secrets (search/storage/cosmos/agent-endpoint)..." 
+ + # Foundry endpoint (authoritative) -> also derive Agents API endpoint + $rawAiFoundryEndpoint = az cognitiveservices account show ` + --resource-group "${azurerm_resource_group.rg.name}" ` + --name "${local.ai_foundry_name}" ` + --query "properties.endpoint" ` + --output tsv + $agentEndpointBase = $rawAiFoundryEndpoint -replace "cognitiveservices\.azure\.com", "services.ai.azure.com" + $agentEndpointBase = $agentEndpointBase.TrimEnd("/") + $agentsProjectEndpoint = "$agentEndpointBase/api/projects/${local.ai_project_name}" + az keyvault secret set --vault-name $kv --name "search-admin-key" --value "${jsondecode(data.azapi_resource_action.search_admin_keys[0].output).primaryKey}" | Out-Null az keyvault secret set --vault-name $kv --name "storage-connection-string" --value "DefaultEndpointsProtocol=https;AccountName=${local.storage_account};AccountKey=${jsondecode(data.azapi_resource_action.storage_keys_unconditional.output).keys[0].value};EndpointSuffix=core.windows.net" | Out-Null + # Required for local automation/pipelines (some SDKs require an API key) + # The Foundry account can take a while to become terminal; retry key fetch. + $aiFoundryKey = $null + for ($i = 0; $i -lt 90; $i++) { + try { + $aiFoundryKey = az cognitiveservices account keys list ` + --resource-group "${azurerm_resource_group.rg.name}" ` + --name "${local.ai_foundry_name}" ` + --query "key1" ` + --output tsv + if ($aiFoundryKey) { break } + } catch { + # ignore and retry + } + Start-Sleep -Seconds 10 + } + if (-not $aiFoundryKey) { + throw "Timed out waiting for AI Foundry keys. Try re-running terraform apply after the account finishes provisioning." + } + az keyvault secret set --vault-name $kv --name "ai-foundry-key" --value $aiFoundryKey | Out-Null if (${var.enable_cosmos_local_auth ? "$true" : "$false"}) { az keyvault secret set --vault-name $kv --name "cosmos-primary-key" --value "${jsondecode(data.azapi_resource_action.cosmos_keys[0].output).primaryMasterKey}" | Out-Null } - az keyvault secret set --vault-name $kv --name "agent-endpoint" --value "https://${local.ai_foundry_name}.services.ai.azure.com/api/projects/${local.ai_project_name}" | Out-Null + az keyvault secret set --vault-name $kv --name "agent-endpoint" --value $agentsProjectEndpoint | Out-Null EOT interpreter = ["PowerShell", "-Command"] working_dir = path.module @@ -1189,13 +1286,13 @@ locals { # Assign Cosmos DB Built-in Data Contributor role to specified user principal resource "azapi_resource" "cosmos_user_data_contributor" { type = "Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-04-15" - name = md5("${azurerm_cosmosdb_account.cosmos.id}-${local.principal_id}-${local.cosmos_db_data_contributor_role_id}") + name = md5("${azurerm_cosmosdb_account.cosmos.id}-${local.principal_id}-${local.cosmos_db_data_contributor_role_id}-${azurerm_cosmosdb_sql_database.cosmosdb.name}") parent_id = azurerm_cosmosdb_account.cosmos.id body = jsonencode({ properties = { roleDefinitionId = "${azurerm_cosmosdb_account.cosmos.id}/sqlRoleDefinitions/${local.cosmos_db_data_contributor_role_id}" principalId = local.principal_id - scope = azurerm_cosmosdb_account.cosmos.id + scope = "${azurerm_cosmosdb_account.cosmos.id}/dbs/${azurerm_cosmosdb_sql_database.cosmosdb.name}" } }) } @@ -1204,16 +1301,19 @@ resource "azapi_resource" "cosmos_user_data_contributor" { resource "azapi_resource" "containerapp_cosmos_data_contributor" { count = local.deploy_to_container_apps ? 
1 : 0 type = "Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-04-15" - name = md5("${azurerm_cosmosdb_account.cosmos.id}-${azurerm_user_assigned_identity.containerapp_identity[0].principal_id}-${local.cosmos_db_data_contributor_role_id}") + name = md5("${azurerm_cosmosdb_account.cosmos.id}-${azurerm_user_assigned_identity.containerapp_identity[0].principal_id}-${local.cosmos_db_data_contributor_role_id}-${azurerm_cosmosdb_sql_database.cosmosdb.name}") parent_id = azurerm_cosmosdb_account.cosmos.id body = jsonencode({ properties = { roleDefinitionId = "${azurerm_cosmosdb_account.cosmos.id}/sqlRoleDefinitions/${local.cosmos_db_data_contributor_role_id}" principalId = azurerm_user_assigned_identity.containerapp_identity[0].principal_id - scope = azurerm_cosmosdb_account.cosmos.id + scope = "${azurerm_cosmosdb_account.cosmos.id}/dbs/${azurerm_cosmosdb_sql_database.cosmosdb.name}" } }) - depends_on = [azurerm_container_app.app] + depends_on = [ + azurerm_user_assigned_identity.containerapp_identity, + azurerm_cosmosdb_sql_database.cosmosdb + ] } # Role assignments for Search managed identity @@ -1226,26 +1326,26 @@ resource "azurerm_role_assignment" "search_cosmos_account_reader" { resource "azapi_resource" "search_cosmos_data_reader" { type = "Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-04-15" - name = md5("${azurerm_cosmosdb_account.cosmos.id}-${azurerm_search_service.search.identity[0].principal_id}-${local.cosmos_db_data_reader_role_id}") + name = md5("${azurerm_cosmosdb_account.cosmos.id}-${azurerm_search_service.search.identity[0].principal_id}-${local.cosmos_db_data_reader_role_id}-${azurerm_cosmosdb_sql_database.cosmosdb.name}") parent_id = azurerm_cosmosdb_account.cosmos.id body = jsonencode({ properties = { roleDefinitionId = "${azurerm_cosmosdb_account.cosmos.id}/sqlRoleDefinitions/${local.cosmos_db_data_reader_role_id}" principalId = azurerm_search_service.search.identity[0].principal_id - scope = azurerm_cosmosdb_account.cosmos.id + scope = "${azurerm_cosmosdb_account.cosmos.id}/dbs/${azurerm_cosmosdb_sql_database.cosmosdb.name}" } }) } resource "azapi_resource" "search_cosmos_data_contributor" { type = "Microsoft.DocumentDB/databaseAccounts/sqlRoleAssignments@2023-04-15" - name = md5("${azurerm_cosmosdb_account.cosmos.id}-${azurerm_search_service.search.identity[0].principal_id}-${local.cosmos_db_data_contributor_role_id}") + name = md5("${azurerm_cosmosdb_account.cosmos.id}-${azurerm_search_service.search.identity[0].principal_id}-${local.cosmos_db_data_contributor_role_id}-${azurerm_cosmosdb_sql_database.cosmosdb.name}") parent_id = azurerm_cosmosdb_account.cosmos.id body = jsonencode({ properties = { roleDefinitionId = "${azurerm_cosmosdb_account.cosmos.id}/sqlRoleDefinitions/${local.cosmos_db_data_contributor_role_id}" principalId = azurerm_search_service.search.identity[0].principal_id - scope = azurerm_cosmosdb_account.cosmos.id + scope = "${azurerm_cosmosdb_account.cosmos.id}/dbs/${azurerm_cosmosdb_sql_database.cosmosdb.name}" } }) } @@ -1282,6 +1382,15 @@ resource "azurerm_role_assignment" "webapp_foundry_openai_user" { depends_on = [azurerm_linux_web_app.app] } +resource "azurerm_role_assignment" "webapp_foundry_maas_inference_user" { + count = local.deploy_to_appservice ? 
1 : 0 + scope = azapi_resource.ai_foundry.id + role_definition_id = azurerm_role_definition.maas_inference_user.role_definition_resource_id + principal_id = data.azurerm_linux_web_app.app_identity[0].identity[0].principal_id + principal_type = "ServicePrincipal" + depends_on = [azurerm_linux_web_app.app, azapi_resource.ai_foundry, azurerm_role_definition.maas_inference_user] +} + resource "azurerm_role_assignment" "webapp_project_openai_user" { count = local.deploy_to_appservice ? 1 : 0 scope = azapi_resource.ai_project.id @@ -1298,7 +1407,16 @@ resource "azurerm_role_assignment" "containerapp_foundry_openai_user" { role_definition_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/providers/Microsoft.Authorization/roleDefinitions/${local.cognitive_openai_user_role_id}" principal_id = azurerm_user_assigned_identity.containerapp_identity[0].principal_id principal_type = "ServicePrincipal" - depends_on = [azurerm_container_app.app] + depends_on = [azurerm_user_assigned_identity.containerapp_identity, azapi_resource.ai_foundry] +} + +resource "azurerm_role_assignment" "containerapp_foundry_maas_inference_user" { + count = local.deploy_to_container_apps ? 1 : 0 + scope = azapi_resource.ai_foundry.id + role_definition_id = azurerm_role_definition.maas_inference_user.role_definition_resource_id + principal_id = azurerm_user_assigned_identity.containerapp_identity[0].principal_id + principal_type = "ServicePrincipal" + depends_on = [azurerm_user_assigned_identity.containerapp_identity, azapi_resource.ai_foundry, azurerm_role_definition.maas_inference_user] } resource "azurerm_role_assignment" "containerapp_project_openai_user" { @@ -1307,7 +1425,16 @@ resource "azurerm_role_assignment" "containerapp_project_openai_user" { role_definition_id = "/subscriptions/${data.azurerm_client_config.current.subscription_id}/providers/Microsoft.Authorization/roleDefinitions/${local.cognitive_openai_user_role_id}" principal_id = azurerm_user_assigned_identity.containerapp_identity[0].principal_id principal_type = "ServicePrincipal" - depends_on = [azurerm_container_app.app] + depends_on = [azurerm_user_assigned_identity.containerapp_identity, azapi_resource.ai_project] +} + +resource "azurerm_role_assignment" "containerapp_project_ai_user" { + count = local.deploy_to_container_apps ? 1 : 0 + scope = azapi_resource.ai_project.id + role_definition_name = "Azure AI User" + principal_id = azurerm_user_assigned_identity.containerapp_identity[0].principal_id + principal_type = "ServicePrincipal" + depends_on = [azurerm_user_assigned_identity.containerapp_identity, azapi_resource.ai_project] } # Grant AcrPull role to Container App managed identity for ACR pulls @@ -1463,7 +1590,7 @@ data "azapi_resource_action" "cosmos_keys" { depends_on = [azurerm_cosmosdb_account.cosmos] } -# AI Foundry now uses managed identity authentication - no keys needed +# AI Foundry uses managed identity for the app, but automation may still require keys # Connect resources to MSFT Foundry project using ARM templates resource "azapi_resource" "storage_connection" { @@ -1679,7 +1806,7 @@ resource "null_resource" "create_env_file" { $searchKey = az keyvault secret show --vault-name $kv --name search-admin-key --query value -o tsv if (${var.enable_cosmos_local_auth ? 
"$true" : "$false"}) { $cosmosKey = az keyvault secret show --vault-name $kv --name cosmos-primary-key --query value -o tsv - } else { $cosmosKey = "AAD_AUTH" } + } else { $cosmosKey = "" } $storageConnectionString = az keyvault secret show --vault-name $kv --name storage-connection-string --query value -o tsv # Create .env file content @@ -1691,7 +1818,7 @@ AZURE_AI_PROJECT_NAME=${local.ai_project_name} AZURE_AI_AGENT_ENDPOINT=$agentsProjectEndpoint # Azure OpenAI Model Deployments - AZURE_OPENAI_CHAT_DEPLOYMENT=${var.chat_model_deployment} +AZURE_OPENAI_CHAT_DEPLOYMENT=${var.chat_model_deployment} AZURE_OPENAI_EMBEDDING_DEPLOYMENT=text-embedding-3-small AZURE_OPENAI_ENDPOINT=$openAiEndpoint AZURE_OPENAI_API_KEY=$aiFoundryKey @@ -1699,7 +1826,7 @@ AZURE_OPENAI_API_VERSION=2024-02-01 # GPT Model Configuration (for single-agent chat) gpt_endpoint=$openAiEndpoint - gpt_deployment=${var.chat_model_deployment} +gpt_deployment=${var.chat_model_deployment} gpt_api_key=$aiFoundryKey gpt_api_version=2024-02-01 @@ -1729,7 +1856,7 @@ AZURE_RESOURCE_GROUP=${azurerm_resource_group.rg.name} AZURE_LOCATION=${var.location} # Multi-Agent Configuration -USE_MULTI_AGENT=true +USE_MULTI_AGENT=${var.enable_multi_agent ? "true" : "false"} AZURE_AI_PROJECT_ENDPOINT=$agentsProjectEndpoint AZURE_AI_AGENT_MODEL_DEPLOYMENT_NAME=${var.chat_model_deployment} @@ -2471,6 +2598,7 @@ aiosignal>=1.3.0 A2A_HOST=${var.a2a_host} A2A_PORT=${var.a2a_port} A2A_LOG_LEVEL=INFO +A2A_DEBUG=true # Base application URL for monitoring BASE_APP_URL=https://${local.web_app_name}.azurewebsites.net diff --git a/terraform-infrastructure/terraform.tfvars b/terraform-infrastructure/terraform.tfvars index 1ed8bf8..381ba1a 100644 --- a/terraform-infrastructure/terraform.tfvars +++ b/terraform-infrastructure/terraform.tfvars @@ -1,4 +1,4 @@ -resource_group_name = "RG-AI-Retail-DemoX0" +resource_group_name = "RG-AI-Retail-DemoX34" location = "eastus2" name_prefix = "zava" From af552a74167a30d98579aa37442121a71a365848 Mon Sep 17 00:00:00 2001 From: brown9804 Date: Mon, 2 Feb 2026 10:16:03 -0600 Subject: [PATCH 2/3] a2a protocol depending on the request --- src/a2a/agent/agent_adapters.py | 6 +- src/app/agents/agent_processor.py | 190 +++++++++++++++-- src/app/agents/deploy_real_agents.py | 305 ++++++++++++++++++++++++--- src/app/agents/quick_verify.py | 90 ++++++-- src/app/templates/index.html | 32 ++- src/chat_app_multi_agent.py | 223 +++++++++++--------- src/requirements.txt | 1 + terraform-infrastructure/main.tf | 17 +- 8 files changed, 695 insertions(+), 169 deletions(-) diff --git a/src/a2a/agent/agent_adapters.py b/src/a2a/agent/agent_adapters.py index 052184c..aa9b53a 100644 --- a/src/a2a/agent/agent_adapters.py +++ b/src/a2a/agent/agent_adapters.py @@ -71,9 +71,9 @@ def _initialize_agent(self) -> None: remote_endpoint = os.getenv("AZURE_AI_AGENT_ENDPOINT") or os.getenv("AZURE_AI_PROJECT_ENDPOINT") # Try remote first if available - if (remote_endpoint and agent_id and - agent_id.startswith("asst_") and - not agent_id.startswith("asst_local_")): + # Real Foundry agent IDs are not guaranteed to start with "asst_". + # Only treat explicit "asst_local_*" IDs as local simulation. 
+ if (remote_endpoint and agent_id and not agent_id.startswith("asst_local_")): try: self._agent_processor = AgentProcessor( agent_id=agent_id, diff --git a/src/app/agents/agent_processor.py b/src/app/agents/agent_processor.py index e8c4558..59a5f72 100644 --- a/src/app/agents/agent_processor.py +++ b/src/app/agents/agent_processor.py @@ -9,6 +9,11 @@ from azure.ai.projects import AIProjectClient # type: ignore from azure.identity import DefaultAzureCredential # type: ignore from services.azure_auth import get_default_credential, get_inference_credential # type: ignore + try: + # Preferred runtime client for threads/messages/runs + from azure.ai.agents import AgentsClient # type: ignore + except Exception: + AgentsClient = None # type: ignore _REMOTE_AVAILABLE = True except Exception: _REMOTE_AVAILABLE = False @@ -113,6 +118,7 @@ def __init__(self, agent_id: str, project_endpoint: str = None): project_endpoint: Optional project endpoint (reads from env if not provided) """ self.agent_id = agent_id + self._runtime_agent_id = agent_id raw_endpoint = ( project_endpoint @@ -139,6 +145,142 @@ def __init__(self, agent_id: str, project_endpoint: str = None): self.project_endpoint = full_project_endpoint self.client = AIProjectClient(endpoint=self.project_endpoint, credential=get_default_credential()) + + # Best-effort: resolve the underlying OpenAI-style assistant id (asst_...) + # when the configured id is a friendly/name-based id. + self._runtime_agent_id = self._maybe_resolve_assistant_id(self._runtime_agent_id) + + # Some azure-ai-projects builds expose only agent-management operations on .agents. + # In that case, use azure-ai-agents AgentsClient for thread/message/run operations. + self._agents_api = None + try: + if ( + hasattr(self.client, "agents") + and hasattr(self.client.agents, "threads") + and hasattr(self.client.agents.threads, "create") + and hasattr(self.client.agents, "messages") + and hasattr(self.client.agents.messages, "create") + and hasattr(self.client.agents, "runs") + and hasattr(self.client.agents.runs, "create_and_process") + ): + self._agents_api = self.client.agents + except Exception: + self._agents_api = None + + if self._agents_api is None: + if AgentsClient is None: + raise ValueError( + "Remote agent support unavailable: this SDK build doesn't expose threads on AIProjectClient.agents " + "and azure-ai-agents is not installed." + ) + # AgentsClient expects the project endpoint (per Microsoft docs snippets). + self._agents_api = AgentsClient(endpoint=self.project_endpoint, credential=get_default_credential()) + + def _maybe_resolve_assistant_id(self, configured_id: str) -> str: + if not configured_id: + return configured_id + # Local simulation stays untouched. + if configured_id.startswith("asst_local_"): + return configured_id + # Already an assistant id. + if configured_id.startswith("asst"): + return configured_id + + try: + agents = getattr(self.client, "agents", None) + if not agents or not hasattr(agents, "list"): + return configured_id + for agent in agents.list(): + agent_id = getattr(agent, "id", None) + agent_name = getattr(agent, "name", None) + if configured_id not in {agent_id, agent_name}: + continue + for attr in ( + "assistant_id", + "assistantId", + "openai_assistant_id", + "openaiAssistantId", + "assistantID", + ): + value = getattr(agent, attr, None) + if isinstance(value, str) and value.startswith("asst"): + return value + # Some SDKs only populate `id` with an assistant id. 
+ if isinstance(agent_id, str) and agent_id.startswith("asst"): + return agent_id + except Exception: + # Best-effort only; keep configured value. + return configured_id + + return configured_id + + @staticmethod + def _get_obj_id(obj: Any) -> str | None: + if obj is None: + return None + # SDK models can be rich objects or MutableMapping + if hasattr(obj, "id"): + return getattr(obj, "id") + if isinstance(obj, dict): + return obj.get("id") + return None + + def _create_thread(self): + agents = self._agents_api + if hasattr(agents, "threads") and hasattr(agents.threads, "create"): + return agents.threads.create() + if hasattr(agents, "create_thread"): + return agents.create_thread() + raise AttributeError("No supported thread creation method on agents client") + + def _delete_thread(self, thread_id: str) -> None: + agents = self._agents_api + if hasattr(agents, "threads") and hasattr(agents.threads, "delete"): + agents.threads.delete(thread_id) + return + if hasattr(agents, "delete_thread"): + agents.delete_thread(thread_id) + return + + def _create_message(self, thread_id: str, role: str, content: str) -> None: + agents = self._agents_api + if hasattr(agents, "messages") and hasattr(agents.messages, "create"): + agents.messages.create(thread_id=thread_id, role=role, content=content) + return + if hasattr(agents, "create_message"): + agents.create_message(thread_id=thread_id, role=role, content=content) + return + raise AttributeError("No supported message creation method on agents client") + + def _run_and_process(self, thread_id: str): + agents = self._agents_api + runtime_id = self._runtime_agent_id + # Preferred (azure-ai-agents style) + if hasattr(agents, "runs") and hasattr(agents.runs, "create_and_process"): + # Different SDK builds use either `agent_id` or `assistant_id`. + try: + return agents.runs.create_and_process(thread_id=thread_id, agent_id=runtime_id) + except TypeError: + return agents.runs.create_and_process(thread_id=thread_id, assistant_id=runtime_id) + # Older helper naming + if hasattr(agents, "create_and_process_run"): + # This helper is typically OpenAI-assistants shaped. 
+ return agents.create_and_process_run(thread_id=thread_id, assistant_id=runtime_id) + # Some clients expose a one-shot convenience + if hasattr(agents, "create_thread_and_process_run"): + try: + return agents.create_thread_and_process_run(agent_id=runtime_id) + except TypeError: + return agents.create_thread_and_process_run(assistant_id=runtime_id) + raise AttributeError("No supported run method on agents client") + + def _list_messages(self, thread_id: str): + agents = self._agents_api + if hasattr(agents, "messages") and hasattr(agents.messages, "list"): + return agents.messages.list(thread_id=thread_id) + if hasattr(agents, "list_messages"): + return agents.list_messages(thread_id=thread_id) + raise AttributeError("No supported message listing method on agents client") def run_conversation_with_text_stream( self, @@ -157,9 +299,13 @@ def run_conversation_with_text_stream( Yields: Chunks of the agent's response """ + thread_id: str | None = None try: # Create a thread for this conversation - thread = self.client.agents.create_thread() + thread = self._create_thread() + thread_id = self._get_obj_id(thread) + if not thread_id: + raise RuntimeError("Agent thread creation returned no id") # Build the message content message_content = user_message @@ -167,30 +313,42 @@ def run_conversation_with_text_stream( message_content = f"Context: {json.dumps(additional_context)}\n\nUser: {user_message}" # Add message to thread - self.client.agents.create_message( - thread_id=thread.id, - role="user", - content=message_content - ) + self._create_message(thread_id=thread_id, role="user", content=message_content) # Run the agent - run = self.client.agents.create_and_process_run( - thread_id=thread.id, - assistant_id=self.agent_id - ) + self._run_and_process(thread_id=thread_id) # Get messages - messages = self.client.agents.list_messages(thread_id=thread.id) + messages = self._list_messages(thread_id=thread_id) # Find the assistant's response for message in messages: if message.role == "assistant": - for content in message.content: - if hasattr(content, 'text'): + # Message content can be a list of blocks or a mapping + contents = getattr(message, "content", None) + if isinstance(message, dict) and contents is None: + contents = message.get("content") + if not contents: + continue + for content in contents: + # SDK content blocks commonly expose .text.value + if hasattr(content, "text") and hasattr(content.text, "value"): yield content.text.value - - # Clean up - self.client.agents.delete_thread(thread.id) + elif isinstance(content, dict): + text = content.get("text") + if isinstance(text, dict) and isinstance(text.get("value"), str): + yield text["value"] + elif isinstance(text, str): + yield text + elif isinstance(content, str): + yield content except Exception as e: yield f"Error communicating with agent: {str(e)}" + finally: + if thread_id: + try: + self._delete_thread(thread_id) + except Exception: + # Best-effort cleanup; ignore failures + pass diff --git a/src/app/agents/deploy_real_agents.py b/src/app/agents/deploy_real_agents.py index 922c8e4..1198848 100644 --- a/src/app/agents/deploy_real_agents.py +++ b/src/app/agents/deploy_real_agents.py @@ -8,6 +8,11 @@ import hashlib from typing import Optional from azure.ai.projects import AIProjectClient +try: + # Prefer this for runtime agent IDs (asst_...) 
+ from azure.ai.agents import AgentsClient # type: ignore +except Exception: + AgentsClient = None # type: ignore from dotenv import load_dotenv @@ -57,6 +62,18 @@ def _resolve_model_name(model: str) -> str: return resolved +def _resolve_agents_client_model(model: str) -> str: + """Resolve model for AgentsClient.create_agent/update_agent. + + The Agents runtime API rejects special/router deployments like "model-router". + Default to a concrete chat model deployment (env override supported). + """ + resolved = _resolve_model_name(model) + if resolved in ("model-router", "model_router"): + return get_env("AZURE_AI_AGENT_MODEL_DEPLOYMENT", "gpt-4o-mini") or "gpt-4o-mini" + return resolved + + def _sanitize_agent_name(name: str) -> str: """Sanitize agent name for API constraints (lowercase, hyphens, <=63 chars).""" import re @@ -148,6 +165,83 @@ def _create_agent(project_client: AIProjectClient, *, model: str, name: str, ins raise AttributeError("No supported agent creation method found in Azure AI Projects SDK") +def _extract_agent_resource_id(agent_obj) -> str | None: + """Return the Azure AI Foundry agent resource id (used for manage/list/delete).""" + if agent_obj is None: + return None + for attr in ("id", "agent_id", "agentId"): + value = getattr(agent_obj, attr, None) + if isinstance(value, str) and value.strip(): + return value.strip() + return None + + +def _extract_assistant_id(agent_obj) -> str | None: + """Return the OpenAI-style assistant id (asst_...) when present. + + Some SDK surfaces expose both a Foundry agent resource id and the underlying + assistant id used by the threads/runs API. + """ + if agent_obj is None: + return None + for attr in ( + "assistant_id", + "assistantId", + "openai_assistant_id", + "openaiAssistantId", + "assistantID", + ): + value = getattr(agent_obj, attr, None) + if isinstance(value, str) and value.strip().startswith("asst"): + return value.strip() + # Some SDKs only populate `id` with an assistant id. + value = getattr(agent_obj, "id", None) + if isinstance(value, str) and value.strip().startswith("asst"): + return value.strip() + return None + + +def _extract_runtime_id(agent_obj, env_var: str) -> str: + """Select the id that the runtime (threads/runs) API expects.""" + assistant_id = _extract_assistant_id(agent_obj) + if assistant_id: + return assistant_id + resource_id = _extract_agent_resource_id(agent_obj) + if resource_id: + return resource_id + return f"unknown-{env_var}" + + +def _get_agent_details(project_client: AIProjectClient, resource_id: str): + """Best-effort fetch of full agent details. + + List operations sometimes return a lightweight object without `assistant_id`. + This tries common SDK shapes to retrieve a fuller representation. + """ + if not project_client or not resource_id: + return None + + agents = getattr(project_client, "agents", None) + if agents is None: + return None + + # Newer SDKs + if hasattr(agents, "get_agent"): + try: + return agents.get_agent(agent_id=resource_id) + except Exception: + pass + + # Generic get + if hasattr(agents, "get"): + try: + return agents.get(resource_id) + except Exception: + pass + + return None + + from services.azure_auth import get_default_credential def deploy_agents(): @@ -274,8 +368,164 @@ def deploy_agents(): prior_state = {} deployed_agents = {} + deployed_resource_ids: dict[str, str] = {} statuses = {} + # Preferred path: provision runtime agents via AgentsClient (OpenAI-style asst_* IDs). + # This matches what the threads/runs API expects at runtime. 
+ if AgentsClient is not None: + try: + print("Initializing AgentsClient (runtime threads/runs API)...") + + credential = get_default_credential() + project_name = get_env("AZURE_AI_PROJECT_NAME") + if not project_name: + raise ValueError("Missing required environment variable: AZURE_AI_PROJECT_NAME") + + if project_endpoint and "cognitiveservices.azure.com" in project_endpoint: + print("Converting endpoint domain: cognitiveservices.azure.com -> services.ai.azure.com") + project_endpoint = project_endpoint.replace("cognitiveservices.azure.com", "services.ai.azure.com") + os.environ["AZURE_AI_PROJECT_ENDPOINT"] = project_endpoint + + base_endpoint = project_endpoint.split("/api/")[0].rstrip("/") + full_project_endpoint = f"{base_endpoint}/api/projects/{project_name}" + print(f"Project Endpoint (full): {full_project_endpoint}") + + agents_client = AgentsClient(endpoint=full_project_endpoint, credential=credential) + + print("Fetching existing runtime agents (asst_* IDs)...") + existing_agents = {} + try: + agent_list = list(agents_client.list_agents()) + for a in agent_list: + name = getattr(a, "name", None) + if isinstance(name, str) and name: + existing_agents[name] = a + existing_agents[_sanitize_agent_name(name)] = a + print(f"Found {len(agent_list)} runtime agent(s)") + except Exception as list_err: + print(f"Could not list runtime agents: {list_err}") + agent_list = [] + + for cfg in agents_config: + name = cfg["name"] + env_var = cfg["env_var"] + instr = cfg["instructions"] + sanitized_name = _sanitize_agent_name(name) + instr_hash = _hash_instructions(instr) + prior_hash = prior_state.get(env_var, {}).get("hash") + + existing = existing_agents.get(sanitized_name) or existing_agents.get(name) + if existing: + agent_id = getattr(existing, "id", None) or f"unknown-{env_var}" + + if prior_hash and prior_hash != instr_hash: + print(f"[{env_var}] Updating runtime agent (instructions changed): {sanitized_name}") + try: + updated = agents_client.update_agent( + agent_id, + model=_resolve_agents_client_model(cfg["model"]), + name=sanitized_name, + description=name, + instructions=instr, + ) + agent_id = getattr(updated, "id", None) or agent_id + statuses[env_var] = "updated" + except Exception as ue: + print(f"[{env_var}] Failed to update agent {sanitized_name}: {ue}") + statuses[env_var] = "existing-no-update" + else: + print(f"[{env_var}] Reusing existing runtime agent: {sanitized_name} ({agent_id})") + statuses[env_var] = "existing" + + deployed_agents[env_var] = str(agent_id) + deployed_resource_ids[env_var] = str(agent_id) + continue + + print(f"[{env_var}] Creating new runtime agent: {sanitized_name}") + created = agents_client.create_agent( + model=_resolve_agents_client_model(cfg["model"]), + name=sanitized_name, + description=name, + instructions=instr, + ) + agent_id = getattr(created, "id", None) or f"unknown-{env_var}" + deployed_agents[env_var] = str(agent_id) + deployed_resource_ids[env_var] = str(agent_id) + statuses[env_var] = "created" + print(f"[{env_var}] SUCCESS - Created runtime agent: {agent_id}") + + # Persist state (hash + id) + new_state = {} + for cfg in agents_config: + ev = cfg["env_var"] + new_state[ev] = { + "id": deployed_agents.get(ev), + "resource_id": deployed_resource_ids.get(ev), + "hash": _hash_instructions(cfg["instructions"]), + "status": statuses.get(ev) + } + try: + with open(state_path, "w", encoding="utf-8") as sf: + json.dump(new_state, sf, indent=2) + print(f"[STATE] State file updated: {state_path}") + except Exception as se: + print(f"WARNING: 
Failed to write state file: {se}") + + # Update src/.env with runtime agent IDs + env_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..', 'src', '.env')) + if os.path.exists(env_path): + try: + import re + + with open(env_path, 'r', encoding='utf-8') as f: + content = f.read() + + content = content.replace("cognitiveservices.azure.com", "services.ai.azure.com") + + for var, aid in deployed_agents.items(): + pattern = rf'^{re.escape(var)}=.*$' + replacement = f'{var}={aid}' + if re.search(pattern, content, flags=re.MULTILINE): + content = re.sub(pattern, replacement, content, flags=re.MULTILINE) + else: + if not content.endswith("\n"): + content += "\n" + content += f"{replacement}\n" + + with open(env_path, 'w', encoding='utf-8') as f: + f.write(content) + + print(f"Updated .env with agent IDs: {env_path}") + print("Agent IDs written:") + for var, aid in deployed_agents.items(): + print(f" {var}: {aid}") + except Exception as ee: + print(f"WARNING: Failed to update .env: {ee}") + else: + print(f"INFO: .env file not found for agent ID propagation: {env_path}") + + print("\n" + "=" * 70) + print("DEPLOYMENT SUMMARY") + print("=" * 70) + for k, v in deployed_agents.items(): + status = statuses.get(k, "unknown") + print(f" {k}: {v} [{status}]") + + payload = {"agents": deployed_agents, "statuses": statuses} + print("===AGENTS_JSON_START===") + print(json.dumps(payload, indent=2)) + print("===AGENTS_JSON_END===") + + return deployed_agents + + except Exception as e: + print( + "WARNING: AgentsClient provisioning failed; falling back to AIProjectClient path: " + f"{e}. If this is '(unsupported_model)', ensure your AI Foundry has a real chat model " + "deployment (e.g., 'gpt-4o-mini') and set AZURE_AI_AGENT_MODEL_DEPLOYMENT accordingly." + ) + try: print("Initializing Azure AI Project Client...") @@ -360,6 +610,7 @@ def deploy_agents(): print(f"[{env_var}] No project client - using fallback ID") fallback_id = f"asst_local_{env_var}" deployed_agents[env_var] = fallback_id + deployed_resource_ids[env_var] = fallback_id statuses[env_var] = "fallback-no-client" continue @@ -367,12 +618,12 @@ def deploy_agents(): existing_agent = existing_agents.get(name) or existing_agents.get(sanitized_name) if existing_agent: agent_obj = existing_agent - agent_id = ( - getattr(agent_obj, "id", None) - or getattr(agent_obj, "agent_id", None) - or getattr(agent_obj, "agentId", None) - or f"unknown-{env_var}" - ) + resource_id = _extract_agent_resource_id(agent_obj) or f"unknown-{env_var}" + + # Try to fetch full details to discover assistant_id (asst_...). 
+ detailed = _get_agent_details(project_client, resource_id) + runtime_id = _extract_runtime_id(detailed or agent_obj, env_var) + deployed_resource_ids[env_var] = resource_id # Attempt update if instructions changed if prior_hash and prior_hash != instr_hash: @@ -380,9 +631,9 @@ def deploy_agents(): try: try: if hasattr(project_client.agents, "delete_agent"): - project_client.agents.delete_agent(agent_id=agent_id) + project_client.agents.delete_agent(agent_id=resource_id) else: - project_client.agents.delete(agent_id) + project_client.agents.delete(resource_id) except Exception: pass @@ -392,22 +643,21 @@ def deploy_agents(): name=sanitized_name, instructions=instr, ) - agent_id = ( - getattr(new_agent, "id", None) - or getattr(new_agent, "agent_id", None) - or getattr(new_agent, "agentId", None) - or agent_id - ) + resource_id = _extract_agent_resource_id(new_agent) or resource_id + + detailed = _get_agent_details(project_client, resource_id) + runtime_id = _extract_runtime_id(detailed or new_agent, env_var) + deployed_resource_ids[env_var] = resource_id statuses[env_var] = "recreated" - print(f"[{env_var}] Successfully recreated: {agent_id}") + print(f"[{env_var}] Successfully recreated: runtime_id={runtime_id} resource_id={resource_id}") except Exception as ue: print(f"[{env_var}] Failed to recreate {name}: {ue}") statuses[env_var] = "existing-no-update" - deployed_agents[env_var] = agent_id + deployed_agents[env_var] = runtime_id else: - print(f"[{env_var}] Reusing existing agent: {name} ({agent_id})") - deployed_agents[env_var] = agent_id + print(f"[{env_var}] Reusing existing agent: {name} (runtime_id={runtime_id} resource_id={resource_id})") + deployed_agents[env_var] = runtime_id statuses[env_var] = "existing" continue @@ -420,15 +670,14 @@ def deploy_agents(): name=sanitized_name, instructions=instr, ) - agent_id = ( - getattr(agent, "id", None) - or getattr(agent, "agent_id", None) - or getattr(agent, "agentId", None) - or f"unknown-{env_var}" - ) - deployed_agents[env_var] = agent_id + resource_id = _extract_agent_resource_id(agent) or f"unknown-{env_var}" + + detailed = _get_agent_details(project_client, resource_id) + runtime_id = _extract_runtime_id(detailed or agent, env_var) + deployed_agents[env_var] = runtime_id + deployed_resource_ids[env_var] = resource_id statuses[env_var] = "created" - print(f"[{env_var}] SUCCESS - Created agent: {agent_id}") + print(f"[{env_var}] SUCCESS - Created agent: runtime_id={runtime_id} resource_id={resource_id}") except Exception as ce: print(f"[{env_var}] FAILED to create {name}: {ce}") import traceback @@ -437,6 +686,7 @@ def deploy_agents(): # Use fallback local ID fallback_id = f"asst_local_{env_var}" deployed_agents[env_var] = fallback_id + deployed_resource_ids[env_var] = fallback_id statuses[env_var] = "fallback-creation-failed" print(f"[{env_var}] Using fallback local simulation: {fallback_id}") @@ -446,6 +696,9 @@ def deploy_agents(): ev = cfg["env_var"] new_state[ev] = { "id": deployed_agents.get(ev), + # Best-effort: keep the Foundry agent resource id for management/verification. + # If we only have a runtime id, this may be the same value. 
+ "resource_id": deployed_resource_ids.get(ev), "hash": _hash_instructions(cfg["instructions"]), "status": statuses.get(ev) } diff --git a/src/app/agents/quick_verify.py b/src/app/agents/quick_verify.py index 23bd0f3..32837d7 100644 --- a/src/app/agents/quick_verify.py +++ b/src/app/agents/quick_verify.py @@ -11,6 +11,35 @@ load_dotenv() + +def _extract_agent_resource_id(agent_obj) -> str | None: + if agent_obj is None: + return None + for attr in ("id", "agent_id", "agentId"): + value = getattr(agent_obj, attr, None) + if isinstance(value, str) and value.strip(): + return value.strip() + return None + + +def _extract_assistant_id(agent_obj) -> str | None: + if agent_obj is None: + return None + for attr in ( + "assistant_id", + "assistantId", + "openai_assistant_id", + "openaiAssistantId", + "assistantID", + ): + value = getattr(agent_obj, attr, None) + if isinstance(value, str) and value.strip().startswith("asst"): + return value.strip() + value = getattr(agent_obj, "id", None) + if isinstance(value, str) and value.strip().startswith("asst"): + return value.strip() + return None + def verify_agents(): """Verify agents are accessible via the correct endpoint""" @@ -68,7 +97,9 @@ def verify_agents(): print(f"Expected agents (from state file): {len(expected_agents)}") for name, data in expected_agents.items(): - print(f" - {name}: {data.get('id')} ({data.get('status')})") + rid = data.get("resource_id") + aid = data.get("id") + print(f" - {name}: runtime_id={aid} resource_id={rid} ({data.get('status')})") print() # Try to connect and list agents @@ -96,34 +127,63 @@ def verify_agents(): # Display found agents for agent in agents_list: - agent_id = getattr(agent, 'id', 'unknown') + agent_resource_id = _extract_agent_resource_id(agent) or 'unknown' + assistant_id = _extract_assistant_id(agent) agent_name = getattr(agent, 'name', 'unnamed') print(f" [OK] {agent_name}") - print(f" ID: {agent_id}") + print(f" resource_id: {agent_resource_id}") + if assistant_id: + print(f" assistant_id: {assistant_id}") # Compare with expected - found_ids = set(getattr(a, 'id', '') for a in agents_list) - expected_ids = set(d.get('id', '') for d in expected_agents.values()) + found_resource_ids = set(_extract_agent_resource_id(a) or '' for a in agents_list) + found_assistant_ids = set(_extract_assistant_id(a) or '' for a in agents_list) + + expected_runtime_ids = set(str(d.get('id', '') or '') for d in expected_agents.values()) + expected_resource_ids = set(str(d.get('resource_id', '') or '') for d in expected_agents.values()) + + found_resource_ids.discard('') + found_assistant_ids.discard('') + expected_runtime_ids.discard('') + expected_resource_ids.discard('') print("\nComparison:") - print(f" Expected: {len(expected_ids)} agents") - print(f" Found: {len(found_ids)} agents") + print(f" Expected runtime IDs: {len(expected_runtime_ids)}") + print(f" Expected resource IDs: {len(expected_resource_ids)}") + print(f" Found assistant IDs: {len(found_assistant_ids)}") + print(f" Found resource IDs: {len(found_resource_ids)}") - missing = expected_ids - found_ids - extra = found_ids - expected_ids + # An expected runtime id (often asst_*) should match found assistant ids. + missing_runtime = expected_runtime_ids - found_assistant_ids + # An expected resource id should match found resource ids. 
+ missing_resource = expected_resource_ids - found_resource_ids + + # Extra reporting (best-effort) + extra_assistants = found_assistant_ids - expected_runtime_ids + extra_resources = found_resource_ids - expected_resource_ids - if missing: - print(f"\nWARNING: Missing agents: {missing}") + if missing_runtime: + print(f"\nWARNING: Missing assistant/runtime IDs: {missing_runtime}") + + if missing_resource: + print(f"WARNING: Missing agent resource IDs: {missing_resource}") - if extra: - print(f"\n Extra agents found: {extra}") + if extra_assistants: + print(f"\n Extra assistant IDs found: {extra_assistants}") + + if extra_resources: + print(f" Extra resource IDs found: {extra_resources}") - if not missing and not extra: + if not missing_runtime and not missing_resource: print("\n[SUCCESS] All expected agents are present!") return True else: print("\nWARNING: Agent count mismatch") - return len(found_ids) >= len(expected_ids) + # Pass if we've found at least all expected runtime IDs or all expected resource IDs. + return ( + (expected_runtime_ids.issubset(found_assistant_ids) if expected_runtime_ids else True) + and (expected_resource_ids.issubset(found_resource_ids) if expected_resource_ids else True) + ) except Exception as e: print(f"\nERROR: Failed to verify agents: {e}") diff --git a/src/app/templates/index.html b/src/app/templates/index.html index b7e2a8d..62841e2 100644 --- a/src/app/templates/index.html +++ b/src/app/templates/index.html @@ -14,16 +14,17 @@ body { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); - height: 100vh; + height: 100dvh; display: flex; justify-content: center; align-items: center; + padding: 16px; } .chat-container { width: 90%; max-width: 800px; - height: 90vh; + height: min(90dvh, 900px); background: white; border-radius: 20px; box-shadow: 0 20px 60px rgba(0,0,0,0.3); @@ -35,13 +36,13 @@ .chat-header { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; - padding: 20px; + padding: 18px 20px; text-align: center; } .chat-header h1 { font-size: 24px; - margin-bottom: 5px; + margin-bottom: 0; } .chat-header p { @@ -88,7 +89,7 @@ } .message-content { - max-width: 70%; + max-width: 78%; padding: 12px 16px; border-radius: 18px; word-wrap: break-word; @@ -105,6 +106,26 @@ box-shadow: 0 2px 5px rgba(0,0,0,0.1); line-height: 1.6; } + + @media (max-width: 640px) { + body { + padding: 10px; + } + .chat-container { + width: 100%; + height: 100dvh; + border-radius: 14px; + } + .message-content { + max-width: 92%; + } + .chat-input-container { + padding: 14px; + } + #sendButton { + padding: 12px 18px; + } + } /* Markdown formatting */ .message-content h1, .message-content h2, .message-content h3 { @@ -256,7 +277,6 @@

🏠 Zava AI Shopping Assistant

-

Your DIY Project Helper

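The quick_verify.py changes above track two identifiers per agent: the Foundry resource ID returned by the management/list APIs and the asst_* runtime (assistant) ID used by the threads/runs API, and verification now passes only when both expected sets are covered. Below is a minimal standalone sketch of that set comparison, assuming the state-file shape written by deploy_agents ({"env_var": {"id": ..., "resource_id": ...}}); the compare_agent_ids helper and the keys on the "found" items are illustrative, not part of the patch.

```python
# Sketch only: mirrors the expected-vs-found ID comparison used in quick_verify.py.
# The state-file layout is assumed from the deploy_agents state writer in this patch;
# the helper name and the "found" dict keys are illustrative.
import json
from pathlib import Path


def compare_agent_ids(state_path: str, found: list[dict]) -> bool:
    """Return True when every expected runtime ID and resource ID was found."""
    state = json.loads(Path(state_path).read_text(encoding="utf-8"))

    expected_runtime = {str(v.get("id") or "") for v in state.values()} - {""}
    expected_resource = {str(v.get("resource_id") or "") for v in state.values()} - {""}

    # Each "found" item stands in for an agent returned by the service,
    # e.g. {"id": "<resource id>", "assistant_id": "asst_..."}.
    found_resource = {str(a.get("id") or "") for a in found} - {""}
    found_assistant = {str(a.get("assistant_id") or "") for a in found} - {""}

    missing_runtime = expected_runtime - found_assistant
    missing_resource = expected_resource - found_resource
    if missing_runtime:
        print(f"WARNING: Missing assistant/runtime IDs: {missing_runtime}")
    if missing_resource:
        print(f"WARNING: Missing agent resource IDs: {missing_resource}")
    return not (missing_runtime or missing_resource)
```

In this patch the runtime IDs are the asst_* values propagated to src/.env, while the resource IDs are what list_agents and delete_agent operate on, so keeping both in the state file lets verification and cleanup use whichever identifier the SDK surface expects.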
diff --git a/src/chat_app_multi_agent.py b/src/chat_app_multi_agent.py index b217475..38628da 100644 --- a/src/chat_app_multi_agent.py +++ b/src/chat_app_multi_agent.py @@ -163,6 +163,46 @@ def _get_env_any(*names: str) -> str | None: return None +def _plan_handoff_sequence(domain: str, user_message: str) -> list[str]: + """Plan a minimal agent handoff sequence. + + This is the missing piece for "agents talking to each other" in the chat UI. + The UI still sends a single user message, but the server can delegate to + multiple specialist agents and pass context forward. + """ + msg = (user_message or "").lower() + + # Always start with the classified domain. + sequence: list[str] = [domain] + + # Product discovery and comparison are best handled by product management. + if any(k in msg for k in ["recommend", "recommendation", "find", "search", "compare", "best", "popular", "spec", "specification"]): + if "product_management" not in sequence: + sequence.append("product_management") + + # Design requests often benefit from product discovery afterwards. + if any(k in msg for k in ["design", "interior", "room", "layout", "style", "color", "paint", "decor", "furniture"]): + if "interior_design" not in sequence: + sequence.append("interior_design") + if "product_management" not in sequence: + sequence.append("product_management") + + # Purchase intent: check inventory then cart. + if any(k in msg for k in ["buy", "purchase", "checkout", "order", "add to cart", "add this", "add it", "remove from cart"]): + if "inventory" not in sequence: + sequence.append("inventory") + if "cart_management" not in sequence: + sequence.append("cart_management") + + # Discounts: consult loyalty. + if any(k in msg for k in ["discount", "loyalty", "points", "member", "reward", "promo", "promotion", "deal"]): + if "customer_loyalty" not in sequence: + sequence.append("customer_loyalty") + + # Cap to avoid runaway chains. + return sequence[:4] + + def get_agent_processor(domain: str): """Return a processor (remote if available, else local) for the domain.""" agent_id_map = { @@ -170,7 +210,8 @@ def get_agent_processor(domain: str): "inventory": _get_env_any("inventory_agent", "AGENT_INVENTORY_AGENT_ID"), "customer_loyalty": _get_env_any("customer_loyalty", "AGENT_CUSTOMER_LOYALTY_ID"), "cart_management": _get_env_any("cart_manager", "AGENT_CART_MANAGER_ID"), - "cora": _get_env_any("cora", "AGENT_CORA_ID") + "cora": _get_env_any("cora", "AGENT_CORA_ID"), + "product_management": _get_env_any("product_management", "AGENT_PRODUCT_MANAGEMENT_ID"), } agent_id = agent_id_map.get(domain) @@ -180,7 +221,9 @@ def get_agent_processor(domain: str): # Prefer remote only if endpoint exists and agent id looks like a remote id remote_endpoint = os.getenv("AZURE_AI_AGENT_ENDPOINT") or os.getenv("AZURE_AI_PROJECT_ENDPOINT") - if remote_endpoint and agent_id.startswith("asst_") and not agent_id.startswith("asst_local_"): + # Real Foundry agent IDs are not guaranteed to start with "asst_" (some SDKs/services use + # a name-based ID). Treat only explicit "asst_local_*" as local simulation. 
+ if remote_endpoint and not agent_id.startswith("asst_local_"): try: return AgentProcessor(agent_id=agent_id, project_endpoint=remote_endpoint) except Exception as e: @@ -257,119 +300,97 @@ async def websocket_endpoint(websocket: WebSocket): domain = classification["domain"] logger.info(f"Classified as domain: {domain} (confidence: {classification['confidence']})") - - # Step 2: Get appropriate agent - agent_processor = get_agent_processor(domain) - - if not agent_processor: - # Instead of reverting to single-agent (which may lack config), - # emit a message explaining the missing processor. - warning = "Multi-agent processor unavailable; please verify configuration." - await websocket.send_text(fast_json_dumps({ - "answer": warning, - "agent": "unassigned", - "cart": persistent_cart - })) - conversation_history.append({"role": "assistant", "content": warning}) - continue - - # Step 3: Prepare context for agent - additional_context = { - "cart": persistent_cart, - "discount": customer_discount - } - - if domain == "cart_management": - # Cart manager needs full history - additional_context["conversation_history"] = conversation_history - - # Step 4: Call agent and stream response - response_text = "" - for chunk in agent_processor.run_conversation_with_text_stream( - user_message=user_message, - conversation_history=conversation_history[-5:], # Last 5 messages - additional_context=additional_context - ): - response_text += chunk - - # Step 5: Parse response and flatten to a human answer - parsed_json: Dict[str, Any] | None = None - try: - parsed_json = json.loads(response_text) - except Exception: - # Try secondary parse if nested JSON inside 'answer' - if response_text.strip().startswith('{'): - try: - parsed_json = json.loads(response_text.strip()) - except Exception: - parsed_json = None - - if parsed_json: - # Some prompts return a JSON object as a *string* inside the - # outer {"answer": "..."} wrapper (local agents). Lift it. - if isinstance(parsed_json.get("answer"), str): - inner_text = parsed_json["answer"].strip() - if inner_text.startswith("{") and '"answer"' in inner_text: - try: - inner_obj = json.loads(inner_text) - if isinstance(inner_obj, dict): - # Prefer inner values for these well-known fields. - for key in ("answer", "products", "image_output", "cart", "discount_percentage", "image_url"): - if key in inner_obj and inner_obj[key] is not None: - parsed_json[key] = inner_obj[key] - except Exception: - pass - - # Some older prompts embed structured fields inside the - # answer string (e.g., "answer: ...\nproducts: []"). Normalize. 
- legacy = _parse_legacy_kv_block(parsed_json["answer"]) - if legacy: - parsed_json.update({k: v for k, v in legacy.items() if v is not None}) - - if "cart" in parsed_json and isinstance(parsed_json["cart"], list): - persistent_cart = parsed_json["cart"] - if "discount_percentage" in parsed_json and parsed_json["discount_percentage"]: - customer_discount = parsed_json["discount_percentage"] - flattened = _flatten_response_json(parsed_json) + + # Step 2: Plan handoffs and execute sequence + sequence = _plan_handoff_sequence(domain, user_message) + logger.info(f"Multi-agent handoff sequence: {sequence}") + + agent_outputs: list[dict[str, Any]] = [] + last_parsed_json: Dict[str, Any] | None = None + last_domain = domain + + for step_idx, step_domain in enumerate(sequence): + agent_processor = get_agent_processor(step_domain) + if not agent_processor: + raise RuntimeError(f"No agent processor available for domain={step_domain}") + + additional_context = { + "cart": persistent_cart, + "discount": customer_discount, + "handoff": { + "step": step_idx + 1, + "sequence": sequence, + "previous_outputs": agent_outputs[-3:], + }, + } + if step_domain == "cart_management": + additional_context["conversation_history"] = conversation_history + + response_text = "" + for chunk in agent_processor.run_conversation_with_text_stream( + user_message=user_message, + conversation_history=conversation_history[-5:], + additional_context=additional_context, + ): + response_text += chunk + + parsed_json: Dict[str, Any] | None = None + try: + parsed_json = json.loads(response_text) + except Exception: + parsed_json = None + + if parsed_json and isinstance(parsed_json, dict): + # Lift nested JSON if present. + if isinstance(parsed_json.get("answer"), str): + legacy = _parse_legacy_kv_block(parsed_json["answer"]) + if legacy: + parsed_json.update({k: v for k, v in legacy.items() if v is not None}) + if "cart" in parsed_json and isinstance(parsed_json["cart"], list): + persistent_cart = parsed_json["cart"] + if "discount_percentage" in parsed_json and parsed_json["discount_percentage"]: + customer_discount = parsed_json["discount_percentage"] + last_parsed_json = parsed_json + + agent_outputs.append({ + "agent": step_domain, + "raw": response_text, + "parsed": parsed_json, + }) + last_domain = step_domain + + # Step 3: Build final response from last agent result + if last_parsed_json: + flattened = _flatten_response_json(last_parsed_json) answer_text = _extract_plain_answer(flattened) - - # Extract image URL if present - image_url = parsed_json.get("image_url") + image_url = last_parsed_json.get("image_url") else: - answer_text = _extract_plain_answer(response_text) + answer_text = _extract_plain_answer(agent_outputs[-1]["raw"] if agent_outputs else "") image_url = None - # Send natural language answer with metadata response_data = { "answer": answer_text, - "agent": domain, + "agent": last_domain, "cart": persistent_cart, - "discount": customer_discount + "discount": customer_discount, } # Forward structured fields if present. - if parsed_json: - if "products" in parsed_json: - response_data["products"] = parsed_json.get("products") - if "image_output" in parsed_json: - response_data["image_output"] = parsed_json.get("image_output") - - # If the agent returned diagnostic error details, forward them to the UI. 
- if parsed_json: - if isinstance(parsed_json.get("error"), str): - response_data["error"] = parsed_json["error"] - if parsed_json.get("error_id") is not None: - response_data["error_id"] = parsed_json.get("error_id") - - # Include image URL if available + if last_parsed_json: + if "products" in last_parsed_json: + response_data["products"] = last_parsed_json.get("products") + if "image_output" in last_parsed_json: + response_data["image_output"] = last_parsed_json.get("image_output") + if isinstance(last_parsed_json.get("error"), str): + response_data["error"] = last_parsed_json["error"] + if last_parsed_json.get("error_id") is not None: + response_data["error_id"] = last_parsed_json.get("error_id") if image_url: response_data["image_url"] = image_url - - await websocket.send_text(fast_json_dumps(response_data)) + await websocket.send_text(fast_json_dumps(response_data)) conversation_history.append({"role": "assistant", "content": answer_text}) - - logger.info(f"Response sent successfully from {domain} agent") + logger.info(f"Response sent successfully (final agent={last_domain})") else: # === SINGLE-AGENT MODE (Legacy) === diff --git a/src/requirements.txt b/src/requirements.txt index 0ecf281..e6c378e 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -6,6 +6,7 @@ azure-identity==1.19.0 azure-search-documents==11.6.0 azure-ai-inference==1.0.0b6 azure-ai-projects>=2.0.0b1 +azure-ai-agents>=1.1.0 azure-storage-blob==12.19.0 openai==1.54.0 fastapi==0.115.0 diff --git a/terraform-infrastructure/main.tf b/terraform-infrastructure/main.tf index 8c9ac9e..c04c393 100644 --- a/terraform-infrastructure/main.tf +++ b/terraform-infrastructure/main.tf @@ -514,6 +514,11 @@ resource "azurerm_container_app" "app" { secret_name = "storage-connection-string" } + env { + name = "A2A_DEBUG" + value = "true" + } + # Cosmos auth: use AAD when local auth disabled dynamic "env" { for_each = var.enable_cosmos_local_auth ? [1] : [] @@ -551,6 +556,10 @@ resource "azurerm_container_app" "app" { name = "AGENT_CART_MANAGER_ID" value = data.external.agents_state.result["agent_cart_manager_id"] } + env { + name = "AGENT_PRODUCT_MANAGEMENT_ID" + value = data.external.agents_state.result["agent_product_management_id"] + } env { name = "cora" value = data.external.agents_state.result["agent_cora_id"] @@ -571,6 +580,10 @@ resource "azurerm_container_app" "app" { name = "cart_manager" value = data.external.agents_state.result["agent_cart_manager_id"] } + env { + name = "product_management" + value = data.external.agents_state.result["agent_product_management_id"] + } } min_replicas = 1 @@ -1508,7 +1521,7 @@ resource "null_resource" "ai_model_deployments" { --model-name "gpt-4o-mini" ` --model-version "2024-07-18" ` --model-format "OpenAI" ` - --sku-capacity 10 ` + --sku-capacity 1 ` --sku-name "GlobalStandard" if ($LASTEXITCODE -eq 0) { @@ -2129,7 +2142,7 @@ resource "null_resource" "deploy_multi_agents" { } Write-Host "Installing required Azure SDK packages..." 
- & $pythonCmd -m pip install -q --pre 'azure-ai-projects>=2.0.0b1' azure-identity python-dotenv + & $pythonCmd -m pip install -q --pre 'azure-ai-projects>=2.0.0b1' 'azure-ai-agents>=1.1.0' azure-identity python-dotenv if ($LASTEXITCODE -ne 0) { Write-Host "ERROR: Failed to install required packages" From 48fc8db8fad63a6ef112e88c397dc72a1c549098 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Mon, 2 Feb 2026 16:17:07 +0000 Subject: [PATCH 3/3] Update visitor count --- README.md | 4 ++-- TROUBLESHOOTING.md | 4 ++-- terraform-infrastructure/README.md | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 836d3d4..83a6270 100644 --- a/README.md +++ b/README.md @@ -213,7 +213,7 @@ graph TD
-  Total views
-  Refresh Date: 2026-01-30
+  Total views
+  Refresh Date: 2026-02-02
diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 6533b13..80d611f 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -347,7 +347,7 @@ terraform apply
-  Total views
-  Refresh Date: 2026-01-30
+  Total views
+  Refresh Date: 2026-02-02
diff --git a/terraform-infrastructure/README.md b/terraform-infrastructure/README.md index 9cb3228..9b19cf5 100644 --- a/terraform-infrastructure/README.md +++ b/terraform-infrastructure/README.md @@ -119,7 +119,7 @@ graph TD;
-  Total views
-  Refresh Date: 2026-01-30
+  Total views
+  Refresh Date: 2026-02-02