From 7b70f7b09f89aa264cb27169bf6cbfe2afd846ba Mon Sep 17 00:00:00 2001 From: brown9804 Date: Wed, 3 Dec 2025 09:52:59 -0600 Subject: [PATCH 1/2] patch ACR to web app with multiagent --- .gitignore | 1 + src/app/agents/agents_state.json | 27 - src/app/agents/deploy_real_agents.py | 5 +- src/app/templates/index.html | 14 +- terraform-infrastructure/main.tf | 534 +++++++++++++----- terraform-infrastructure/read_agents_state.py | 3 +- 6 files changed, 421 insertions(+), 163 deletions(-) delete mode 100644 src/app/agents/agents_state.json diff --git a/.gitignore b/.gitignore index 341f4d3..9d26a5d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,7 @@ LogFiles deploy.log __pycache__ *.log +*agents_state.json # .tfstate files *.tfstate diff --git a/src/app/agents/agents_state.json b/src/app/agents/agents_state.json deleted file mode 100644 index dc0c3f4..0000000 --- a/src/app/agents/agents_state.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "cora": { - "id": "asst_PlBscdXbszB9KSwdKsY4O2ft", - "hash": "ec1323afb9692d92de14373b05eb60026bb3738f5fb3303976f74d5c40536092", - "status": "existing" - }, - "interior_designer": { - "id": "asst_RbfreBvHmlR1JVPvUjfMRGvX", - "hash": "0fbee2d10b87eee5a9d0bd87a9d7af60f327c9093edd47d8e6683505db5aabba", - "status": "existing" - }, - "inventory_agent": { - "id": "asst_nZQaxH8eRuZywig5umn4OHfA", - "hash": "87deafceb6532b78ef075dd0e084909c4177286a0808cb9755c9a289f0076ba3", - "status": "existing" - }, - "customer_loyalty": { - "id": "asst_DMnuT808C6ydiTwmgWLnwXz8", - "hash": "1e0ffd8c5b4dac8cd247b90966b513f89b5c1c366edd476ca164d27b86dc276c", - "status": "existing" - }, - "cart_manager": { - "id": "asst_2PzHUwpLAFoT2KIlGd45xrg3", - "hash": "bd3985311b2b5e0d4d88ae4583c5c3b36113d6ddbbee67e6d36924f34292ccab", - "status": "existing" - } -} \ No newline at end of file diff --git a/src/app/agents/deploy_real_agents.py b/src/app/agents/deploy_real_agents.py index 8a30ed3..73d8781 100644 --- a/src/app/agents/deploy_real_agents.py +++ b/src/app/agents/deploy_real_agents.py @@ -99,7 +99,10 @@ def deploy_agents(): ] # Load prior state (instruction hashes) if present - state_path = os.path.join(os.path.dirname(__file__), "agents_state.json") + # Write to terraform temp directory instead of src/app/agents + terraform_dir = os.path.join(os.path.dirname(__file__), "..", "..", "..", "terraform-infrastructure") + state_path = os.path.join(terraform_dir, ".terraform", "agents_state.json") + os.makedirs(os.path.dirname(state_path), exist_ok=True) prior_state = {} if os.path.exists(state_path): try: diff --git a/src/app/templates/index.html b/src/app/templates/index.html index f5bb3a0..b7e2a8d 100644 --- a/src/app/templates/index.html +++ b/src/app/templates/index.html @@ -314,14 +314,14 @@

🏠 Zava AI Shopping Assistant

function getAgentDisplayName(agent) { const agentNames = { - 'cora': 'Cora', - 'interior_design': 'Design Specialist', - 'inventory': 'Inventory Manager', - 'customer_loyalty': 'Rewards Specialist', - 'cart': 'Cart Manager', - 'single': 'Assistant' + 'cora': '🏠 Cora', + 'interior_design': '🎨 Design Specialist', + 'inventory': '📦 Inventory Manager', + 'customer_loyalty': '🎁 Rewards Specialist', + 'cart': '🛒 Cart Manager', + 'single': '🤖 Assistant' }; - return agentNames[agent] || 'Assistant'; + return agentNames[agent] || '🤖 Assistant'; } function addMessage(text, type, agentName = null, imageUrl = null) { diff --git a/terraform-infrastructure/main.tf b/terraform-infrastructure/main.tf index 67f46ae..68ea357 100644 --- a/terraform-infrastructure/main.tf +++ b/terraform-infrastructure/main.tf @@ -94,13 +94,10 @@ resource "azapi_resource" "storage" { properties = { accessTier = "Hot" allowSharedKeyAccess = true - defaultToOAuthAuthentication = false - allowBlobPublicAccess = false minimumTlsVersion = "TLS1_2" supportsHttpsTrafficOnly = true } }) - identity { type = "SystemAssigned" } @@ -230,7 +227,6 @@ resource "azurerm_log_analytics_workspace" "law" { resource_group_name = azurerm_resource_group.rg.name sku = "PerGB2018" retention_in_days = 90 - daily_quota_gb = 1 depends_on = [ azurerm_resource_group.rg @@ -283,7 +279,10 @@ resource "azurerm_container_registry_webhook" "webhook" { "Content-Type" = "application/json" } - depends_on = [azurerm_container_registry.acr] + depends_on = [ + azurerm_container_registry.acr, + azurerm_linux_web_app.app + ] } resource "azurerm_service_plan" "appserviceplan" { @@ -306,15 +305,19 @@ resource "azurerm_linux_web_app" "app" { } site_config { - always_on = false + always_on = true http2_enabled = true minimum_tls_version = "1.2" + # Ensure App Service waits for container readiness + health_check_path = "/health" + health_check_eviction_time_in_min = 10 application_stack { - docker_image_name = "zava-chat-app:latest" - docker_registry_url = "https://${local.registry_name}.azurecr.io" - docker_registry_username = azurerm_container_registry.acr.admin_username - docker_registry_password = azurerm_container_registry.acr.admin_password + docker_image_name = "zava-chat-app:latest" + # Use full https URL for docker registry + docker_registry_url = "https://${local.registry_name}.azurecr.io" } + # Use system-assigned managed identity for ACR pulls (AcrPull role assignment granted below) + container_registry_use_managed_identity = true } app_settings = { @@ -348,13 +351,14 @@ resource "azurerm_linux_web_app" "app" { COSMOS_DB_KEY = var.enable_cosmos_local_auth ? "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/cosmos-primary-key)" : "AAD_AUTH" STORAGE_CONNECTION_STRING = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/storage-connection-string)" - # Multi-Agent Configuration - Real agent IDs from deployment + # Multi-Agent Configuration - Agent IDs from Key Vault USE_MULTI_AGENT = var.enable_multi_agent ? "true" : "false" - cora = try(jsondecode(file("${path.module}/agent_ids.json")).cora, "asst_local_cora") - interior_designer = try(jsondecode(file("${path.module}/agent_ids.json")).interior_designer, "asst_local_interior_design") - inventory_agent = try(jsondecode(file("${path.module}/agent_ids.json")).inventory_agent, "asst_local_inventory") - customer_loyalty = try(jsondecode(file("${path.module}/agent_ids.json")).customer_loyalty, "asst_local_customer_loyalty") - cart_manager = try(jsondecode(file("${path.module}/agent_ids.json")).cart_manager, "asst_local_cart_manager") + AZURE_AI_AGENT_ENDPOINT = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/agent-endpoint)" + AGENT_CORA_ID = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/agent-cora-id)" + AGENT_INTERIOR_DESIGNER_ID = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/agent-interior-designer-id)" + AGENT_INVENTORY_AGENT_ID = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/agent-inventory-agent-id)" + AGENT_CUSTOMER_LOYALTY_ID = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/agent-customer-loyalty-id)" + AGENT_CART_MANAGER_ID = "@Microsoft.KeyVault(SecretUri=${azurerm_key_vault.kv.vault_uri}secrets/agent-cart-manager-id)" CUSTOMER_ID = "CUST001" } @@ -364,6 +368,17 @@ resource "azurerm_linux_web_app" "app" { ] } +# Grant AcrPull role to Web App managed identity so it can pull private images without admin credentials +resource "azurerm_role_assignment" "webapp_acr_pull" { + scope = azurerm_container_registry.acr.id + role_definition_name = "AcrPull" + principal_id = azurerm_linux_web_app.app.identity[0].principal_id + depends_on = [ + azurerm_linux_web_app.app, + azurerm_container_registry.acr + ] +} + # Key Vault for central secret management resource "azurerm_key_vault" "kv" { name = local.key_vault_name @@ -416,25 +431,21 @@ resource "azurerm_key_vault_secret" "search_admin_key" { depends_on = [azurerm_key_vault.kv] } +# Fetch storage keys unconditionally +data "azapi_resource_action" "storage_keys_unconditional" { + type = "Microsoft.Storage/storageAccounts@2023-01-01" + resource_id = azapi_resource.storage.id + action = "listKeys" + response_export_values = ["keys"] + body = jsonencode({}) + depends_on = [azapi_resource.storage] +} + resource "azurerm_key_vault_secret" "storage_connection_string" { name = "storage-connection-string" - value = azapi_resource.storage.id != "" ? trimspace(chomp(join("", []))) : "placeholder" # placeholder; will be overridden below via provisioner + value = "DefaultEndpointsProtocol=https;AccountName=${local.storage_account};AccountKey=${jsondecode(data.azapi_resource_action.storage_keys_unconditional.output).keys[0].value};EndpointSuffix=core.windows.net" key_vault_id = azurerm_key_vault.kv.id - depends_on = [azurerm_key_vault.kv] - lifecycle { ignore_changes = [value] } -} - -resource "null_resource" "update_storage_connection_secret" { - depends_on = [azurerm_key_vault_secret.storage_connection_string, azapi_resource.storage] - provisioner "local-exec" { - command = <<-EOT - Write-Host "Updating storage-connection-string secret value..." - $conn = az storage account show-connection-string --resource-group ${azurerm_resource_group.rg.name} --name ${local.storage_account} --query connectionString -o tsv - az keyvault secret set --vault-name ${azurerm_key_vault.kv.name} --name storage-connection-string --value $conn | Out-Null - Write-Host "[OK] storage-connection-string secret updated" - EOT - interpreter = ["PowerShell", "-Command"] - } + depends_on = [azurerm_key_vault.kv, data.azapi_resource_action.storage_keys_unconditional] } resource "azurerm_key_vault_secret" "cosmos_primary_key" { @@ -451,6 +462,50 @@ data "external" "agents_state" { depends_on = [null_resource.deploy_multi_agents] } +# Store agent IDs in Key Vault +resource "azurerm_key_vault_secret" "agent_cora_id" { + name = "agent-cora-id" + value = data.external.agents_state.result["agent_cora_id"] + key_vault_id = azurerm_key_vault.kv.id + depends_on = [azurerm_key_vault.kv, data.external.agents_state] +} + +resource "azurerm_key_vault_secret" "agent_interior_designer_id" { + name = "agent-interior-designer-id" + value = data.external.agents_state.result["agent_interior_designer_id"] + key_vault_id = azurerm_key_vault.kv.id + depends_on = [azurerm_key_vault.kv, data.external.agents_state] +} + +resource "azurerm_key_vault_secret" "agent_inventory_agent_id" { + name = "agent-inventory-agent-id" + value = data.external.agents_state.result["agent_inventory_agent_id"] + key_vault_id = azurerm_key_vault.kv.id + depends_on = [azurerm_key_vault.kv, data.external.agents_state] +} + +resource "azurerm_key_vault_secret" "agent_customer_loyalty_id" { + name = "agent-customer-loyalty-id" + value = data.external.agents_state.result["agent_customer_loyalty_id"] + key_vault_id = azurerm_key_vault.kv.id + depends_on = [azurerm_key_vault.kv, data.external.agents_state] +} + +resource "azurerm_key_vault_secret" "agent_cart_manager_id" { + name = "agent-cart-manager-id" + value = data.external.agents_state.result["agent_cart_manager_id"] + key_vault_id = azurerm_key_vault.kv.id + depends_on = [azurerm_key_vault.kv, data.external.agents_state] +} + +# Store agent endpoint in Key Vault (transformed to services.ai.azure.com) +resource "azurerm_key_vault_secret" "agent_endpoint" { + name = "agent-endpoint" + value = "https://${local.ai_foundry_name}.services.ai.azure.com/models" + key_vault_id = azurerm_key_vault.kv.id + depends_on = [azurerm_key_vault.kv] +} + # App Service Plan autoscale resource "azurerm_monitor_autoscale_setting" "appservice_autoscale" { name = "${var.name_prefix}-${local.suffix}-asp-autoscale" @@ -899,7 +954,7 @@ data "azapi_resource_action" "storage_list_keys" { action = "listKeys" response_export_values = ["keys"] body = jsonencode({}) - depends_on = [azapi_resource.storage] + depends_on = [data.azapi_resource_action.storage_keys_unconditional] } data "azapi_resource_action" "search_admin_keys" { @@ -954,7 +1009,7 @@ resource "azapi_resource" "storage_connection" { authType = "AccountKey" isSharedToAll = true credentials = { - key = jsondecode(data.azapi_resource_action.storage_list_keys[0].output).keys[0].value + key = jsondecode(data.azapi_resource_action.storage_keys_unconditional.output).keys[0].value } metadata = { ApiType = "Azure" @@ -1334,41 +1389,53 @@ resource "null_resource" "data_pipeline" { exit 1 } - # Create virtual environment - Write-Host "Creating Python virtual environment..." - if (Test-Path "venv") { - Write-Host "Virtual environment already exists, removing..." - Remove-Item -Recurse -Force venv +# Create virtual environment +Write-Host "Creating Python virtual environment..." +if (Test-Path "venv") { + Write-Host "Virtual environment already exists, attempting to remove..." + try { + Remove-Item -Recurse -Force venv -ErrorAction Stop + Start-Sleep -Seconds 2 + } catch { + Write-Host "WARNING: Could not remove existing venv, it may be locked. Trying to continue..." + } +} + +try { + python -m venv venv --clear +} catch { + Write-Host "WARNING: Failed to create virtual environment: $_" + Write-Host "Skipping data pipeline - you can run it manually later" + Write-Host "Run from src directory: python -m venv venv; .\venv\Scripts\activate; pip install -r requirements.txt" + exit 0 +} + +# Install dependencies directly to venv without activation +Write-Host "Installing Python dependencies (with retry)..." +$pythonExe = "venv\Scripts\python.exe" + +if (Test-Path $pythonExe) { + # Use python -m pip instead of pip.exe to avoid file locking issues + $maxAttempts = 3 + for ($i = 1; $i -le $maxAttempts; $i++) { + Write-Host "pip install attempt $i..." + & $pythonExe -m pip install --upgrade pip --no-warn-script-location 2>&1 | Out-Null + & $pythonExe -m pip install -r requirements.txt + if ($LASTEXITCODE -eq 0) { + Write-Host "Dependencies installed successfully on attempt $i" + break + } else { + Write-Host "pip install failed (exit $LASTEXITCODE)." + if ($i -lt $maxAttempts) { + Write-Host "Retrying after short backoff..." + Start-Sleep -Seconds 5 + } else { + Write-Host "WARNING: Dependencies failed after $maxAttempts attempts" + Write-Host "Skipping data pipeline - you can run it manually later" + exit 0 } - python -m venv venv - - # Install dependencies directly to venv without activation - Write-Host "Installing Python dependencies (with retry)..." - $pythonExe = "venv\Scripts\python.exe" - $pipExe = "venv\Scripts\pip.exe" - - if (Test-Path $pythonExe) { - & $pythonExe -m pip install --upgrade pip - $maxAttempts = 3 - for ($i = 1; $i -le $maxAttempts; $i++) { - Write-Host "pip install attempt $i..." - & $pipExe install -r requirements.txt - if ($LASTEXITCODE -eq 0) { - Write-Host "Dependencies installed successfully on attempt $i" - break - } else { - Write-Host "pip install failed (exit $LASTEXITCODE)." - if ($i -lt $maxAttempts) { - Write-Host "Retrying after short backoff..." - Start-Sleep -Seconds 5 - } else { - Write-Host "ERROR: Dependencies failed after $maxAttempts attempts" - exit 1 - } - } - } - - Write-Host "Python environment ready" + } + } Write-Host "Python environment ready" Write-Host "" # Check if CSV data file exists @@ -1396,8 +1463,8 @@ resource "null_resource" "data_pipeline" { Write-Host "- Data imported to search index" } } else { - Write-Host "ERROR: Failed to create virtual environment" - exit 1 + Write-Host "WARNING: Failed to create virtual environment" + Write-Host "Skipping data pipeline - you can run it manually later" } Write-Host "" @@ -1608,43 +1675,63 @@ resource "null_resource" "deploy_multi_agents" { $env:PYTHONIOENCODING = "utf-8" Write-Host "Building container image in Azure Container Registry..." - Write-Host "This may take 2-3 minutes. Checking status via ACR task logs..." + Write-Host "This may take 2-3 minutes..." - # Start build and get run ID (ignore encoding errors in output) - $buildOutput = az acr build ` - --resource-group ${azurerm_resource_group.rg.name} ` - --registry ${local.registry_name} ` - --image zava-chat-app:latest ` - --file "$srcPath\Dockerfile" ` - --no-logs ` - "$srcPath" 2>&1 | Out-String - - # Extract run ID from output - if ($buildOutput -match "Run ID: (\w+)") { - $runId = $Matches[1] - Write-Host "Build queued with Run ID: $runId" - Write-Host "Waiting for build to complete..." - - # Wait and check status - Start-Sleep -Seconds 60 - $status = az acr task logs --registry ${local.registry_name} --run-id $runId --query "[-1]" 2>&1 | Select-String "was successful" + # Build with logs enabled to see progress + $buildSuccess = $false + try { + az acr build ` + --resource-group ${azurerm_resource_group.rg.name} ` + --registry ${local.registry_name} ` + --image zava-chat-app:latest ` + --file "$srcPath\Dockerfile" ` + "$srcPath" - if ($status) { - Write-Host "[SUCCESS] Container build completed" - Write-Host "Restarting Web App to pull new image..." - az webapp restart --resource-group ${azurerm_resource_group.rg.name} --name ${local.web_app_name} | Out-Null - Write-Host "[OK] Web App restarted" + if ($LASTEXITCODE -eq 0) { + Write-Host "[SUCCESS] ACR build completed successfully" + $buildSuccess = $true } else { - Write-Host "WARNING: Could not confirm build status, but continuing..." - Write-Host "Check Azure Portal ACR build logs for run ID: $runId" - az webapp restart --resource-group ${azurerm_resource_group.rg.name} --name ${local.web_app_name} | Out-Null + Write-Host "[ERROR] ACR build failed with exit code: $LASTEXITCODE" } - } else { - Write-Host "WARNING: Could not extract run ID from build output" - Write-Host "Build may still be in progress - check Azure Portal" - Write-Host "Restarting Web App anyway..." - az webapp restart --resource-group ${azurerm_resource_group.rg.name} --name ${local.web_app_name} | Out-Null + } catch { + Write-Host "[ERROR] ACR build exception: $_" } + + # Verify image exists + Write-Host "" + Write-Host "Verifying image in ACR..." + $imageExists = $false + for ($attempt = 1; $attempt -le 3; $attempt++) { + try { + $imgCheck = az acr repository show --name ${local.registry_name} --image zava-chat-app:latest --query "name" -o tsv 2>&1 + if ($LASTEXITCODE -eq 0 -and $imgCheck -eq "zava-chat-app") { + Write-Host "[OK] Image verified in ACR: zava-chat-app:latest" + $imageExists = $true + break + } + } catch { } + if ($attempt -lt 3) { + Write-Host "Image not found yet, waiting 10s... (attempt $attempt/3)" + Start-Sleep -Seconds 10 + } + } + + if (-not $imageExists) { + Write-Host "[CRITICAL] Image not found in ACR after build. Web App cannot start." + Write-Host "Please check ACR build logs in Azure Portal." + exit 0 # Non-blocking but logged + } + + # Web App will automatically pull image using managed identity (AcrPull role) + # No need to set ACR credentials - managed identity handles authentication + Write-Host "" + Write-Host "[INFO] Web App configured to use managed identity for ACR access" + Write-Host "[INFO] Webhook will trigger automatic deployment on image push" + + Write-Host "" + Write-Host "Restarting Web App to pull new image..." + az webapp restart --resource-group ${azurerm_resource_group.rg.name} --name ${local.web_app_name} | Out-Null + Write-Host "[OK] Web App restarted" Write-Host "" Write-Host "Multi-agent deployment complete!" EOT @@ -1832,31 +1919,10 @@ resource "null_resource" "deploy_chat_app" { Write-Host "[OK] Environment variables configured" } - # Get ACR admin credentials - $acrUsername = az acr credential show ` - --name ${local.registry_name} ` - --query "username" ` - --output tsv - - $acrPassword = az acr credential show ` - --name ${local.registry_name} ` - --query "passwords[0].value" ` - --output tsv - - # Update container image with ACR credentials - Write-Host "Configuring container deployment..." - az webapp config container set ` - --resource-group ${azurerm_resource_group.rg.name} ` - --name ${local.web_app_name} ` - --docker-custom-image-name ${local.registry_name}.azurecr.io/zava-chat-app:latest ` - --docker-registry-server-url https://${local.registry_name}.azurecr.io ` - --docker-registry-server-user "$acrUsername" ` - --docker-registry-server-password "$acrPassword" ` - --enable-app-service-storage false | Out-Null - - if ($LASTEXITCODE -eq 0) { - Write-Host "[OK] Container configuration updated" - } + # Web App uses managed identity for ACR access (configured in site_config) + # Webhook will trigger automatic deployment when image is pushed + Write-Host "[INFO] Container will be pulled automatically via managed identity" + Write-Host "[INFO] Webhook configured for automatic updates on push" # Restart the web app Write-Host "" @@ -1961,4 +2027,218 @@ resource "null_resource" "verify_multi_agent_remote" { } } +# Post-deploy automated fix to ensure Web App starts successfully +resource "null_resource" "post_deploy_health" { + depends_on = [ + azurerm_linux_web_app.app, + azurerm_role_assignment.webapp_acr_pull, + azurerm_key_vault_access_policy.app_policy + ] + + provisioner "local-exec" { + interpreter = ["PowerShell", "-Command"] + command = <<-EOT + Write-Host "" + Write-Host "============================================================================" + Write-Host "=== AUTOMATED WEB APP STARTUP FIX ===" + Write-Host "============================================================================" + $rg = "${azurerm_resource_group.rg.name}" + $name = "${local.web_app_name}" + $url = "https://${local.web_app_name}.azurewebsites.net" + + Write-Host "" + Write-Host "[1/7] Checking current Web App status..." + $status = az webapp show --name $name --resource-group $rg --query "state" -o tsv + Write-Host "Current state: $status" + + if ($status -eq "Stopped") { + Write-Host "[DETECTED] Web App is stopped - applying automated fix" + } + + Write-Host "" + Write-Host "[2/7] Enabling detailed logging for diagnostics..." + az webapp log config --name $name --resource-group $rg ` + --level verbose ` + --web-server-logging filesystem ` + --docker-container-logging filesystem ` + --detailed-error-messages true ` + --failed-request-tracing true | Out-Null + + Write-Host "" + Write-Host "[2b/7] Verifying container configuration..." + $cfg = az webapp config container show --name $name --resource-group $rg --output json | ConvertFrom-Json + $desiredImage = "${local.registry_name}.azurecr.io/zava-chat-app:latest" + $needsConfig = $true + if ($cfg) { + $currentImage = $cfg.dockerCustomImageName + if ($currentImage -and ($currentImage -eq $desiredImage)) { + Write-Host "[OK] Container image already set: $currentImage" + $needsConfig = $false + } else { + Write-Host "[INFO] Container image differs or not set (current: '$currentImage'). Will apply fallback configuration." + } + } else { + Write-Host "[INFO] No container config returned; will apply fallback." + } + + if ($needsConfig) { + try { + $acrUser = az acr credential show --name ${local.registry_name} --query "username" -o tsv + $acrPass = az acr credential show --name ${local.registry_name} --query "passwords[0].value" -o tsv + az webapp config container set ` + --resource-group $rg ` + --name $name ` + --docker-custom-image-name $desiredImage ` + --docker-registry-server-url https://${local.registry_name}.azurecr.io ` + --docker-registry-server-user "$acrUser" ` + --docker-registry-server-password "$acrPass" ` + --enable-app-service-storage false | Out-Null + Write-Host "[OK] Applied fallback container configuration" + } catch { + Write-Host "[WARN] Could not apply container configuration: $_" + } + } + + Write-Host "" + Write-Host "[3/7] Ensuring Web App is stopped cleanly..." + az webapp stop --name $name --resource-group $rg | Out-Null + Write-Host "Waiting 15 seconds for complete shutdown..." + Start-Sleep -Seconds 15 + + Write-Host "" + Write-Host "[4/7] Verifying container image exists in ACR..." + $imageExists = az acr repository show --name ${local.registry_name} --image zava-chat-app:latest --query "name" -o tsv 2>$null + if ($imageExists) { + Write-Host "[OK] Container image found: zava-chat-app:latest" + } else { + Write-Host "[WARNING] Container image may still be building - will retry startup" + } + + Write-Host "" + Write-Host "[5/7] Starting Web App with fresh container pull..." + az webapp start --name $name --resource-group $rg | Out-Null + Write-Host "[OK] Start command sent" + + Write-Host "" + Write-Host "[6/7] Waiting for container pull and app initialization..." + Write-Host "This takes 2-5 minutes for first deployment..." + + # Progressive wait with status checks + $waitIntervals = @(30, 30, 30, 30, 30, 30) # 3 minutes total + foreach ($interval in $waitIntervals) { + Start-Sleep -Seconds $interval + $currentStatus = az webapp show --name $name --resource-group $rg --query "state" -o tsv + Write-Host " Status: $currentStatus (waited $($waitIntervals.IndexOf($interval) * 30 + $interval)s)" + + if ($currentStatus -eq "Running") { + Write-Host " [OK] App is now Running" + break + } + } + + Write-Host "" + Write-Host "[7/7] Testing application health endpoint..." + $health = "$url/health" + $maxAttempts = 10 + $ok = $false + + for ($i=1; $i -le $maxAttempts; $i++) { + Write-Host " Attempt $i/$maxAttempts - Testing: $health" + try { + $resp = Invoke-RestMethod -Uri $health -TimeoutSec 30 -Method GET -ErrorAction Stop + if ($resp.status -eq 'healthy') { + Write-Host " [SUCCESS] App is healthy and responding!" + Write-Host " Response: $($resp | ConvertTo-Json -Compress)" + $ok = $true + break + } else { + Write-Host " Status: $($resp | ConvertTo-Json -Depth 4)" + } + } catch { + $errMsg = $_.Exception.Message + if ($errMsg -like "*503*" -or $errMsg -like "*502*") { + Write-Host " Container still starting up... (HTTP $($_.Exception.Response.StatusCode))" + } else { + Write-Host " Error: $errMsg" + } + } + + if ($i -lt $maxAttempts) { + Start-Sleep -Seconds 20 + } + } + + if (-not $ok) { + Write-Host "" + Write-Host "[DIAGNOSTICS] Health checks did not pass during apply. Collecting logs..." + # Show recent logs to console and save a snapshot + try { + $diagLog = Join-Path (Split-Path $PWD.Path -Parent) "deploy.log" + Write-Host "Saving recent logs to $diagLog" + az webapp log show --name $name --resource-group $rg | Out-File -FilePath $diagLog -Encoding utf8 + Write-Host "[OK] Recent logs saved" + } catch { Write-Host "Could not save recent logs: $_" } + + # Download the zipped log bundle + try { + $logZip = Join-Path (Split-Path $PWD.Path -Parent) "app-logs.zip" + Write-Host "Downloading log bundle to $logZip" + az webapp log download --name $name --resource-group $rg --log-file $logZip | Out-Null + Write-Host "[OK] Logs bundle saved" + } catch { Write-Host "Could not download logs bundle: $_" } + } + + Write-Host "" + Write-Host "============================================================================" + if ($ok) { + Write-Host "=== [SUCCESS] WEB APP IS HEALTHY AND READY ===" + Write-Host "" + Write-Host "Your application is live at:" + Write-Host " $url" + Write-Host "" + Write-Host "Test the chat interface in your browser now!" + } else { + Write-Host "=== [INFO] FINAL STATUS CHECK ===" + + # Get final state + $finalState = az webapp show --name $name --resource-group $rg --query "state" -o tsv + Write-Host "Web App State: $finalState" + + if ($finalState -eq "Running") { + Write-Host "" + Write-Host "The app is Running but health endpoint hasn't responded yet." + Write-Host "This is normal for first deployment - the container may need more time." + Write-Host "" + Write-Host "NEXT STEPS:" + Write-Host "1. Wait 2-3 more minutes for full initialization" + Write-Host "2. Check the app at: $url" + Write-Host "3. View logs: az webapp log tail --name $name --resource-group $rg" + Write-Host "" + Write-Host "The app will be ready shortly!" + } else { + Write-Host "" + Write-Host "[ACTION REQUIRED] App is in state: $finalState" + Write-Host "" + Write-Host "Attempting one more restart..." + az webapp restart --name $name --resource-group $rg | Out-Null + Start-Sleep -Seconds 30 + + Write-Host "" + Write-Host "MANUAL VERIFICATION STEPS:" + Write-Host "1. Go to Azure Portal > $name > Overview" + Write-Host "2. Click 'Restart' button at the top" + Write-Host "3. Wait 5 minutes and visit: $url" + Write-Host "4. Check logs: az webapp log tail --name $name --resource-group $rg" + } + } + Write-Host "============================================================================" + Write-Host "" + EOT + } + + triggers = { + always_run = timestamp() + } +} + diff --git a/terraform-infrastructure/read_agents_state.py b/terraform-infrastructure/read_agents_state.py index 72dd777..0ed2e41 100644 --- a/terraform-infrastructure/read_agents_state.py +++ b/terraform-infrastructure/read_agents_state.py @@ -4,7 +4,8 @@ # Terraform external data source requires a flat map of strings. # We will flatten the agents_state.json into keys like "agent_NAME_id" and "agent_NAME_status". -state_path = os.path.join(os.path.dirname(__file__), '..', 'src', 'app', 'agents', 'agents_state.json') +# Read from .terraform directory (temporary location) +state_path = os.path.join(os.path.dirname(__file__), '.terraform', 'agents_state.json') result = {} try: From 3d9591d3b5acc2606137ef7d5a675e4bf0ee5276 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Wed, 3 Dec 2025 16:46:27 +0000 Subject: [PATCH 2/2] Update visitor count --- README.md | 4 ++-- TROUBLESHOOTING.md | 4 ++-- src/DATA_PIPELINE.md | 4 ++-- terraform-infrastructure/README.md | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 88051f7..c8eecc1 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ graph TD
- Total views -

Refresh Date: 2025-11-29

+ Total views +

Refresh Date: 2025-12-03

diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 0c4bfa6..d103d8e 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -347,7 +347,7 @@ terraform apply
- Total views -

Refresh Date: 2025-11-29

+ Total views +

Refresh Date: 2025-12-03

diff --git a/src/DATA_PIPELINE.md b/src/DATA_PIPELINE.md index 4bda863..9102512 100644 --- a/src/DATA_PIPELINE.md +++ b/src/DATA_PIPELINE.md @@ -277,7 +277,7 @@ az search index show-statistics \
- Total views -

Refresh Date: 2025-11-29

+ Total views +

Refresh Date: 2025-12-03

diff --git a/terraform-infrastructure/README.md b/terraform-infrastructure/README.md index 0e53b3e..d69138d 100644 --- a/terraform-infrastructure/README.md +++ b/terraform-infrastructure/README.md @@ -119,7 +119,7 @@ graph TD;
- Total views -

Refresh Date: 2025-11-29

+ Total views +

Refresh Date: 2025-12-03