diff --git a/docs/AutomatedLocalSetup.md b/docs/AutomatedLocalSetup.md new file mode 100644 index 000000000..bd1009e99 --- /dev/null +++ b/docs/AutomatedLocalSetup.md @@ -0,0 +1,199 @@ +# Automated Local Development Setup + +Two scripts — one for each platform — that automate the entire local development setup: Azure authentication, `.env` generation, Python/Node dependency installation, RBAC role assignment, and VS Code configuration. + +| Platform | Script | +|---|---| +| Linux / macOS / WSL / Git Bash | `infra/scripts/setup_local_dev.sh` | +| Windows PowerShell | `infra/scripts/setup_local_dev.ps1` | + +--- + +## Prerequisites + +| Tool | Purpose | +|---|---| +| [Python 3.12+](https://www.python.org/downloads/) | Backend and frontend virtual environments | +| [Node.js 18+](https://nodejs.org/) | Frontend build | +| [uv](https://github.com/astral-sh/uv) | Fast Python package management (backend & MCP) | +| [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) | Fetch Azure config and assign RBAC roles | +| [Git](https://git-scm.com/) | Source control | + +You must be logged in before running (the script will prompt if you are not): + +```bash +az login +``` + +--- + +## What It Does (in order) + +1. **Checks prerequisites** — Python 3.12+, Node.js, npm, uv, Azure CLI, Git +2. **Azure authentication** — logs you in if needed, confirms the active subscription +2b. **Checks Azure roles & permissions** — verifies you have role-assignment permission (Owner / User Access Administrator / RBAC Administrator) before attempting RBAC step (non-fatal warning) +3. **Fetches Azure configuration** — reads deployment outputs or queries resources individually to build `src/backend/.env` +4. **Assigns RBAC roles** — grants your user account the roles needed to run the app locally: + - Cosmos DB Built-in Data Contributor + - Azure AI User, Azure AI Developer, Cognitive Services OpenAI User + - Search Index Data Contributor + - Storage Blob Data Contributor +5. 
**Sets up Backend** (`src/backend`) — creates a `.venv` with `uv`, installs all dependencies +6. **Sets up MCP Server** (`src/mcp_server`) — same as backend +7. **Sets up Frontend** (`src/App`) — creates a `.venv`, installs Python deps, runs `npm install` and `npm run build` +8. **Configures VS Code** — writes `.vscode/extensions.json` and `.vscode/settings.json` (skip with `--skip-vscode`) +9. **Prints a start summary** with the exact commands to run each service + +--- + +## Quick Start + +```bash +# bash (Linux / macOS / WSL / Git Bash) +bash infra/scripts/setup_local_dev.sh --resource-group <resource-group-name> + +# PowerShell (Windows) +.\infra\scripts\setup_local_dev.ps1 -ResourceGroup <resource-group-name> +``` + +The script will: +- Fetch all Azure settings and write `src/backend/.env` automatically +- Create Python virtual environments and install all dependencies +- Assign your account the required Azure roles + +--- + +## All Options + +### Bash + +```bash +bash infra/scripts/setup_local_dev.sh [options] + +Options: + --resource-group, -g <name> Azure Resource Group (auto-detected from .azure/ if omitted) + --subscription, -s <id> Azure Subscription ID (uses current az account if omitted) + --skip-vscode Skip writing .vscode/ settings files + --skip-prereqs Skip prerequisite checks + -h, --help Show help +``` + +### PowerShell + +```powershell +.\infra\scripts\setup_local_dev.ps1 [options] + +Options: + -ResourceGroup <name> Azure Resource Group (auto-detected from .azure/ if omitted) + -Subscription <id> Azure Subscription ID (uses current az account if omitted) + -SkipVSCode Skip writing .vscode/ settings files + -SkipPrereqs Skip prerequisite checks +``` + +--- + +## Examples + +```bash +# Fetch config from Azure and set up everything +bash infra/scripts/setup_local_dev.sh --resource-group rg-macae-dev + +# Use a specific subscription +bash infra/scripts/setup_local_dev.sh --resource-group rg-macae-dev --subscription 00000000-0000-0000-0000-000000000000 + +# Skip VS Code settings (e.g. 
using a different editor) +bash infra/scripts/setup_local_dev.sh --resource-group rg-macae-dev --skip-vscode + +# Skip prerequisite checks (useful in CI or if tools are on a non-standard PATH) +bash infra/scripts/setup_local_dev.sh --resource-group rg-macae-dev --skip-prereqs +``` + +```powershell +# Fetch config from Azure and set up everything +.\infra\scripts\setup_local_dev.ps1 -ResourceGroup rg-macae-dev + +# Use a specific subscription +.\infra\scripts\setup_local_dev.ps1 -ResourceGroup rg-macae-dev -Subscription 00000000-0000-0000-0000-000000000000 + +# Skip VS Code settings +.\infra\scripts\setup_local_dev.ps1 -ResourceGroup rg-macae-dev -SkipVSCode +``` + +--- + +## Auto-Detection (no `--resource-group`) + +If you ran `azd up` to deploy, the scripts will automatically find the `.azure/<environment-name>/.env` file and use it — no flags needed: + +```bash +bash infra/scripts/setup_local_dev.sh # bash: reads .azure/<environment-name>/.env written by azd up +.\infra\scripts\setup_local_dev.ps1 # PowerShell: same +``` + +If no `.azure/` folder exists and no `--resource-group` is provided, the script will prompt you to enter the resource group name interactively. + +--- + +## RBAC Roles Assigned + +The script automatically grants your user account the following roles (skips if already assigned): + +| Role | Resource | Purpose | +|---|---|---| +| Cosmos DB Built-in Data Contributor | Cosmos DB account | Read/write conversation history | +| Azure AI User | AI Foundry project | Call AI Foundry APIs | +| Azure AI Developer | AI Foundry project | Deploy and manage agents | +| Cognitive Services OpenAI User | AI Foundry project | Call OpenAI endpoints | +| Search Index Data Contributor | Azure AI Search | Read/write search indexes | +| Storage Blob Data Contributor | Storage account | Read/write blob storage | + +> **Note:** RBAC changes can take 5–10 minutes to propagate before the app can use them. 
+ +--- + +## After Setup + +Once the script finishes, start the three services in separate terminals (Backend first, then MCP, then Frontend): + +``` +Terminal 1 — Backend (port 8000): + cd src/backend + Activate virtual environment: + PowerShell : .\.venv\Scripts\Activate.ps1 + Git Bash : source .venv/Scripts/activate + Linux/macOS: source .venv/bin/activate + python app.py + +Terminal 2 — MCP Server (port 9000): + cd src/mcp_server + Activate virtual environment: + PowerShell : .\.venv\Scripts\Activate.ps1 + Git Bash : source .venv/Scripts/activate + Linux/macOS: source .venv/bin/activate + python mcp_server.py --transport streamable-http --host 0.0.0.0 --port 9000 + +Terminal 3 — Frontend (port 3000): + cd src/App + Activate virtual environment: + PowerShell : .\.venv\Scripts\Activate.ps1 + Git Bash : source .venv/Scripts/activate + Linux/macOS: source .venv/bin/activate + python frontend_server.py +``` + +Then open [http://localhost:3000](http://localhost:3000). + +--- + +## Troubleshooting + +| Symptom | Likely cause | Fix | +|---|---|---| +| `az login` loop | CLI not installed or PATH issue | Install [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) | +| `.env` values empty | RG has no deployment outputs | Pass `--resource-group` explicitly | +| `uv: command not found` | uv not installed | `pip install uv` or see [uv docs](https://github.com/astral-sh/uv) | +| RBAC errors at runtime | Roles not propagated | Wait 10 min for Azure propagation; re-run script | +| `source .venv/Scripts/activate: No such file` | Incomplete venv | Delete `.venv/` folder and re-run the script | +| Frontend npm errors | Node.js version too old | Upgrade to Node.js 18+ | + +For more detail, see [TroubleShootingSteps.md](TroubleShootingSteps.md). 
diff --git a/docs/DeployLocalChanges.md b/docs/DeployLocalChanges.md new file mode 100644 index 000000000..f0a68a858 --- /dev/null +++ b/docs/DeployLocalChanges.md @@ -0,0 +1,178 @@ +# Deploy Local Changes to Azure + +Two scripts — one for each platform — that build Docker images for the services you specify (or all by default), push them to ACR, and update the live Azure resources. + +| Platform | Script | +|---|---| +| Linux / macOS / WSL | `infra/scripts/deploy_to_azure.sh` | +| Windows PowerShell | `infra/scripts/deploy_to_azure.ps1` | + +--- + +## Prerequisites + +| Tool | Required? | Purpose | +|---|---|---| +| [Azure CLI](https://learn.microsoft.com/cli/azure/install-azure-cli) | **Yes** | Manage Azure resources, ACR login | +| [Docker Desktop](https://www.docker.com/products/docker-desktop/) | **Yes** | Build and push Docker images | + +Log in before running (if you are not logged in, the script runs `az login` for you): +```bash +az login +``` + +--- + +## What It Does (in order) + +1. **Checks prerequisites** — Azure CLI and Docker (both required) +1b. **Checks Azure roles & permissions** — verifies you have Contributor + role-assignment permission on the resource group (non-fatal warning) +2. **Discovers Azure resources** — finds the backend/MCP Container Apps and frontend App Service in your resource group +3. **Resolves ACR** — uses `--acr` if given; otherwise prompts for an ACR name, and if you just press Enter uses the first ACR found in the resource group or creates a new Basic ACR when none exists +4. **Determines services** — deploys all services by default, or only the ones you specify with `--services` +5. **Generates an image tag** — auto-generates `YYYYMMDD-HHMMSS` or uses your custom tag +6. **Builds & pushes images** — builds locally with Docker, pushes to ACR +7. **Updates Azure resources** — updates the Container App / App Service to the new image tag +8. 
**Prints a summary** with rollback commands + +--- + +## Quick Start + +```bash +# bash (Linux/macOS/WSL) +bash infra/scripts/deploy_to_azure.sh --resource-group <resource-group-name> + +# PowerShell (Windows) +.\infra\scripts\deploy_to_azure.ps1 -ResourceGroup <resource-group-name> +``` + +The script will: +- Deploy all services by default (use `--services` to pick specific ones) +- Build images locally with Docker and push to ACR +- Ask which ACR to use (press Enter to use the first ACR in the resource group, or to create one if none exists) + +--- + +## All Options + +### Bash + +```bash +./infra/scripts/deploy_to_azure.sh --resource-group <name> [options] + +Required: + -g, --resource-group <name> Azure Resource Group name + +Options: + --acr <name> Skip the ACR prompt; use this ACR directly + --services <list> Deploy only these services (default: all) + Values: backend, mcp, frontend (comma-separated) + --tag <tag> Use a custom image tag instead of auto-generated + --dry-run Preview all steps without making any changes + --build-only Build and push images, but don't update Azure + --deploy-only Update Azure resources only (images must exist) + --skip-role-assignment Skip AcrPull role assignment (use if roles already exist) + -h, --help Show help +``` + +### PowerShell + +```powershell +.\infra\scripts\deploy_to_azure.ps1 -ResourceGroup <name> [options] + +Required: + -ResourceGroup <name> Azure Resource Group name + +Options: + -Acr <name> Skip the ACR prompt; use this ACR directly + -Services <list> Deploy only these services (default: all) + Values: "backend,mcp,frontend" + -Tag <tag> Use a custom image tag instead of auto-generated + -DryRun Preview all steps without making any changes + -BuildOnly Build and push images, but don't update Azure + -DeployOnly Update Azure resources only (images must exist) + -SkipRoleAssignment Skip AcrPull role assignment (use if roles already exist) +``` + +--- + +## Examples + +```bash +# Deploy all services (default) +bash infra/scripts/deploy_to_azure.sh --resource-group rg-macae-dev + +# Deploy only the frontend +bash infra/scripts/deploy_to_azure.sh --resource-group rg-macae-dev --services 
frontend + +# Deploy backend and MCP with a specific ACR +bash infra/scripts/deploy_to_azure.sh --resource-group rg-macae-dev --services backend,mcp --acr myregistry + +# Preview without making changes +bash infra/scripts/deploy_to_azure.sh --resource-group rg-macae-dev --dry-run + +# Build images only (no Azure update) +bash infra/scripts/deploy_to_azure.sh --resource-group rg-macae-dev --build-only + +# Update Azure only (images already pushed) +bash infra/scripts/deploy_to_azure.sh --resource-group rg-macae-dev --deploy-only --tag 20260506-120000-abc1234 + +# Skip AcrPull role assignment (roles already exist) +bash infra/scripts/deploy_to_azure.sh --resource-group rg-macae-dev --skip-role-assignment +``` + +```powershell +# Deploy all services (default) +.\infra\scripts\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev + +# Deploy only backend +.\infra\scripts\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev -Services "backend" + +# Dry run +.\infra\scripts\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev -DryRun + +# Skip AcrPull role assignment (roles already exist) +.\infra\scripts\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev -SkipRoleAssignment +``` + +--- + +## ACR Selection + +If `--acr` / `-Acr` is not provided, the script **always prompts first**: + +``` +Enter ACR name to use (or press Enter to see available ACRs / create new): +``` + +- **Type a name** → validates and uses that ACR directly +- **Press Enter** → discovers ACRs in the resource group: + - If one or more ACRs are found, the first one is selected automatically + - If none are found, a new Basic ACR is created in the same resource group + +In all cases, `AcrPull` is assigned to the managed identities of each service. 
+ +--- + +## How Azure Authentication Works + +The scripts use **managed identity** (not admin credentials or passwords): + +- Each Container App and App Service has a user-assigned managed identity +- The script assigns the `AcrPull` role to those identities on the ACR +- `az containerapp registry set --identity ` wires the identity to the registry config + +This means no passwords are stored anywhere. + +If the role assignment step fails (e.g. your account lacks `Microsoft.Authorization/roleAssignments/write`), ask a subscription Owner to grant `User Access Administrator` on the resource group. Once roles are already in place you can skip this step on subsequent runs with `--skip-role-assignment` / `-SkipRoleAssignment`. + +--- + +## Rollback + +At the end of each run, the summary prints ready-to-run rollback commands, e.g.: + +```bash +az containerapp update --name ca-macae --resource-group rg-macae-dev --image myacr.azurecr.io/macaebackend:20260505-120000-abc1234 +``` diff --git a/infra/scripts/deploy_to_azure.ps1 b/infra/scripts/deploy_to_azure.ps1 new file mode 100644 index 000000000..48b87ea64 --- /dev/null +++ b/infra/scripts/deploy_to_azure.ps1 @@ -0,0 +1,840 @@ +# ============================================================================== +# MACAE - Deploy Local Code to Azure +# ============================================================================== +# +# Usage: +# .\deploy_to_azure.ps1 -ResourceGroup [options] +# +# Examples: +# .\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev +# .\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev -Services "backend,mcp" +# .\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev -Acr myacr +# .\deploy_to_azure.ps1 -ResourceGroup rg-macae-dev -DryRun +# ============================================================================== + +param( + [Parameter(Mandatory=$true)] + [string]$ResourceGroup, + [string]$Acr = "", + [string]$Services = "", + [string]$Tag = "", + [switch]$DryRun, + [switch]$BuildOnly, + 
[switch]$DeployOnly, + [switch]$SkipRoleAssignment +) + +$ErrorActionPreference = "Stop" + +# ============================================================================== +# Configuration +# ============================================================================== + +$ScriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path +$RepoRoot = (Resolve-Path (Join-Path $ScriptDir "..\..")).Path +$BackendDir = Join-Path $RepoRoot "src\backend" +$McpDir = Join-Path $RepoRoot "src\mcp_server" +$FrontendDir = Join-Path $RepoRoot "src\App" + +$BackendImageName = "macaebackend" +$McpImageName = "macaemcp" +$FrontendImageName = "macaefrontend" + +# ============================================================================== +# Logging +# ============================================================================== + +function Write-LogInfo { param([string]$msg) Write-Host "[i] $msg" -ForegroundColor Blue } +function Write-LogSuccess { param([string]$msg) Write-Host "[✓] $msg" -ForegroundColor Green } +function Write-LogWarn { param([string]$msg) Write-Host "[!] $msg" -ForegroundColor Yellow } +function Write-LogError { param([string]$msg) Write-Host "[✗] $msg" -ForegroundColor Red } +function Write-LogStep { param([string]$msg) Write-Host "`n━━━ $msg ━━━`n" -ForegroundColor Cyan } + +# Run an az command, retrying on transient network / operation-in-progress errors (at most 4 attempts). +# Returns the combined stdout+stderr of the last attempt. Failures are returned, not thrown, +# so callers must inspect $LASTEXITCODE themselves. +function Invoke-AzRetry { + param([string[]]$AzArgs) + $maxAttempts = 4 + for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) { + $out = az @AzArgs 2>&1 + if ($LASTEXITCODE -eq 0) { return $out } + $outStr = $out -join "`n" + if ($outStr -match 'OperationInProgress|ContainerAppOperation') { + $reason = "Azure operation in progress"; $delay = 30 + } elseif ($outStr -match 'RemoteDisconnected|Connection aborted|timed out|ECONNRESET|HTTPSConnectionPool|Max retries exceeded|NewConnectionError|getaddrinfo|Failed to establish') { + $reason = "Transient network error"; $delay = 15 + } else { + # Not a known transient failure: hand the output back immediately. + return $out + } + # Only announce and wait when another attempt actually follows. + if ($attempt -lt $maxAttempts) { + Write-LogWarn "$reason (attempt $attempt/$maxAttempts), retrying in ${delay}s..." + Start-Sleep -Seconds $delay + } + } + return $out +} 
+ +# ============================================================================== +# Step 1: Prerequisites +# ============================================================================== + +function Check-Prerequisites { + Write-LogStep "Step 1: Checking Prerequisites" + + $missing = @() + + if (Get-Command az -ErrorAction SilentlyContinue) { + Write-LogSuccess "Azure CLI found" + } else { + $missing += "azure-cli" + } + + if (Get-Command docker -ErrorAction SilentlyContinue) { + $null = docker info 2>&1 + if ($LASTEXITCODE -eq 0) { + Write-LogSuccess "Docker found and running" + } else { + Write-LogError "Docker found but daemon not running. Please start Docker Desktop." 
+ exit 1 + } + } else { + $missing += "docker" + } + + if ($missing.Count -gt 0) { + Write-LogError "Missing prerequisites: $($missing -join ', ')" + Write-Host "" + foreach ($tool in $missing) { + switch ($tool) { + "azure-cli" { + Write-Host " ┌─ Azure CLI ───────────────────────────────────────────────────" + Write-Host " │ Download: https://aka.ms/installazurecliwindows" + Write-Host " │ Or: winget install Microsoft.AzureCLI" + Write-Host " │ Verify: az --version" + Write-Host " └──────────────────────────────────────────────────────────────" + } + "docker" { + Write-Host " ┌─ Docker Desktop ──────────────────────────────────────────────" + Write-Host " │ Download: https://www.docker.com/products/docker-desktop" + Write-Host " │ Or: winget install Docker.DockerDesktop" + Write-Host " │ Verify: docker --version" + Write-Host " └──────────────────────────────────────────────────────────────" + } + } + } + exit 1 + } + + # Check Azure login; only the exit code matters, so the output is discarded. + $null = az account show 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-LogWarn "Not logged into Azure CLI. Running 'az login'..." + az login + # Stop here if the interactive login failed or was cancelled, instead of reporting success. + if ($LASTEXITCODE -ne 0) { + Write-LogError "Azure login failed. Run 'az login' manually, then re-run this script." + exit 1 + } + } + Write-LogSuccess "Logged into Azure CLI" +} + +# ============================================================================== +# Step 1b: Azure Role / Permission Check +# ============================================================================== +# +# Per docs/DeploymentGuide.md, the deploying account needs: +# - Contributor (or Owner) on the subscription -- to update resources +# - User Access Administrator OR Role Based Access Control Administrator +# (or Owner) -- to assign the AcrPull role to managed identities +# This check is non-fatal: group-inherited roles may not always enumerate. 
+# ============================================================================== + +function Check-AzureRoles { + Write-LogStep "Step 1b: Checking Azure Roles & Permissions" + + $subId = az account show --query id -o tsv 2>$null + $userId = az ad signed-in-user show --query id -o tsv 2>$null + if (-not $subId -or -not $userId) { + Write-LogWarn "Could not determine subscription or user identity -- skipping role check." + return + } + + # Query at resource-group scope. --include-inherited still returns subscription-level + # assignments, and roles granted only on the resource group are no longer reported as missing. + $scope = "/subscriptions/$subId/resourceGroups/$ResourceGroup" + $rolesRaw = az role assignment list --assignee $userId --scope $scope ` + --include-inherited --include-groups --query "[].roleDefinitionName" -o tsv 2>$null + if (-not $rolesRaw) { + Write-LogWarn "Unable to enumerate role assignments at $scope." + Write-LogWarn "Required: Contributor + (User Access Administrator OR Role Based Access Control Administrator), or Owner." + return + } + + $roles = ($rolesRaw -split "`r?`n" | Where-Object { $_ -ne "" }) + $hasResMgmt = ($roles -contains 'Owner') -or ($roles -contains 'Contributor') + $hasRoleMgmt = ($roles -contains 'Owner') -or ($roles -contains 'User Access Administrator') -or ($roles -contains 'Role Based Access Control Administrator') + + if ($hasResMgmt) { Write-LogSuccess "Resource management role found (Owner/Contributor)" } + else { Write-LogWarn "Missing 'Contributor' (or 'Owner') on resource group '$ResourceGroup' (inherited assignments included) -- Azure resource updates may fail." } + + if ($hasRoleMgmt) { Write-LogSuccess "Role-assignment permission found (Owner/UAA/RBAC Admin)" } + else { Write-LogWarn "Missing 'User Access Administrator' / 'Role Based Access Control Administrator' (or 'Owner') -- AcrPull role assignment may fail. Pass -SkipRoleAssignment if roles are already in place." 
} +} + +# ============================================================================== +# Step 2: Discover Azure Resources +# ============================================================================== +# Verifies the resource group exists (exits 1 otherwise), then records in $script: variables +# the backend and MCP Container App names, the first Web App in the group, and the image each +# one currently runs (kept for rollback). + +function Discover-Resources { + Write-LogStep "Step 2: Discovering Azure Resources" + + $rgCheck = az group show --name $ResourceGroup 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-LogError "Resource group '$ResourceGroup' not found." + exit 1 + } + Write-LogSuccess "Resource group: $ResourceGroup" + + # Discover container apps + $caList = az containerapp list --resource-group $ResourceGroup --query "[].name" -o tsv 2>$null + + $script:BackendCA = "" + $script:McpCA = "" + + if ($caList) { + foreach ($app in ($caList -split "`n")) { + $app = $app.Trim() + # Test the MCP pattern first: "ca-mcp-*" names also match the broader "ca-*" pattern. + # If several apps match a pattern, the last one listed wins. + if ($app -like "ca-mcp-*") { + $script:McpCA = $app + } elseif ($app -like "ca-*") { + $script:BackendCA = $app + } + } + } + + if ($script:BackendCA) { Write-LogSuccess "Backend Container App: $script:BackendCA" } + else { Write-LogWarn "Backend Container App: not found in RG" } + + if ($script:McpCA) { Write-LogSuccess "MCP Container App: $script:McpCA" } + else { Write-LogWarn "MCP Container App: not found in RG" } + + # Discover frontend web app (only the first Web App returned for the resource group is used) + $script:FrontendApp = az webapp list --resource-group $ResourceGroup --query "[0].name" -o tsv 2>$null + if ($script:FrontendApp) { Write-LogSuccess "Frontend Web App: $script:FrontendApp" } + else { Write-LogWarn "Frontend Web App: not found in RG" } + + # Capture current images for rollback + $script:OldBackendImage = "" + $script:OldMcpImage = "" + $script:OldFrontendImage = "" + + if ($script:BackendCA) { + $script:OldBackendImage = az containerapp show --name $script:BackendCA --resource-group $ResourceGroup ` + --query "properties.template.containers[0].image" -o tsv 2>$null + Write-LogInfo "Current backend image: $script:OldBackendImage" + } + if ($script:McpCA) { + $script:OldMcpImage = az containerapp show --name $script:McpCA --resource-group 
$ResourceGroup ` + --query "properties.template.containers[0].image" -o tsv 2>$null + Write-LogInfo "Current MCP image: $script:OldMcpImage" + } + if ($script:FrontendApp) { + $script:OldFrontendImage = az webapp config show --name $script:FrontendApp --resource-group $ResourceGroup ` + --query "linuxFxVersion" -o tsv 2>$null + Write-LogInfo "Current frontend image: $script:OldFrontendImage" + } +} + +# ============================================================================== +# Step 3: Resolve ACR +# ============================================================================== + +# Resolve ACR resource ID reliably: +# 1. Try with -ResourceGroup (fastest, most reliable for RG-scoped ACRs) +# 2. Try global lookup (for ACRs in a different RG) +# 3. Build from known parts as fallback (handles post-create propagation delay) +function Get-AcrResourceId([string]$acrName, [string]$rg = $ResourceGroup) { + $id = az acr show --name $acrName --resource-group $rg --query "id" -o tsv 2>$null + if (-not $id) { + $id = az acr show --name $acrName --query "id" -o tsv 2>$null + } + if (-not $id) { + $subId = az account show --query id -o tsv 2>$null + $id = "/subscriptions/$subId/resourceGroups/$rg/providers/Microsoft.ContainerRegistry/registries/$acrName" + } + return $id +} + +function Resolve-Acr { + Write-LogStep "Step 3: Resolving Container Registry" + + if ($Acr) { + # User provided ACR via -Acr flag — try RG-scoped lookup first, then global. + # Accept either a bare registry name or a full "<name>.azurecr.io" login server. + # Do not store this in $input: that is a PowerShell automatic variable (an enumerator), + # so a value assigned to it is not reliably readable a second time. + $acrInput = $Acr -replace '\.azurecr\.io$', '' + $script:AcrName = az acr list --resource-group $ResourceGroup --query "[?name=='$acrInput'].name | [0]" -o tsv 2>$null + if (-not $script:AcrName) { + $script:AcrName = az acr show --name $acrInput --query "name" -o tsv 2>$null + } + if (-not $script:AcrName) { + Write-LogError "ACR '$Acr' not found or not accessible." 
+ exit 1 + } + $script:AcrLoginServer = az acr show --name $script:AcrName --query "loginServer" -o tsv 2>$null + if (-not $script:AcrLoginServer) { $script:AcrLoginServer = az acr show --name $script:AcrName --resource-group $ResourceGroup --query "loginServer" -o tsv 2>$null } + $script:AcrId = Get-AcrResourceId $script:AcrName + Write-LogSuccess "Using specified ACR: $script:AcrName ($script:AcrLoginServer)" + Assign-AcrPullRoles + return + } + + # Always ask first — no pre-discovery + Write-Host "" + # Trim so that a whitespace-only answer counts as "press Enter" rather than as an ACR name. + $userAcr = (Read-Host "Enter ACR name to use (or press Enter to see available ACRs / create new)").Trim() + + if ($userAcr) { + # Accept either a bare registry name or a full "<name>.azurecr.io" login server. + # Do not store this in $input: that is a PowerShell automatic variable (an enumerator). + $acrInput = $userAcr -replace '\.azurecr\.io$', '' + $script:AcrName = az acr show --name $acrInput --query "name" -o tsv 2>$null + if (-not $script:AcrName) { + Write-LogError "ACR '$userAcr' not found or not accessible." + exit 1 + } + $script:AcrLoginServer = az acr show --name $script:AcrName --resource-group $ResourceGroup --query "loginServer" -o tsv 2>$null + if (-not $script:AcrLoginServer) { $script:AcrLoginServer = az acr show --name $script:AcrName --query "loginServer" -o tsv 2>$null } + $script:AcrId = Get-AcrResourceId $script:AcrName + Write-LogSuccess "Using ACR: $script:AcrName ($script:AcrLoginServer)" + Assign-AcrPullRoles + return + } + + # Empty input — discover what's in the RG and auto-select or auto-create + Write-LogInfo "Looking for ACR(s) in resource group '$ResourceGroup'..." 
+ $foundAcrs = @(az acr list --resource-group $ResourceGroup --query "[].name" -o tsv 2>$null | Where-Object { $_ }) + + if ($foundAcrs.Count -gt 0) { + $script:AcrName = $foundAcrs[0] + $script:AcrLoginServer = az acr show --name $script:AcrName --resource-group $ResourceGroup --query "loginServer" -o tsv + $script:AcrId = Get-AcrResourceId $script:AcrName + Write-LogSuccess "Found and using ACR: $script:AcrName ($script:AcrLoginServer)" + Assign-AcrPullRoles + } else { + # Create new ACR in the same RG. + # Name = "acr" + up to 15 alphanumeric characters of the RG name + HHmmss, lower-cased (24 characters at most). + # NOTE(review): the registry is created even when -DryRun is set -- confirm that is intended. + $rgAlnum = $ResourceGroup -replace '[^a-zA-Z0-9]', '' + $suffix = $rgAlnum.Substring(0, [Math]::Min(15, $rgAlnum.Length)) + $ts = (Get-Date).ToString("HHmmss") + $newAcrName = ("acr$suffix$ts").ToLower() + + Write-LogInfo "Creating ACR: $newAcrName in $ResourceGroup..." + az acr create ` + --resource-group $ResourceGroup ` + --name $newAcrName ` + --sku Basic ` + --admin-enabled false ` + --output none + # Native commands do not throw under $ErrorActionPreference = "Stop"; check the exit code explicitly. + if ($LASTEXITCODE -ne 0) { + Write-LogError "Failed to create ACR '$newAcrName' in resource group '$ResourceGroup'." + exit 1 + } + + $script:AcrName = $newAcrName + $script:AcrLoginServer = az acr show --name $script:AcrName --resource-group $ResourceGroup --query "loginServer" -o tsv + $script:AcrId = Get-AcrResourceId $script:AcrName + Write-LogSuccess "Created ACR: $script:AcrName ($script:AcrLoginServer)" + Assign-AcrPullRoles + } +} + +# ============================================================================== +# ACR Pull Role Assignment +# ============================================================================== +# Grants AcrPull on the selected registry to the identity of each discovered service. +# Does nothing when -SkipRoleAssignment or -DryRun is set. + +function Assign-AcrPullRoles { + if ($SkipRoleAssignment) { + Write-LogInfo "Skipping AcrPull role assignment (-SkipRoleAssignment set)." + return + } + + # -DryRun is documented as making no changes, and creating role assignments is a change. + if ($DryRun) { + Write-LogInfo "[DRY RUN] Would assign AcrPull role to service identities on $script:AcrName" + return + } + + Write-LogInfo "Assigning AcrPull role to service identities..." + + if (-not $script:AcrId) { + Write-LogError "ACR resource ID is empty — cannot assign roles. Aborting." 
+ exit 1 + } + + # Role definition ID passed to --role for the AcrPull assignments below. + $acrPullRole = "7f951dda-4ed3-4680-a7ca-43fe172d538d" + + # Helper: resolve principal ID from a Container App (system-assigned first, then user-assigned) + function Get-CAPrincipalId([string]$caName) { + $id = az containerapp show --name $caName --resource-group $ResourceGroup ` + --query "identity.principalId" -o tsv 2>$null + if (-not $id -or $id -eq "null") { + $id = az containerapp show --name $caName --resource-group $ResourceGroup ` + --query "identity.userAssignedIdentities.*.principalId | [0]" -o tsv 2>$null + } + return $id + } + + # Helper: assign AcrPull on the ACR to one principal unless it already has it. + # Sets $script:RoleFailed on failure instead of exiting, so every service is attempted. + function Assign-Role([string]$identity, [string]$label) { + $existing = az role assignment list --assignee $identity --role $acrPullRole --scope $script:AcrId --query "[0].id" -o tsv 2>$null + if (-not $existing) { + # $identity is a principal (object) ID. Passing it as --assignee-object-id with an explicit + # principal type skips the Microsoft Graph lookup that --assignee performs, which can fail + # without directory read permission or just after an identity is created. + $createOutput = az role assignment create --assignee-object-id $identity --assignee-principal-type ServicePrincipal --role $acrPullRole --scope $script:AcrId --output none 2>&1 + if ($LASTEXITCODE -ne 0) { + Write-LogError " Failed to assign AcrPull to $label identity" + Write-LogError " Azure: $createOutput" + $script:RoleFailed = $true + } else { + Write-LogSuccess " AcrPull assigned to $label identity" + } + } else { + Write-LogInfo " AcrPull already assigned to $label identity ✓" + } + } + + $script:RoleFailed = $false + + # Backend + if ($script:BackendCA) { + $identity = Get-CAPrincipalId $script:BackendCA + if ($identity -and $identity -ne "null") { + Assign-Role $identity "backend" + } else { + Write-LogWarn " No identity found on backend Container App — cannot assign AcrPull" + $script:RoleFailed = $true + } + } + + # MCP + if ($script:McpCA) { + $identity = Get-CAPrincipalId $script:McpCA + if ($identity -and $identity -ne "null") { + Assign-Role $identity "MCP" + } else { + Write-LogWarn " No identity found on MCP Container App — cannot assign AcrPull" + $script:RoleFailed = $true + } + } + + # Frontend + if ($script:FrontendApp) { + $identity = az webapp show --name $script:FrontendApp --resource-group $ResourceGroup 
` + --query "identity.principalId" -o tsv 2>$null + if (-not $identity -or $identity -eq "null") { + $identity = az webapp show --name $script:FrontendApp --resource-group $ResourceGroup ` + --query "identity.userAssignedIdentities.*.principalId | [0]" -o tsv 2>$null + } + if ($identity -and $identity -ne "null") { + Assign-Role $identity "frontend" + } else { + Write-LogWarn " No identity found on frontend Web App — cannot assign AcrPull" + $script:RoleFailed = $true + } + } + + if ($script:RoleFailed) { + Write-Host "" + Write-LogError "One or more AcrPull role assignments failed." + Write-LogError "The container(s) will NOT be able to pull images from $($script:AcrLoginServer)." + Write-LogError "" + Write-LogError "This usually means your account lacks 'Microsoft.Authorization/roleAssignments/write'." + Write-LogError "Ask your subscription Owner to grant you 'User Access Administrator' on the RG," + Write-LogError "or run: az role assignment create --assignee <your-user-object-id> --role 'Owner' --scope /subscriptions/<subscription-id>" + Write-LogError "" + Write-LogError "If AcrPull roles are already assigned, re-run with: -SkipRoleAssignment" + exit 1 + } +} + +# ============================================================================== +# Step 4: Determine Services +# ============================================================================== +# Sets $script:DeployBackend / $script:DeployMcp / $script:DeployFrontend from -Services +# (comma-separated, case-insensitive); all three are enabled when -Services is omitted. + +function Determine-Services { + Write-LogStep "Step 4: Determining Services to Deploy" + + $script:DeployBackend = $false + $script:DeployMcp = $false + $script:DeployFrontend = $false + + if ($Services) { + foreach ($svc in ($Services -split ',')) { + $svc = $svc.Trim().ToLower() + switch ($svc) { + "backend" { $script:DeployBackend = $true } + "mcp" { $script:DeployMcp = $true } + "frontend" { $script:DeployFrontend = $true } + default { Write-LogWarn "Unknown service: $svc (valid: backend, mcp, frontend)" } + } + } + # A -Services value with no valid names (e.g. a typo) must not fall through to a run that deploys nothing. + if (-not ($script:DeployBackend -or $script:DeployMcp -or $script:DeployFrontend)) { + Write-LogError "No valid services in -Services '$Services' (valid: backend, mcp, frontend)" + exit 1 + } + } else { + Write-LogInfo "No -Services specified — deploying all services" + $script:DeployBackend = $true + $script:DeployMcp = 
$true + $script:DeployFrontend = $true + } + + Write-Host " Services to deploy:" + if ($script:DeployBackend) { Write-Host " ✓ Backend" } else { Write-Host " ○ Backend (skipped)" } + if ($script:DeployMcp) { Write-Host " ✓ MCP Server" } else { Write-Host " ○ MCP Server (skipped)" } + if ($script:DeployFrontend) { Write-Host " ✓ Frontend" } else { Write-Host " ○ Frontend (skipped)" } +} + +# ============================================================================== +# Step 5: Generate Tag +# ============================================================================== + +function Generate-Tag { + Write-LogStep "Step 5: Generating Image Tag" + + if ($Tag) { + $script:ImageTag = $Tag + } else { + $timestamp = (Get-Date).ToString("yyyyMMdd-HHmmss") + $script:ImageTag = $timestamp + } + + Write-LogSuccess "Image tag: $script:ImageTag" +} + +# ============================================================================== +# Step 6: Build & Push +# ============================================================================== + +function Build-AndPush { + Write-LogStep "Step 6: Building & Pushing Docker Images" + + if ($DeployOnly) { + Write-LogInfo "Skipping build (--DeployOnly mode)" + return + } + + Write-LogInfo "Logging into ACR: $script:AcrName..." + az acr login --name $script:AcrName + if ($LASTEXITCODE -ne 0) { + Write-LogError "ACR login failed for '$script:AcrName'." + Write-LogError " Likely causes:" + Write-LogError " - Your account lacks 'AcrPush' / 'Contributor' on the registry." + Write-LogError " - Docker daemon not running." + Write-LogError " - Tenant blocks docker-credential helpers (try: az acr login -n $script:AcrName --expose-token)." 
+ exit 1 + } + Write-LogSuccess "ACR login successful" + + $env:DOCKER_BUILDKIT = "1" + + # Track per-service success so a partial failure does not strand the others + $script:BuildResults = [ordered]@{} + + if ($script:DeployBackend) { + $fullImage = "$($script:AcrLoginServer)/$BackendImageName`:$($script:ImageTag)" + Write-LogInfo "Building backend image: $fullImage" + if ($DryRun) { + Write-LogInfo "[DRY RUN] Would build: docker build -t $fullImage $BackendDir" + $script:BuildResults["backend"] = "dry-run" + } else { + docker build -t $fullImage $BackendDir + if ($LASTEXITCODE -ne 0) { + Write-LogError "Backend image build FAILED -- continuing with other services" + $script:BuildResults["backend"] = "build-failed" + $script:DeployBackend = $false + } else { + Write-LogSuccess "Backend image built" + docker push $fullImage + if ($LASTEXITCODE -ne 0) { + Write-LogError "Backend image push FAILED -- continuing with other services" + $script:BuildResults["backend"] = "push-failed" + $script:DeployBackend = $false + } else { + Write-LogSuccess "Backend image pushed: $fullImage" + $script:BuildResults["backend"] = "ok" + } + } + } + } + + if ($script:DeployMcp) { + $fullImage = "$($script:AcrLoginServer)/$McpImageName`:$($script:ImageTag)" + Write-LogInfo "Building MCP image: $fullImage" + if ($DryRun) { + Write-LogInfo "[DRY RUN] Would build: docker build -t $fullImage $McpDir" + $script:BuildResults["mcp"] = "dry-run" + } else { + docker build -t $fullImage $McpDir + if ($LASTEXITCODE -ne 0) { + Write-LogError "MCP image build FAILED -- continuing with other services" + $script:BuildResults["mcp"] = "build-failed" + $script:DeployMcp = $false + } else { + Write-LogSuccess "MCP image built" + docker push $fullImage + if ($LASTEXITCODE -ne 0) { + Write-LogError "MCP image push FAILED -- continuing with other services" + $script:BuildResults["mcp"] = "push-failed" + $script:DeployMcp = $false + } else { + Write-LogSuccess "MCP image pushed: $fullImage" + 
$script:BuildResults["mcp"] = "ok" + } + } + } + } + + if ($script:DeployFrontend) { + $fullImage = "$($script:AcrLoginServer)/$FrontendImageName`:$($script:ImageTag)" + Write-LogInfo "Building frontend image: $fullImage" + if ($DryRun) { + Write-LogInfo "[DRY RUN] Would build: docker build -t $fullImage $FrontendDir" + $script:BuildResults["frontend"] = "dry-run" + } else { + docker build -t $fullImage $FrontendDir + if ($LASTEXITCODE -ne 0) { + Write-LogError "Frontend image build FAILED -- continuing with other services" + $script:BuildResults["frontend"] = "build-failed" + $script:DeployFrontend = $false + } else { + Write-LogSuccess "Frontend image built" + docker push $fullImage + if ($LASTEXITCODE -ne 0) { + Write-LogError "Frontend image push FAILED -- continuing with other services" + $script:BuildResults["frontend"] = "push-failed" + $script:DeployFrontend = $false + } else { + Write-LogSuccess "Frontend image pushed: $fullImage" + $script:BuildResults["frontend"] = "ok" + } + } + } + } + + # If all selected services failed to build/push, bail before touching Azure resources + $okCount = ($script:BuildResults.Values | Where-Object { $_ -eq "ok" -or $_ -eq "dry-run" }).Count + if ($okCount -eq 0 -and $script:BuildResults.Count -gt 0) { + Write-LogError "All image builds/pushes failed -- aborting before touching Azure resources." 
+ exit 1 + } + + if ($BuildOnly) { + Write-LogSuccess "Build & push complete (-BuildOnly mode, skipping Azure update)" + } +} + +# ============================================================================== +# Step 7: Configure ACR on Resources (if changed) +# ============================================================================== + +function Set-CaRegistry([string]$caName, [string]$label) { + # Skip if registry + identity already correctly configured + $currentServer = az containerapp show --name $caName --resource-group $ResourceGroup ` + --query "properties.configuration.registries[0].server" -o tsv 2>$null + $currentIdentity = az containerapp show --name $caName --resource-group $ResourceGroup ` + --query "properties.configuration.registries[0].identity" -o tsv 2>$null + if ($currentServer -eq $script:AcrLoginServer -and $currentIdentity -and $currentIdentity -ne "null") { + Write-LogSuccess "$label`: ACR registry already configured — skipping" + return + } + $identityId = az containerapp show --name $caName --resource-group $ResourceGroup ` + --query "identity.userAssignedIdentities | keys(@) | [0]" -o tsv 2>$null + $identityArg = if ($identityId -and $identityId -ne "null") { $identityId } else { "system" } + Write-LogInfo "Configuring $label registry → $($script:AcrLoginServer)..." + $regOut = az containerapp registry set --name $caName --resource-group $ResourceGroup ` + --server $script:AcrLoginServer --identity $identityArg --output none 2>&1 + if ($LASTEXITCODE -eq 0) { + Write-LogSuccess "$label registry configured" + } elseif ($regOut -match 'Operation expired|OperationInProgress|ContainerAppOperation|HTTPSConnectionPool|Max retries exceeded|NewConnectionError|getaddrinfo|Failed to establish|RemoteDisconnected|Connection aborted') { + Write-LogWarn "$label registry config accepted but status polling failed (network/timeout). The app will pull correctly once the revision is ready." 
+ } else { + Write-LogError "$label registry set FAILED — $regOut" + throw "$label registry set failed" + } +} + +function Configure-AcrOnResources { + if ($script:DeployBackend -and $script:BackendCA) { + if ($DryRun) { Write-LogInfo "[DRY RUN] Would configure backend registry" } + else { Set-CaRegistry $script:BackendCA "Backend" } + } + if ($script:DeployMcp -and $script:McpCA) { + if ($DryRun) { Write-LogInfo "[DRY RUN] Would configure MCP registry" } + else { Set-CaRegistry $script:McpCA "MCP" } + } + if ($script:DeployFrontend -and $script:FrontendApp) { + if ($DryRun) { Write-LogInfo "[DRY RUN] Would update frontend App Service registry config" } + else { + Write-LogInfo "Updating frontend App Service registry config..." + # Capture exit codes from BOTH commands — without this, a failure in the + # first call (DOCKER_REGISTRY_SERVER_URL) would be masked if the second + # call succeeds, leaving the Web App with a half-configured registry. + az webapp config appsettings set --name $script:FrontendApp --resource-group $ResourceGroup ` + --settings DOCKER_REGISTRY_SERVER_URL="https://$($script:AcrLoginServer)" --output none + $appsettingsRc = $LASTEXITCODE + az webapp config set --name $script:FrontendApp --resource-group $ResourceGroup ` + --generic-configurations '{\"acrUseManagedIdentityCreds\": true}' --output none + $configRc = $LASTEXITCODE + if ($appsettingsRc -ne 0 -or $configRc -ne 0) { + Write-LogError "Frontend registry config FAILED (appsettings rc=$appsettingsRc, config rc=$configRc) — image pull may fail." 
+ } else { + Write-LogSuccess "Frontend registry config updated" + } + } + } +} + +# ============================================================================== +# Step 8: Update Azure Resources +# ============================================================================== + +function Update-AzureResources { + Write-LogStep "Step 7: Updating Azure Resources" + + if ($BuildOnly) { return } + + Configure-AcrOnResources + + # Backend + if ($script:DeployBackend) { + if (-not $script:BackendCA) { + Write-LogWarn "No backend Container App found — skipping backend deployment" + } else { + $fullImage = "$($script:AcrLoginServer)/$BackendImageName`:$($script:ImageTag)" + Write-LogInfo "Updating backend: $($script:BackendCA) → $fullImage" + if ($DryRun) { + Write-LogInfo "[DRY RUN] Would run: az containerapp update --name $($script:BackendCA) --image $fullImage" + } else { + $updOut = Invoke-AzRetry @('containerapp','update','--name',$script:BackendCA,'--resource-group',$ResourceGroup,'--image',$fullImage,'--output','none') + if ($LASTEXITCODE -eq 0) { + Write-LogSuccess "Backend updated successfully" + } elseif ($updOut -match 'Operation expired|OperationInProgress|ContainerAppOperation|HTTPSConnectionPool|Max retries exceeded|NewConnectionError|getaddrinfo|Failed to establish|RemoteDisconnected|Connection aborted') { + Write-LogWarn "Backend image update accepted but status polling failed (network/timeout). Azure will complete provisioning shortly." 
+ } else { + Write-LogError "Backend update failed: $updOut"; throw "Backend update failed" + } + } + } + } + + # MCP + if ($script:DeployMcp) { + if (-not $script:McpCA) { + Write-LogWarn "No MCP Container App found — skipping MCP deployment" + } else { + $fullImage = "$($script:AcrLoginServer)/$McpImageName`:$($script:ImageTag)" + Write-LogInfo "Updating MCP: $($script:McpCA) → $fullImage" + if ($DryRun) { + Write-LogInfo "[DRY RUN] Would run: az containerapp update --name $($script:McpCA) --image $fullImage" + } else { + $updOut = Invoke-AzRetry @('containerapp','update','--name',$script:McpCA,'--resource-group',$ResourceGroup,'--image',$fullImage,'--output','none') + if ($LASTEXITCODE -eq 0) { + Write-LogSuccess "MCP updated successfully" + } elseif ($updOut -match 'Operation expired|OperationInProgress|ContainerAppOperation|HTTPSConnectionPool|Max retries exceeded|NewConnectionError|getaddrinfo|Failed to establish|RemoteDisconnected|Connection aborted') { + Write-LogWarn "MCP image update accepted but status polling failed (network/timeout). Azure will complete provisioning shortly." + } else { + Write-LogError "MCP update failed: $updOut"; throw "MCP update failed" + } + } + } + } + + # Frontend + if ($script:DeployFrontend) { + if (-not $script:FrontendApp) { + Write-LogWarn "No Frontend Web App found — skipping frontend deployment" + } else { + $fullImage = "$($script:AcrLoginServer)/$FrontendImageName`:$($script:ImageTag)" + Write-LogInfo "Updating frontend: $($script:FrontendApp) → $fullImage" + if ($DryRun) { + Write-LogInfo "[DRY RUN] Would run: az webapp config container set + restart" + } else { + # az is a native command and does not throw on failure, so check + # $LASTEXITCODE after each call instead of reporting success blindly + # (same failure style as the backend / MCP branches: log, then throw). + az webapp config container set ` + --name $script:FrontendApp ` + --resource-group $ResourceGroup ` + --container-image-name $fullImage ` + --container-registry-url "https://$($script:AcrLoginServer)" ` + --output none + if ($LASTEXITCODE -ne 0) { + Write-LogError "Frontend container image update failed"; throw "Frontend update failed" + } + + Write-LogInfo "Restarting frontend App Service..." 
+ az webapp restart --name $script:FrontendApp --resource-group $ResourceGroup --output none + if ($LASTEXITCODE -ne 0) { + Write-LogError "Frontend restart failed"; throw "Frontend restart failed" + } + Write-LogSuccess "Frontend updated and restarted" + } + } + } +} + +# ============================================================================== +# Summary +# ============================================================================== + +function Print-Summary { + Write-LogStep "Deployment Summary" + + Write-Host " Resource Group: $ResourceGroup" + Write-Host " ACR: $($script:AcrLoginServer)" + Write-Host " Image Tag: $($script:ImageTag)" + Write-Host "" + + if ($script:BuildResults -and $script:BuildResults.Count -gt 0) { + Write-Host " Build results:" + foreach ($k in $script:BuildResults.Keys) { + $v = $script:BuildResults[$k] + $glyph = if ($v -eq "ok" -or $v -eq "dry-run") { "[OK]" } else { "[FAIL]" } + Write-Host (" {0,-6} {1,-9} {2}" -f $glyph, $k, $v) + } + Write-Host "" + } + + if ($script:DeployBackend -and $script:BackendCA) { + Write-Host " Backend: $($script:AcrLoginServer)/$BackendImageName`:$($script:ImageTag)" + } + if ($script:DeployMcp -and $script:McpCA) { + Write-Host " MCP: $($script:AcrLoginServer)/$McpImageName`:$($script:ImageTag)" + } + if ($script:DeployFrontend -and $script:FrontendApp) { + Write-Host " Frontend: $($script:AcrLoginServer)/$FrontendImageName`:$($script:ImageTag)" + } + + Write-Host "" + Write-Host " ┌─ Rollback Commands (if needed) ───────────────────────────────" + Write-Host " │ NOTE: When rolling back to images from a different registry" + Write-Host " │ (e.g. biabcontainerreg.azurecr.io public defaults), the Web App" + Write-Host " │ also needs acrUseManagedIdentityCreds disabled and the" + Write-Host " │ DOCKER_REGISTRY_SERVER_URL updated, otherwise the pull will" + Write-Host " │ fail with ACRTokenRetrievalFailure. Container Apps fall back" + Write-Host " │ to anonymous pull automatically for public registries." 
+ Write-Host " └──────────────────────────────────────────────────────────────" + Write-Host "" + Write-Host " Copy/paste the commands below (one per line):" + Write-Host "" + + if ($script:DeployBackend -and $script:BackendCA -and $script:OldBackendImage) { + Write-Host " # Backend rollback" + Write-Host " az containerapp update --name $($script:BackendCA) --resource-group $ResourceGroup --image $($script:OldBackendImage)" + Write-Host "" + } + if ($script:DeployMcp -and $script:McpCA -and $script:OldMcpImage) { + Write-Host " # MCP rollback" + Write-Host " az containerapp update --name $($script:McpCA) --resource-group $ResourceGroup --image $($script:OldMcpImage)" + Write-Host "" + } + if ($script:DeployFrontend -and $script:FrontendApp -and $script:OldFrontendImage) { + $oldImg = $script:OldFrontendImage -replace '^DOCKER\|', '' + $oldRegistry = ($oldImg -split '/')[0] + Write-Host " # Frontend rollback (run all 4 lines)" + Write-Host " az webapp config set --name $($script:FrontendApp) --resource-group $ResourceGroup --generic-configurations '{\""acrUseManagedIdentityCreds\"": false}'" + Write-Host " az webapp config appsettings set --name $($script:FrontendApp) --resource-group $ResourceGroup --settings DOCKER_REGISTRY_SERVER_URL=https://$oldRegistry" + Write-Host " az webapp config container set --name $($script:FrontendApp) --resource-group $ResourceGroup --container-image-name $oldImg" + Write-Host " az webapp restart --name $($script:FrontendApp) --resource-group $ResourceGroup" + Write-Host "" + } + + if ($DryRun) { + Write-Host "" + Write-LogWarn "This was a DRY RUN — no changes were made." + } else { + Write-Host "" + Write-LogSuccess "Deployment complete!" 
+ } +} + +# ============================================================================== +# Main +# ============================================================================== + +Write-Host "" +Write-Host "╔══════════════════════════════════════════════════════════════╗" -ForegroundColor Cyan +Write-Host "║ MACAE - Deploy Local Code to Azure ║" -ForegroundColor Cyan +Write-Host "╚══════════════════════════════════════════════════════════════╝" -ForegroundColor Cyan +Write-Host "" + +Check-Prerequisites +Check-AzureRoles +Discover-Resources +Resolve-Acr +Determine-Services +Generate-Tag +Build-AndPush +Update-AzureResources +Print-Summary diff --git a/infra/scripts/deploy_to_azure.sh b/infra/scripts/deploy_to_azure.sh new file mode 100644 index 000000000..7e5805096 --- /dev/null +++ b/infra/scripts/deploy_to_azure.sh @@ -0,0 +1,1007 @@ +#!/usr/bin/env bash +# ============================================================================== +# MACAE - Deploy Local Code to Azure +# ============================================================================== +# +# Builds Docker images locally, pushes to ACR, and updates +# the deployed Azure resources (Container Apps / App Service). +# +# Usage: +# ./deploy_to_azure.sh -g [options] +# +# Examples: +# ./deploy_to_azure.sh -g rg-macae-dev # Deploy all services +# ./deploy_to_azure.sh -g rg-macae-dev --services backend,mcp # Deploy specific services +# ./deploy_to_azure.sh -g rg-macae-dev --acr myacr # Use specific ACR +# ./deploy_to_azure.sh -g rg-macae-dev --dry-run # Preview what would happen +# ============================================================================== + +set -euo pipefail + +# On Windows Git Bash (MSYS/MinGW), paths starting with / get converted to Windows +# paths when passed to native .exe programs. This breaks ARM resource IDs like +# /subscriptions/... but we NEED normal conversion for docker build context paths. 
+# Solution: wrap 'az' with MSYS_NO_PATHCONV=1 so only az calls skip conversion. +# See: https://github.com/Azure/azure-cli/issues/13009 +az() { MSYS_NO_PATHCONV=1 MSYS2_ARG_CONV_EXCL="*" command az "$@"; } + +# Probe for the real az executable that ignores the wrapper function above. +# Used in check_prerequisites so an uninstalled Azure CLI doesn't pass the check +# merely because `command -v az` matches our shell function. +_has_az_executable() { type -P az >/dev/null 2>&1; } + +# Convert a path to Windows-native format for tools like docker.exe that need it. +# On non-MSYS systems (Linux/macOS) this is a no-op. +_winpath() { + if command -v cygpath &>/dev/null; then + cygpath -w "$1" + else + echo "$1" + fi +} + +# ============================================================================== +# Configuration +# ============================================================================== + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" +RESOURCE_GROUP="" +ACR_INPUT="" +SERVICES="" +CUSTOM_TAG="" +DRY_RUN=false +BUILD_ONLY=false +DEPLOY_ONLY=false +SKIP_ROLE_ASSIGNMENT=false + +# Image names (matching infra conventions) +BACKEND_IMAGE_NAME="macaebackend" +MCP_IMAGE_NAME="macaemcp" +FRONTEND_IMAGE_NAME="macaefrontend" + +# Service paths +BACKEND_DIR="$REPO_ROOT/src/backend" +MCP_DIR="$REPO_ROOT/src/mcp_server" +FRONTEND_DIR="$REPO_ROOT/src/App" + +# ============================================================================== +# Logging +# ============================================================================== + +RED='\033[0;31m'; GREEN='\033[0;32m'; YELLOW='\033[1;33m' +BLUE='\033[0;34m'; CYAN='\033[0;36m'; NC='\033[0m' + +log_info() { echo -e "${BLUE}[i]${NC} $*"; } +log_success() { echo -e "${GREEN}[✓]${NC} $*"; } +log_warn() { echo -e "${YELLOW}[!]${NC} $*"; } +log_error() { echo -e "${RED}[✗]${NC} $*"; } +log_step() { echo -e "\n${CYAN}━━━ $* ━━━${NC}\n"; } + +# Retry an az command up to 4 attempts on transient network/operation-in-progress errors +az_retry() { + local attempt=1 out rc delay + while [[ $attempt -le 4 ]]; do + out=$(az "$@" 2>&1) && rc=0 || rc=$? + if [[ $rc -eq 0 ]]; then echo "$out"; return 0; fi + if echo "$out" | grep -qiE "OperationInProgress|ContainerAppOperation"; then + delay=30 + log_warn "Azure operation in progress (attempt $attempt/4), retrying in ${delay}s..." >&2 + elif echo "$out" | grep -qiE "RemoteDisconnected|Connection aborted|timed out|ECONNRESET|HTTPSConnectionPool|Max retries exceeded|NewConnectionError|getaddrinfo|Failed to establish"; then + delay=15 + log_warn "Transient network error (attempt $attempt/4), retrying in ${delay}s..." 
>&2 + else + echo "$out"; return $rc + fi + sleep $delay; (( attempt++ )) + done + echo "$out"; return $rc +} + +# ============================================================================== +# Argument Parsing +# ============================================================================== + +parse_args() { + while [[ $# -gt 0 ]]; do + case "$1" in + -g|--resource-group) + RESOURCE_GROUP="$2"; shift 2 ;; + --acr) + ACR_INPUT="$2"; shift 2 ;; + --services) + SERVICES="$2"; shift 2 ;; + --tag) + CUSTOM_TAG="$2"; shift 2 ;; + --dry-run) + DRY_RUN=true; shift ;; + --build-only) + BUILD_ONLY=true; shift ;; + --deploy-only) + DEPLOY_ONLY=true; shift ;; + --skip-role-assignment) + SKIP_ROLE_ASSIGNMENT=true; shift ;; + -h|--help) + show_help; exit 0 ;; + *) + log_error "Unknown option: $1"; show_help; exit 1 ;; + esac + done +} + +show_help() { + cat < [options] + +Required: + -g, --resource-group Azure Resource Group name + +Options: + --acr ACR name or login server (auto-discovers if not set) + --services Comma-separated: backend,mcp,frontend (default: all) + --tag Custom image tag (default: auto-generated) + --dry-run Preview what would happen without making changes + --build-only Build and push images only, don't update Azure resources + --deploy-only Update Azure resources only (images must already exist) + --skip-role-assignment Skip AcrPull role assignment (use if roles already exist) + -h, --help Show this help message + +Examples: + ./deploy_to_azure.sh -g rg-macae-dev + ./deploy_to_azure.sh -g rg-macae-dev --services backend + ./deploy_to_azure.sh -g rg-macae-dev --acr myregistry --tag v1.0 + ./deploy_to_azure.sh -g rg-macae-dev --dry-run +EOF +} + +# ============================================================================== +# Prerequisites +# ============================================================================== + +check_prerequisites() { + log_step "Step 1: Checking Prerequisites" + + local missing=() + + if _has_az_executable; then + 
log_success "Azure CLI found" + else + missing+=("azure-cli") + fi + + if command -v docker &>/dev/null; then + if docker info &>/dev/null; then + log_success "Docker found and running" + else + log_error "Docker found but daemon not running. Please start Docker Desktop." + exit 1 + fi + else + missing+=("docker") + fi + + if [[ ${#missing[@]} -gt 0 ]]; then + log_error "Missing prerequisites: ${missing[*]}" + echo "" + for tool in "${missing[@]}"; do + case "$tool" in + azure-cli) + echo " ┌─ Azure CLI ───────────────────────────────────────────────────" + echo " │ Install: https://learn.microsoft.com/cli/azure/install-azure-cli" + echo " │ Verify: az --version" + echo " └──────────────────────────────────────────────────────────────" + ;; + docker) + echo " ┌─ Docker Desktop ──────────────────────────────────────────────" + echo " │ Install: https://www.docker.com/products/docker-desktop" + echo " │ Verify: docker --version" + echo " └──────────────────────────────────────────────────────────────" + ;; + esac + done + exit 1 + fi + + # Check Azure login + if ! az account show &>/dev/null; then + log_warn "Not logged into Azure CLI. Running 'az login'..." + az login + fi + log_success "Logged into Azure CLI" +} + +# ============================================================================== +# Step 1b: Azure Role / Permission Check +# ============================================================================== +# +# Per docs/DeploymentGuide.md, the deploying account needs: +# - Contributor (or Owner) on the subscription -- to update resources +# - User Access Administrator OR Role Based Access Control Administrator +# (or Owner) -- to assign the AcrPull role to managed identities +# This check is non-fatal: group-inherited roles may not always enumerate. 
+# ============================================================================== + +check_azure_roles() { + log_step "Step 1b: Checking Azure Roles & Permissions" + + local sub_id user_id + sub_id=$(az account show --query id -o tsv 2>/dev/null || true) + user_id=$(az ad signed-in-user show --query id -o tsv 2>/dev/null || true) + if [[ -z "$sub_id" || -z "$user_id" ]]; then + log_warn "Could not determine subscription or user identity -- skipping role check." + return + fi + + local scope="/subscriptions/$sub_id" + local roles_raw + roles_raw=$(az role assignment list --assignee "$user_id" --scope "$scope" \ + --include-inherited --include-groups --query "[].roleDefinitionName" -o tsv 2>/dev/null || true) + if [[ -z "$roles_raw" ]]; then + log_warn "Unable to enumerate role assignments at $scope." + log_warn "Required: Contributor + (User Access Administrator OR Role Based Access Control Administrator), or Owner." + return + fi + + local has_res_mgmt=false has_role_mgmt=false + while IFS= read -r r; do + case "$r" in + Owner) has_res_mgmt=true; has_role_mgmt=true ;; + Contributor) has_res_mgmt=true ;; + "User Access Administrator"|"Role Based Access Control Administrator") has_role_mgmt=true ;; + esac + done <<< "$roles_raw" + + if $has_res_mgmt; then log_success "Resource management role found (Owner/Contributor)" + else log_warn "Missing 'Contributor' (or 'Owner') at subscription scope -- Azure resource updates may fail."; fi + + if $has_role_mgmt; then log_success "Role-assignment permission found (Owner/UAA/RBAC Admin)" + else log_warn "Missing 'User Access Administrator' / 'Role Based Access Control Administrator' (or 'Owner') -- AcrPull role assignment may fail. 
Pass --skip-role-assignment if roles are already in place."; fi +} + +# ============================================================================== +# Step 2: Validate Resource Group & Discover Resources +# ============================================================================== + +validate_and_discover() { + log_step "Step 2: Discovering Azure Resources" + + if [[ -z "$RESOURCE_GROUP" ]]; then + log_error "Resource group is required. Use: -g " + exit 1 + fi + + if ! az group show --name "$RESOURCE_GROUP" &>/dev/null; then + log_error "Resource group '$RESOURCE_GROUP' not found." + exit 1 + fi + log_success "Resource group: $RESOURCE_GROUP" + + # Discover backend container app + local ca_list + ca_list=$(az containerapp list --resource-group "$RESOURCE_GROUP" --query "[].name" -o tsv 2>/dev/null || true) + + BACKEND_CA="" + MCP_CA="" + + if [[ -n "$ca_list" ]]; then + while IFS= read -r app; do + app=$(echo "$app" | tr -d '\r') + if [[ "$app" == ca-mcp-* ]]; then + MCP_CA="$app" + elif [[ "$app" == ca-* ]]; then + BACKEND_CA="$app" + fi + done <<< "$ca_list" + fi + + if [[ -n "$BACKEND_CA" ]]; then + log_success "Backend Container App: $BACKEND_CA" + else + log_warn "Backend Container App: not found in RG" + fi + + if [[ -n "$MCP_CA" ]]; then + log_success "MCP Container App: $MCP_CA" + else + log_warn "MCP Container App: not found in RG" + fi + + # Discover frontend web app + FRONTEND_APP="" + FRONTEND_APP=$(az webapp list --resource-group "$RESOURCE_GROUP" --query "[0].name" -o tsv 2>/dev/null | tr -d '\r' || true) + + if [[ -n "$FRONTEND_APP" ]]; then + log_success "Frontend Web App: $FRONTEND_APP" + else + log_warn "Frontend Web App: not found in RG" + fi + + # Capture current images for rollback + if [[ -n "$BACKEND_CA" ]]; then + OLD_BACKEND_IMAGE=$(az containerapp show --name "$BACKEND_CA" --resource-group "$RESOURCE_GROUP" \ + --query "properties.template.containers[0].image" -o tsv 2>/dev/null | tr -d '\r' || echo "unknown") + log_info "Current 
backend image: $OLD_BACKEND_IMAGE" + fi + + if [[ -n "$MCP_CA" ]]; then + OLD_MCP_IMAGE=$(az containerapp show --name "$MCP_CA" --resource-group "$RESOURCE_GROUP" \ + --query "properties.template.containers[0].image" -o tsv 2>/dev/null | tr -d '\r' || echo "unknown") + log_info "Current MCP image: $OLD_MCP_IMAGE" + fi + + if [[ -n "$FRONTEND_APP" ]]; then + OLD_FRONTEND_IMAGE=$(az webapp config show --name "$FRONTEND_APP" --resource-group "$RESOURCE_GROUP" \ + --query "linuxFxVersion" -o tsv 2>/dev/null | tr -d '\r' || echo "unknown") + log_info "Current frontend image: $OLD_FRONTEND_IMAGE" + fi +} + +# ============================================================================== +# Step 3: Resolve ACR +# ============================================================================== + +# Resolve ACR resource ID reliably: +# 1. Try with --resource-group (fastest, most reliable for RG-scoped ACRs) +# 2. Try global lookup (for ACRs in a different RG) +# 3. Build from known parts as fallback (handles post-create propagation delay) +_get_acr_id() { + local name="$1" + local rg="${2:-$RESOURCE_GROUP}" + local id + id=$(az acr show --name "$name" --resource-group "$rg" --query "id" -o tsv 2>/dev/null | tr -d '\r' || true) + if [[ -z "$id" ]]; then + id=$(az acr show --name "$name" --query "id" -o tsv 2>/dev/null | tr -d '\r' || true) + fi + if [[ -z "$id" ]]; then + local sub_id + sub_id=$(az account show --query id -o tsv 2>/dev/null | tr -d '\r') + id="/subscriptions/$sub_id/resourceGroups/$rg/providers/Microsoft.ContainerRegistry/registries/$name" + fi + echo "$id" +} + +resolve_acr() { + log_step "Step 3: Resolving Container Registry" + + if [[ -n "$ACR_INPUT" ]]; then + # User provided ACR via --acr flag — normalize to name and login server + local input="${ACR_INPUT%.azurecr.io}" + ACR_NAME=$(az acr list --resource-group "$RESOURCE_GROUP" --query "[?name=='$input'].name | [0]" -o tsv 2>/dev/null | tr -d '\r' || true) + if [[ -z "$ACR_NAME" ]]; then + ACR_NAME=$(az 
acr show --name "$input" --query "name" -o tsv 2>/dev/null | tr -d '\r' || true) + fi + if [[ -z "$ACR_NAME" ]]; then + log_error "ACR '$ACR_INPUT' not found or not accessible." + exit 1 + fi + ACR_LOGIN_SERVER=$(az acr show --name "$ACR_NAME" --query "loginServer" -o tsv 2>/dev/null | tr -d '\r' || az acr show --name "$ACR_NAME" --resource-group "$RESOURCE_GROUP" --query "loginServer" -o tsv | tr -d '\r') + ACR_ID=$(_get_acr_id "$ACR_NAME") + log_success "Using specified ACR: $ACR_NAME ($ACR_LOGIN_SERVER)" + assign_acr_pull_roles + return + fi + + # Always ask first — no pre-discovery + echo "" + read -rp "Enter ACR name to use (or press Enter to see available ACRs / create new): " user_acr + + if [[ -n "$user_acr" ]]; then + local input="${user_acr%.azurecr.io}" + ACR_NAME=$(az acr show --name "$input" --query "name" -o tsv 2>/dev/null | tr -d '\r' || true) + if [[ -z "$ACR_NAME" ]]; then + log_error "ACR '$user_acr' not found or not accessible." + exit 1 + fi + ACR_LOGIN_SERVER=$(az acr show --name "$ACR_NAME" --resource-group "$RESOURCE_GROUP" --query "loginServer" -o tsv 2>/dev/null | tr -d '\r' || az acr show --name "$ACR_NAME" --query "loginServer" -o tsv | tr -d '\r') + ACR_ID=$(_get_acr_id "$ACR_NAME") + log_success "Using ACR: $ACR_NAME ($ACR_LOGIN_SERVER)" + assign_acr_pull_roles + return + fi + + # Empty input — discover what's in the RG and auto-select or auto-create + log_info "Looking for ACR(s) in resource group '$RESOURCE_GROUP'..." 
+ local found_acrs + found_acrs=$(az acr list --resource-group "$RESOURCE_GROUP" --query "[].name" -o tsv 2>/dev/null | tr -d '\r' || true) + + local chosen_acr + chosen_acr=$(echo "$found_acrs" | head -1 | tr -d '[:space:]') + + if [[ -n "$chosen_acr" ]]; then + ACR_NAME="$chosen_acr" + ACR_LOGIN_SERVER=$(az acr show --name "$ACR_NAME" --resource-group "$RESOURCE_GROUP" --query "loginServer" -o tsv | tr -d '\r') + ACR_ID=$(_get_acr_id "$ACR_NAME") + log_success "Found and using ACR: $ACR_NAME ($ACR_LOGIN_SERVER)" + assign_acr_pull_roles + else + # Create new ACR in the same RG + local suffix + suffix=$(echo "$RESOURCE_GROUP" | sed 's/[^a-zA-Z0-9]//g' | tail -c 15) + local new_acr_name="acr${suffix}$(date +%s | tail -c 6)" + new_acr_name=$(echo "$new_acr_name" | tr '[:upper:]' '[:lower:]' | head -c 50) + + log_info "Creating ACR: $new_acr_name in $RESOURCE_GROUP..." + az acr create \ + --resource-group "$RESOURCE_GROUP" \ + --name "$new_acr_name" \ + --sku Basic \ + --admin-enabled false \ + --output none + + ACR_NAME="$new_acr_name" + ACR_LOGIN_SERVER=$(az acr show --name "$ACR_NAME" --resource-group "$RESOURCE_GROUP" --query "loginServer" -o tsv | tr -d '\r') + ACR_ID=$(_get_acr_id "$ACR_NAME") + log_success "Created ACR: $ACR_NAME ($ACR_LOGIN_SERVER)" + assign_acr_pull_roles + fi +} + +# ============================================================================== +# ACR Pull Role Assignment +# ============================================================================== + +_assign_one_role() { + # _assign_one_role