From d13133d04205915a20992c5cfc9915991f71fb1e Mon Sep 17 00:00:00 2001
From: Saswato-Microsoft
Date: Mon, 6 Apr 2026 18:34:55 +0000
Subject: [PATCH 1/2] feat: Enhance Purview scan automation with fallback collection resolution and reparenting logic

---
Review notes (this section is ignored by git am):
* azd fallback keys share one loop; lookup failures are logged instead of silently swallowed.
* Reparenting is skipped when the deployment collection already sits beneath the datasource
  collection, and the reparent PUT resends friendlyName/description.
* Hunk header and diffstat reflect the revised line count; the post-image blob id on the
  index line predates these review changes.

 ...gger_purview_scan_for_fabric_workspace.ps1 | 89 +++++++++++++++++++
 1 file changed, 89 insertions(+)

diff --git a/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1 b/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
index 6c61084..490e47e 100644
--- a/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
+++ b/scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
@@ -207,10 +207,99 @@ if (Test-Path $collectionEnvPath) {
     if ($_ -match '^PURVIEW_COLLECTION_ID=(.*)$') { $collectionId = $Matches[1].Trim() }
   }
 }
+# Fallback: resolve collection from azd env when temp file is missing.
+# Keys are tried in priority order; the first one that yields a value wins.
+foreach ($azdKey in @('purviewCollectionName', 'desiredFabricDomainName')) {
+  if ($collectionId) { break }
+  try {
+    $azdCollId = & azd env get-value $azdKey 2>$null
+    if ($LASTEXITCODE -eq 0 -and $azdCollId) { $collectionId = $azdCollId.Trim() }
+  } catch {
+    # Best-effort lookup: a failure here must not stop scan setup, so log it and move on
+    Log "Could not read '$azdKey' from azd env: $($_.Exception.Message)"
+  }
+}
 if (-not $collectionId) {
   Log "No Purview collection found. Scan will be created in root collection."
 }
 
+# Resolve the datasource's own collection to avoid Scan_CollectionOutOfBound errors.
+# Purview requires scans to be created under the datasource's collection or a child of it.
+$datasourceCollectionId = $null
+$datasourceEnvPathForColl = Join-Path $tempDir 'fabric_datasource.env'
+if (Test-Path $datasourceEnvPathForColl) {
+  Get-Content $datasourceEnvPathForColl | ForEach-Object {
+    if ($_ -match '^FABRIC_COLLECTION_ID=(.+)$') { $datasourceCollectionId = $Matches[1].Trim() }
+  }
+}
+if (-not $datasourceCollectionId) {
+  # Query the datasource directly to get its collection
+  try {
+    $dsInfo = Invoke-SecureRestMethod -Uri "$endpoint/scan/datasources/${datasourceName}?api-version=2022-07-01-preview" -Headers $purviewHeaders -Method Get -ErrorAction Stop
+    if ($dsInfo.properties.collection.referenceName) {
+      $datasourceCollectionId = $dsInfo.properties.collection.referenceName
+      Log "Datasource '$datasourceName' belongs to collection: $datasourceCollectionId"
+    }
+  } catch {
+    Log "Could not query datasource collection: $($_.Exception.Message)"
+  }
+}
+
+# If our deployment collection differs from the datasource collection, make sure it sits beneath it.
+# Reparent only when it is not already a descendant, so reruns leave an existing hierarchy alone.
+if ($collectionId -and $datasourceCollectionId -and $collectionId -ne $datasourceCollectionId) {
+  $collectionApi = "$endpoint/account/collections/${collectionId}"
+  $existingCollection = $null
+  $alreadyNested = $false
+  try {
+    # Read the current definition: it gives the direct parent and the display fields reused by the PUT below
+    $existingCollection = Invoke-SecureRestMethod -Uri "${collectionApi}?api-version=2019-11-01-preview" -Headers $purviewHeaders -Method Get -ErrorAction Stop
+    $alreadyNested = ($existingCollection.parentCollection.referenceName -eq $datasourceCollectionId)
+    if (-not $alreadyNested) {
+      $collPath = Invoke-SecureRestMethod -Uri "${collectionApi}/getCollectionPath?api-version=2019-11-01-preview" -Headers $purviewHeaders -Method Get -ErrorAction Stop
+      $alreadyNested = (@($collPath.parentNameChain) -contains $datasourceCollectionId)
+    }
+  } catch {
+    # Inspection is best-effort; on failure fall through to the reparent attempt below
+    Log "Could not inspect collection '$collectionId': $($_.Exception.Message)"
+  }
+  if ($alreadyNested) {
+    Log "Deployment collection '$collectionId' is already under datasource collection '$datasourceCollectionId'. No reparenting needed."
+  } else {
+    Log "Deployment collection '$collectionId' is not under datasource collection '$datasourceCollectionId'. Reparenting..."
+    try {
+      $reparentPayload = @{
+        parentCollection = @{
+          referenceName = $datasourceCollectionId
+          type = 'CollectionReference'
+        }
+      }
+      # Resend existing display fields with the PUT (assumes create-or-update replaces the whole definition - confirm)
+      foreach ($displayField in 'friendlyName', 'description') {
+        if ($existingCollection -and $existingCollection.PSObject.Properties[$displayField] -and $existingCollection.$displayField) {
+          $reparentPayload[$displayField] = $existingCollection.$displayField
+        }
+      }
+      $reparentBody = $reparentPayload | ConvertTo-Json -Depth 5
+      $reparentHeaders = New-SecureHeaders -Token $purviewToken -AdditionalHeaders @{'Content-Type' = 'application/json'}
+      $reparentResp = Invoke-SecureWebRequest -Uri "${collectionApi}?api-version=2019-11-01-preview" -Headers $reparentHeaders -Method Put -Body $reparentBody -ErrorAction Stop
+      if ($reparentResp.StatusCode -ge 200 -and $reparentResp.StatusCode -lt 300) {
+        Log "Collection '$collectionId' reparented under '$datasourceCollectionId' successfully"
+      } else {
+        Warn "Reparent returned HTTP $($reparentResp.StatusCode). Falling back to datasource collection."
+        $collectionId = $datasourceCollectionId
+      }
+    } catch {
+      Warn "Failed to reparent collection: $($_.Exception.Message). Falling back to datasource collection."
+      $collectionId = $datasourceCollectionId
+    }
+  }
+} elseif (-not $collectionId -and $datasourceCollectionId) {
+  # No deployment collection — use the datasource's collection
+  $collectionId = $datasourceCollectionId
+  Log "Using datasource collection: $collectionId"
+}
+
 $scanName = "scan-workspace-$WorkspaceId"
 
 Log "Creating/Updating scan '$scanName' for datasource '$datasourceName' targeting workspace '$WorkspaceId'"

From 08f47358ff4a1f0e7090d17c84940cd7018a4a1d Mon Sep 17 00:00:00 2001
From: Saswato-Microsoft
Date: Mon, 6 Apr 2026 18:46:19 +0000
Subject: [PATCH 2/2] docs: Enhance Purview scan automation with reparenting logic for deployment collections

---
 docs/post_deployment_steps.md | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/docs/post_deployment_steps.md b/docs/post_deployment_steps.md
index aac6b8f..b14f030 100644
--- a/docs/post_deployment_steps.md
+++ b/docs/post_deployment_steps.md
@@ -149,6 +149,8 @@ If the connection fails, verify RBAC roles are assigned (see Troubleshooting sec
 
 If `purviewCollectionName` is left empty in [infra/main.bicepparam](../infra/main.bicepparam), the automation now uses `collection-`.
 
+> **Note:** If a tenant-level Fabric datasource already exists under a different collection, the scan script automatically reparents the deployment collection as a child of the datasource's collection, unless it is already nested beneath it. This ensures scans comply with Purview's requirement that scans are created within the datasource's collection hierarchy. In the Purview portal, your deployment collection may appear nested under the datasource's collection rather than at the root.
+
 If the identity running `azd` does not have **Purview Collection Admin** (or equivalent) on the target collection, the Purview scripts will warn and skip collection, datasource, and scan steps. Grant the role, then rerun the Purview scripts.
 
 If you need to rerun the Purview steps after provisioning:
@@ -288,10 +290,13 @@ pwsh ./scripts/automationScripts/OneLakeIndex/06_setup_ai_foundry_search_rbac.ps
 2. Check scan configuration:
    - Purview Portal → Data Map → Sources → Fabric source → Scans
 
-3. Re-run the registration script:
+3. **`Scan_CollectionOutOfBound` error:** Purview requires that scans are created under the datasource's collection or a child of it. If your deployment collection is not under the datasource's collection, the scan script will attempt to reparent it automatically. If reparenting fails, the script falls back to creating the scan in the datasource's collection; to keep scans in your deployment collection, manually move it under the datasource's collection in Purview Portal → Data Map → Collections.
+
+4. Re-run the scan pipeline:
    ```bash
    eval $(azd env get-values)
    pwsh ./scripts/automationScripts/FabricWorkspace/CreateWorkspace/register_fabric_datasource.ps1
+   pwsh ./scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
    ```
 
 ### Post-Provision Hooks Failed