Skip to content

Commit c4afafd

Browse files
Merge pull request #139 from Saswato-Microsoft/psl-purview-fixes
fix: Enhance Purview scan reliability with collection hierarchy fallback and reparenting logic
2 parents faaced3 + 08f4735 commit c4afafd

2 files changed

Lines changed: 70 additions & 1 deletion

File tree

docs/post_deployment_steps.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ If the connection fails, verify RBAC roles are assigned (see Troubleshooting sec
149149

150150
If `purviewCollectionName` is left empty in [infra/main.bicepparam](../infra/main.bicepparam), the automation now uses `collection-<AZURE_ENV_NAME>`.
151151

152+
> **Note:** If a tenant-level Fabric datasource already exists under a different collection, the scan script automatically reparents the deployment collection, making it a child of the datasource's collection. Purview requires scans to be created in the datasource's collection or in one of its children, so this keeps the scan valid. As a result, your deployment collection may appear nested under the datasource's collection in the Purview portal rather than at the root. If reparenting fails, the script falls back to creating the scan in the datasource's collection.
153+
152154
If the identity running `azd` does not have **Purview Collection Admin** (or equivalent) on the target collection, the Purview scripts will warn and skip collection, datasource, and scan steps. Grant the role, then rerun the Purview scripts.
153155

154156
If you need to rerun the Purview steps after provisioning:
@@ -289,10 +291,13 @@ pwsh ./scripts/automationScripts/OneLakeIndex/06_setup_ai_foundry_search_rbac.ps
289291
2. Check scan configuration:
290292
- Purview Portal → Data Map → Sources → Fabric source → Scans
291293

292-
3. Re-run the registration script:
294+
3. **`Scan_CollectionOutOfBound` error:** Purview requires scans to be created in the datasource's collection or in a child of it. If your deployment collection is not under the datasource's collection, the scan script attempts to reparent it automatically; if reparenting fails, the script falls back to creating the scan in the datasource's collection. If the error persists, manually move your deployment collection under the datasource's collection in Purview Portal → Data Map → Collections.
295+
296+
4. Re-run the scan pipeline:
293297
```bash
294298
eval "$(azd env get-values)"
295299
pwsh ./scripts/automationScripts/FabricWorkspace/CreateWorkspace/register_fabric_datasource.ps1
300+
pwsh ./scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1
296301
```
297302

298303
### Post-Provision Hooks Failed

scripts/automationScripts/FabricPurviewAutomation/trigger_purview_scan_for_fabric_workspace.ps1

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,10 +207,74 @@ if (Test-Path $collectionEnvPath) {
207207
if ($_ -match '^PURVIEW_COLLECTION_ID=(.*)$') { $collectionId = $Matches[1].Trim() }
208208
}
209209
}
# Fallback chain for when the temp env file did not supply a collection ID:
# ask azd for 'purviewCollectionName' first, then for 'desiredFabricDomainName'.
# The first lookup that succeeds with a non-empty value wins.
foreach ($azdKey in @('purviewCollectionName', 'desiredFabricDomainName')) {
    if ($collectionId) { break }
    try {
        # stderr is discarded; only a zero exit code with output is accepted.
        $azdCollId = & azd env get-value $azdKey 2>$null
        if (($LASTEXITCODE -eq 0) -and $azdCollId) {
            $collectionId = $azdCollId.Trim()
        }
    } catch {
        # Best-effort lookup: any error while invoking azd is ignored and
        # $collectionId is left empty.
    }
}

if (-not $collectionId) {
    Log "No Purview collection found. Scan will be created in root collection."
}
213226

# Work out which collection owns the datasource. Purview requires scans to be
# created under the datasource's collection or a child of it; otherwise the
# request fails with Scan_CollectionOutOfBound.
$datasourceCollectionId = $null
$datasourceEnvPathForColl = Join-Path $tempDir 'fabric_datasource.env'

# First choice: the FABRIC_COLLECTION_ID entry of the temp env file
# (if several lines match, the last one wins).
if (Test-Path $datasourceEnvPathForColl) {
    foreach ($envLine in (Get-Content $datasourceEnvPathForColl)) {
        if ($envLine -match '^FABRIC_COLLECTION_ID=(.+)$') {
            $datasourceCollectionId = $Matches[1].Trim()
        }
    }
}

# Second choice: query the datasource itself and read its collection reference.
if (-not $datasourceCollectionId) {
    try {
        $datasourceUri = "$endpoint/scan/datasources/${datasourceName}?api-version=2022-07-01-preview"
        $dsInfo = Invoke-SecureRestMethod -Uri $datasourceUri -Headers $purviewHeaders -Method Get -ErrorAction Stop
        $dsCollectionRef = $dsInfo.properties.collection.referenceName
        if ($dsCollectionRef) {
            $datasourceCollectionId = $dsCollectionRef
            Log "Datasource '$datasourceName' belongs to collection: $datasourceCollectionId"
        }
    } catch {
        # A failed query is logged and leaves $datasourceCollectionId empty.
        Log "Could not query datasource collection: $($_.Exception.Message)"
    }
}
248+
# Make sure the scan's collection sits inside the datasource's collection hierarchy.
# Three cases:
#   1. Deployment collection differs from the datasource collection but is already a
#      descendant of it  -> nothing to do, the existing hierarchy is left untouched.
#   2. Deployment collection is outside the datasource collection's hierarchy (or its
#      ancestry cannot be determined) -> reparent it as a child of the datasource
#      collection; on any failure fall back to using the datasource collection itself.
#   3. No deployment collection at all -> use the datasource collection.
if ($collectionId -and $datasourceCollectionId -and $collectionId -ne $datasourceCollectionId) {
    # Ask Purview for the ancestor chain of the deployment collection. Reparenting a
    # collection that is already nested (at any depth) under the datasource collection
    # would needlessly flatten the hierarchy, so that case is detected and skipped.
    $alreadyUnderDatasource = $false
    try {
        $pathUrl = "$endpoint/account/collections/${collectionId}/getCollectionPath?api-version=2019-11-01-preview"
        $pathInfo = Invoke-SecureRestMethod -Uri $pathUrl -Headers $purviewHeaders -Method Get -ErrorAction Stop
        # parentNameChain lists ancestor reference names; -contains is case-insensitive.
        if ($pathInfo.parentNameChain -contains $datasourceCollectionId) {
            $alreadyUnderDatasource = $true
        }
    } catch {
        # Ancestry unknown (e.g. the collection does not exist yet): continue with the reparent attempt.
        Log "Could not resolve collection path for '$collectionId': $($_.Exception.Message)"
    }

    if ($alreadyUnderDatasource) {
        Log "Deployment collection '$collectionId' is already under datasource collection '$datasourceCollectionId'. No reparenting needed."
    } else {
        Log "Deployment collection '$collectionId' is not under datasource collection '$datasourceCollectionId'. Reparenting..."
        try {
            # NOTE(review): the PUT body carries only parentCollection; confirm that omitting
            # friendlyName/description does not reset them on an existing collection.
            $reparentBody = @{
                parentCollection = @{
                    referenceName = $datasourceCollectionId
                    type          = 'CollectionReference'
                }
            } | ConvertTo-Json -Depth 5
            $reparentUrl = "$endpoint/account/collections/${collectionId}?api-version=2019-11-01-preview"
            $reparentHeaders = New-SecureHeaders -Token $purviewToken -AdditionalHeaders @{'Content-Type' = 'application/json'}
            $reparentResp = Invoke-SecureWebRequest -Uri $reparentUrl -Headers $reparentHeaders -Method Put -Body $reparentBody -ErrorAction Stop
            if ($reparentResp.StatusCode -ge 200 -and $reparentResp.StatusCode -lt 300) {
                Log "Collection '$collectionId' reparented under '$datasourceCollectionId' successfully"
            } else {
                # Non-2xx response: create the scan in the datasource's own collection instead.
                Warn "Reparent returned HTTP $($reparentResp.StatusCode). Falling back to datasource collection."
                $collectionId = $datasourceCollectionId
            }
        } catch {
            # Request failed outright: same fallback as for a non-2xx response.
            Warn "Failed to reparent collection: $($_.Exception.Message). Falling back to datasource collection."
            $collectionId = $datasourceCollectionId
        }
    }
} elseif (-not $collectionId -and $datasourceCollectionId) {
    # No deployment collection — use the datasource's collection
    $collectionId = $datasourceCollectionId
    Log "Using datasource collection: $collectionId"
}
277+
214278
# The scan name is derived from the target workspace ID.
$scanName = "scan-workspace-$WorkspaceId"
215279

216280
# Announce which scan, datasource, and workspace are involved.
Log "Creating/Updating scan '$scanName' for datasource '$datasourceName' targeting workspace '$WorkspaceId'"

0 commit comments

Comments
 (0)