Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -197,5 +197,7 @@ ingestion/.claude/agents
# Connector audit working files — per-session, never committed
.claude/audit-results/
.claude/connector-audit.json
.claude/scheduled_tasks.lock
.claude/plans/

test-results/
124 changes: 123 additions & 1 deletion ingestion/src/metadata/ingestion/source/dashboard/ssrs/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
"""
SSRS REST client
"""
import base64
import binascii
import json
from typing import Iterable, Iterator, Optional, Union

import requests
Expand All @@ -36,12 +39,16 @@
# Version segment of the SSRS REST API.
API_VERSION = "api/v2.0"

# Timeouts in seconds. CONNECT_TIMEOUT and RDL_READ_TIMEOUT are passed together
# as the (connect, read) timeout tuple when downloading report definitions.
CONNECT_TIMEOUT = 10
READ_TIMEOUT = 120
RDL_READ_TIMEOUT = 60

# NOTE(review): presumably the page size used by the paginated listing calls.
PAGE_SIZE = 100

# NOTE(review): presumably the retry policy applied to the HTTP session
# (attempt count, backoff multiplier, status codes that trigger a retry).
MAX_RETRIES = 2
BACKOFF_FACTOR = 1
RETRY_STATUS_CODES = (500, 502, 503, 504)

# Comma-separated field lists for report and folder queries.
# REPORT_SELECT_FIELDS is bound exactly once; the earlier value without
# ``CreatedBy`` was immediately overwritten and has been dropped.
REPORT_SELECT_FIELDS = "Id,Name,Path,Description,Type,Hidden,HasDataSources,CreatedBy"
FOLDER_SELECT_FIELDS = "Id,Name,Path"

# Endpoint templates (formatted with ``id``) tried in order when fetching RDL.
RDL_CONTENT_PATHS = ("/Reports({id})/Content/$value", "/CatalogItems({id})/Content")
# HTTP statuses treated as "no RDL content at this path" rather than an error.
RDL_NOT_FOUND_STATUS = {404}
# Upper bound on an RDL payload held in memory (50 MiB).
MAX_RDL_BYTES = 50 * 1024 * 1024


class SsrsClient:
Expand Down Expand Up @@ -136,3 +143,118 @@
}
for data in self._paginate("/Reports", params, "reports"):
yield from SsrsReportListResponse(**data).value

def get_report_definition(self, report_id: str) -> Optional[bytes]:
    """Fetch the RDL bytes for ``report_id``, or ``None`` when no endpoint has content.

    Every template in ``RDL_CONTENT_PATHS`` is formatted with the report id and
    tried in order; the first non-``None`` body wins. A ``requests`` transport
    error or ``SourceConnectionException`` on one path is logged as a warning
    and remembered, and the next path is still attempted.

    Raises:
        SourceConnectionException: when no path produced content and at least
            one attempt failed; chained from the most recent failure.
    """
    failure: Optional[Exception] = None
    for path in (template.format(id=report_id) for template in RDL_CONTENT_PATHS):
        try:
            content = self._fetch_report_content(path)
        except (requests.RequestException, SourceConnectionException) as exc:
            failure = exc
            logger.warning("RDL fetch failed for %s: %s", path, exc)
        else:
            if content is not None:
                return content
    if failure is None:
        # Every endpoint answered without content and without error.
        return None
    raise SourceConnectionException(
        f"Failed to fetch RDL content for report [{report_id}]: {failure}"
    ) from failure
Comment on lines +147 to +169

def _fetch_report_content(self, path: str) -> Optional[bytes]:
    """GET ``path`` under ``self.base_url`` and return the RDL bytes it carries.

    Returns ``None`` when the status is in ``RDL_NOT_FOUND_STATUS``, when the
    payload is over the size limit (declared or while streaming), or when the
    body decodes to nothing.

    Raises:
        SourceConnectionException: for any other non-OK HTTP status.
    """
    response = self.session.get(
        f"{self.base_url}{path}",
        timeout=(CONNECT_TIMEOUT, RDL_READ_TIMEOUT),
        headers={"Accept": "application/xml,application/octet-stream"},
        stream=True,
    )
    # The response is used as a context manager so the streamed connection
    # is released on every exit path.
    with response:
        status = response.status_code
        if status in RDL_NOT_FOUND_STATUS:
            return None
        if not response.ok:
            raise SourceConnectionException(
                f"RDL fetch returned HTTP {status} for {path}"
            )
        # Check the declared length first so an oversized body is never read.
        if _exceeds_size_limit(response, path):
            return None
        raw = _read_bounded_body(response, path)
        if raw is None:
            return None
        media_type = (response.headers.get("Content-Type") or "").lower()
        return _decode_rdl_body(raw, media_type, path)


def _read_bounded_body(resp: requests.Response, path: str) -> Optional[bytes]:
    """Accumulate the streamed body of ``resp`` in 64 KiB chunks.

    Returns the collected bytes, or ``None`` (after logging a warning) as soon
    as the next chunk would push the total past ``MAX_RDL_BYTES``.
    """
    received = bytearray()
    # filter(None, ...) drops empty chunks.
    for piece in filter(None, resp.iter_content(chunk_size=65536)):
        projected = len(received) + len(piece)
        if projected > MAX_RDL_BYTES:
            logger.warning(
                "RDL at %s exceeds size limit (>%s bytes); aborting download",
                path,
                MAX_RDL_BYTES,
            )
            return None
        received += piece
    return bytes(received)


def _exceeds_size_limit(resp: requests.Response, path: str) -> bool:
    """Report whether the declared ``Content-Length`` is above ``MAX_RDL_BYTES``.

    A missing or non-integer header yields ``False``. When the limit is
    exceeded a warning is logged before ``True`` is returned.
    """
    declared = resp.headers.get("Content-Length")
    if declared is None:
        return False
    try:
        declared_bytes = int(declared)
    except ValueError:
        # Unparseable header: cannot judge the size from it.
        return False
    oversized = declared_bytes > MAX_RDL_BYTES
    if oversized:
        logger.warning(
            "RDL at %s exceeds size limit (%s bytes > %s); skipping to avoid OOM",
            path,
            declared_bytes,
            MAX_RDL_BYTES,
        )
    return oversized


def _decode_rdl_body(body: bytes, content_type: str, path: str) -> Optional[bytes]:
"""Decode an already-read response body. If JSON-wrapped base64, unwrap it."""
if not body:
return None
if "json" not in content_type:
return body
try:
payload = json.loads(body)
except ValueError:
return body
value = payload.get("Value") if isinstance(payload, dict) else None
if not value:
logger.warning("RDL JSON response missing 'Value' field at %s", path)
return None
try:
decoded = base64.b64decode(value, validate=True)
except (binascii.Error, ValueError) as exc:

Check warning on line 249 in ingestion/src/metadata/ingestion/source/dashboard/ssrs/client.py

View check run for this annotation

SonarQubeCloud / [open-metadata-ingestion] SonarCloud Code Analysis

Remove this redundant Exception class; it derives from another which is already caught.

See more on https://sonarcloud.io/project/issues?id=open-metadata-ingestion&issues=AZ25eRsUmZnDNosWGtsh&open=AZ25eRsUmZnDNosWGtsh&pullRequest=27652
logger.warning("Malformed base64 in RDL response at %s: %s", path, exc)
return None
if len(decoded) > MAX_RDL_BYTES:
logger.warning(
"RDL at %s exceeds size limit after base64 decode (%s > %s)",
path,
len(decoded),
MAX_RDL_BYTES,
)
return None
return decoded
Loading
Loading