# ==============================================================================
# This workflow:
#   1. Checks out cdisc-rules-engine (the engine itself) into ./engine/
#   2. Checks out cdisc-open-rules (rules + test data) into ./open-rules/
#   3. Installs engine Python dependencies into ./venv/
#   4. Iterates every Published/ rule from cdisc-open-rules
#   5. Runs the engine against each test case
#   6. Compares actual output with expected results.csv baseline
#   7. Publishes a Markdown report to Job Summary and as an artifact
# ==============================================================================
name: Validate Published Rules

on:
  push:
    branches:
      - main
  workflow_dispatch:
    inputs:
      rules_ref:
        description: "Branch/tag/SHA of cdisc-open-rules to validate against"
        required: false
        default: "main"
      core_ids:
        description: "Space-separated list of rule IDs to validate (e.g. CORE-000001 CORE-000002). Leave blank to validate all."
        required: false
        default: ""

jobs:
  validate-published-rules:
    runs-on: ubuntu-latest
    permissions:
      contents: read

    steps:
      # -----------------------------------------------------------------------
      # 1. Checkout cdisc-rules-engine
      # -----------------------------------------------------------------------
      - name: Checkout cdisc-rules-engine
        uses: actions/checkout@v6
        with:
          repository: cdisc-org/cdisc-rules-engine
          path: engine
          token: ${{ secrets.GITHUB_TOKEN }}

      # -----------------------------------------------------------------------
      # 2. Checkout cdisc-open-rules (rules + test data + helper scripts)
      # -----------------------------------------------------------------------
      - name: Checkout cdisc-open-rules
        uses: actions/checkout@v6
        with:
          repository: cdisc-org/cdisc-open-rules
          # `inputs` is empty on push events, so fall back to main explicitly
          # instead of relying on an empty ref.
          ref: ${{ inputs.rules_ref || 'main' }}
          path: open-rules

      # -----------------------------------------------------------------------
      # 2b. Debug — verify directory layout
      # -----------------------------------------------------------------------
      - name: Debug — list workspace layout
        run: |
          echo "=== Workspace root ==="
          ls -la
          echo "=== open-rules/ ==="
          ls -la open-rules/ || echo "open-rules/ NOT FOUND"
          echo "=== open-rules/Published/ (first 10) ==="
          # Test the directory explicitly: in `ls ... | head || echo`, the
          # pipeline status is head's, so the fallback message never printed.
          if [ -d open-rules/Published ]; then
            ls open-rules/Published/ | head -10
          else
            echo "Published/ NOT FOUND"
          fi
          echo "=== engine/ ==="
          if [ -d engine ]; then
            ls engine/ | head -10
          else
            echo "engine/ NOT FOUND"
          fi

      # -----------------------------------------------------------------------
      # 3. Set up Python
      # -----------------------------------------------------------------------
      - name: Set up Python 3.12
        uses: actions/setup-python@v6
        with:
          python-version: "3.12"

      # -----------------------------------------------------------------------
      # 4. Install engine dependencies
      # -----------------------------------------------------------------------
      - name: Install engine dependencies
        run: |
          python -m venv venv
          ./venv/bin/pip install --upgrade pip
          ./venv/bin/pip install -r engine/requirements.txt

      # -----------------------------------------------------------------------
      # 5. Run validation for every Published rule
      #    continue-on-error lets the upload/summary steps run; step 8 turns
      #    a failed outcome back into a failed job.
      # -----------------------------------------------------------------------
      - name: Run validation for all Published rules
        id: validate
        continue-on-error: true
        env:
          # Pass the free-text input through the environment so it is never
          # expanded into the script text (avoids shell injection).
          CORE_IDS: ${{ inputs.core_ids }}
        run: |
          chmod +x open-rules/.github/scripts/run_validation.sh

          # Split the space-separated IDs into an argv array; blank or
          # whitespace-only input means "validate everything".
          core_ids_args=()
          read -r -a requested_ids <<< "${CORE_IDS:-}"
          if [ "${#requested_ids[@]}" -gt 0 ]; then
            core_ids_args=(--core-ids "${requested_ids[@]}")
          fi

          ./venv/bin/python engine/scripts/validate_published_rules.py \
            --rules-root "$(pwd)/open-rules" \
            --engine-dir "$(pwd)/engine" \
            --python-cmd "$(pwd)/venv/bin/python" \
            --output-dir "$(pwd)" \
            "${core_ids_args[@]}"

      # -----------------------------------------------------------------------
      # 6. Upload both reports + raw results as artifacts
      # -----------------------------------------------------------------------
      - name: Upload validation artifacts
        if: always()
        uses: actions/upload-artifact@v6
        with:
          name: published-rules-validation-${{ github.run_id }}
          path: |
            open-rules/Published/**/results/results.csv
            summary_table.md
            detail_report.md
          if-no-files-found: warn

      # -----------------------------------------------------------------------
      # 7. Write ONLY the summary table to GitHub Actions Job Summary
      # -----------------------------------------------------------------------
      - name: Write summary table to workflow summary
        if: always()
        run: |
          if [ -f summary_table.md ]; then
            cat summary_table.md >> "$GITHUB_STEP_SUMMARY"
          fi

      # -----------------------------------------------------------------------
      # 8. Fail the job if any rule failed
      # -----------------------------------------------------------------------
      - name: Check overall status
        if: steps.validate.outcome == 'failure'
        run: |
          echo "One or more published rules failed validation — see the artifacts for detail_report.md."
          exit 1
class CsvReport(BaseReport):
    """
    Report writer for the CSV output format.

    The column layout is not decided here: the header and the data rows both
    come from the report standard's ``get_csv_rows()``.
    """

    def __init__(
        self,
        report_standard: BaseReportData,
        args: Validation_args,
        template: IOBase | None = None,
    ):
        # No CSV-specific state; construction is delegated to BaseReport.
        super().__init__(report_standard, args, template)

    @property
    @override
    def _file_ext(self) -> str:
        """File extension for this report type: the lower-cased CSV enum value."""
        return ReportTypes.CSV.value.lower()

    @override
    def write_report(self) -> None:
        """Write the header and all data rows to ``self._output_name`` as UTF-8 CSV."""
        parent_dir = os.path.split(self._output_name)[0]
        if parent_dir:
            # Create missing parent directories before opening the file.
            os.makedirs(parent_dir, exist_ok=True)

        columns, records = self._report_standard.get_csv_rows()

        # newline="" hands line-ending control to the csv module.
        with open(self._output_name, "w", newline="", encoding="utf-8") as stream:
            csv.writer(stream).writerows([columns, *records])
def get_csv_rows(self) -> tuple[list[str], list[list[str]]]:
    """
    Flatten the "Issue Details" sheet into CSV rows.

    Returns ``(header, rows)``. Each issue yields one row per
    (variable, value) pair; pairing stops at the shorter of the two lists.
    A trailing ".csv" is removed from the dataset name.
    """
    columns = ["Dataset", "Record", "Variable", "Value"]
    body: list[list[str]] = []
    for entry in self.data_sheets.get("Issue Details", []):
        dataset_name = entry.get("dataset") or ""
        if dataset_name.endswith(".csv"):
            dataset_name = dataset_name[: -len(".csv")]
        row_label = str(entry.get("row", ""))
        names = entry.get("variables") or []
        observed = entry.get("values") or []
        body.extend(
            [dataset_name, row_label, name, str(item)]
            for name, item in zip(names, observed)
        )
    return columns, body
def get_csv_rows(self) -> tuple[list[str], list[list[str]]]:
    """
    Flatten the "Issue Details" sheet into CSV rows.

    Returns ``(header, rows)``. Each issue yields one row per
    (attribute, value) pair; pairing stops at the shorter of the two lists.
    A missing or empty path becomes "".
    """
    columns = ["path", "attribute", "value"]
    body: list[list[str]] = []
    for entry in self.data_sheets.get("Issue Details", []):
        location = entry.get("path") or ""
        names = entry.get("attributes") or []
        observed = entry.get("values") or []
        body.extend(
            [location, name, str(item)] for name, item in zip(names, observed)
        )
    return columns, body
| +| Flag | Description | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | +| `-o, --output TEXT` | Output file path (without extension). Extension is added automatically based on format. | +| `-of, --output-format [JSON\|XLSX\|CSV]` | Output format. `CSV` writes issue rows directly (Dataset, Record, Variable, Value) compatible with the open-rules test harness. | +| `-rr, --raw-report` | Raw output format (JSON only). | +| `-mr, --max-report-rows INTEGER` | Max rows in the Issue Details tab of Excel output (default: 1000; 0 = unlimited). Also via `MAX_REPORT_ROWS` env var. | +| `-me, --max-errors-per-rule INTEGER BOOLEAN` | Limit errors per rule. Format: `-me `. See below. | +| `-rt, --report-template TEXT` | Path to a custom Excel report template. | #### `--max-errors-per-rule` Detail diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py new file mode 100644 index 000000000..c645c163a --- /dev/null +++ b/scripts/validate_published_rules.py @@ -0,0 +1,402 @@ +#!/usr/bin/env python3 +""" +validate_published_rules.py + +Validates every Published rule from cdisc-open-rules against cdisc-rules-engine. +Intended to be called from the CI workflow (validate-published-rules.yml) instead +of the large inline bash block. 
+ +Outputs: + /summary_table.md — per-test-case results table + /detail_report.md — failure details (stderr / diffs) + +Exit code: + 0 — all rules passed + 1 — one or more rules failed +""" + +import argparse +import json +import os +import subprocess +import sys + +SUMMARY_HEADERS = ["Rule", "Type", "Number", "Expected", "Actual", "Match"] +CHECKMARK = "\u2705" +CROSS = "\u274c" + + +# --------------------------------------------------------------------------- +# Markdown helpers +# --------------------------------------------------------------------------- + + +def create_md_table(table_name, headers, records, property_getter=None): + """ + Create a Markdown table with the given headers and records. + + Args: + table_name: The title of the table + headers: List of column headers + records: List of records to include in the table + property_getter: Optional function to extract properties from records. + If None, assumes records are dictionaries. + Returns: + String containing the formatted Markdown table + """ + title = f"### {table_name}" + header = "| " + " | ".join(headers) + " |" + underline = "| " + " | ".join(["---" for _ in headers]) + " |" + + if property_getter is None: + + def property_getter(record, prop): + return str(record.get(prop, "")) + + values = "\n".join( + "| " + " | ".join([property_getter(record, prop) for prop in headers]) + " |" + for record in records + ) + + return f"{title}\n\n{header}\n{underline}\n{values}" + + +def _parse_case_result(line: str) -> dict: + """ + Parse one JSON line from case_results.jsonl produced by run_validation.sh. + + Returns a flat dict with both display-ready values (keyed by SUMMARY_HEADERS) + and private fields (prefixed with '_') needed to build failure detail sections. 
+ """ + d = json.loads(line) + exec_ok = bool(d["exec"]) + match_ok = bool(d.get("match", False)) + return { + # Display fields — keys match SUMMARY_HEADERS exactly + "Rule": d["rule"], + "Type": d["type"], + "Number": str(d["num"]), + "Expected": CROSS if d.get("expected") == "" else str(d["expected"]), + "Actual": str(d.get("actual", "")) if exec_ok else CROSS, + "Match": CHECKMARK if match_ok else CROSS, + # Private fields used when generating failure detail + "_exec_ok": exec_ok, + "_match_ok": match_ok, + "_diff_file": d.get("diff", "") or "", + "_stderr_file": d.get("stderr", "") or "", + } + + +def _build_failure_detail(record: dict) -> str: + """Return a Markdown section describing one failing test case.""" + lines = [f"## {record['Rule']} \u2014 {record['Type']} / {record['Number']}\n"] + if not record["_exec_ok"]: + lines.append("**Execution failed.**\n") + stderr_file = record["_stderr_file"] + if stderr_file and os.path.isfile(stderr_file): + lines.append("```") + with open(stderr_file) as fh: + lines.append(fh.read()) + lines.append("```") + else: + lines.append( + f"**Expected:** {record['Expected']} **Actual:** {record['Actual']}\n" + ) + diff_file = record["_diff_file"] + if diff_file and os.path.isfile(diff_file): + lines.append("```diff") + with open(diff_file) as fh: + lines.append(fh.read()) + lines.append("```") + lines.append("") + return "\n".join(lines) + + +def _run_rule_validation( + rule_id: str, + scripts_dir: str, + rules_root: str, + engine_dir: str, + python_cmd: str, +) -> int: + """ + Invoke run_validation.sh for a single rule directory and return its exit code. + Output is streamed directly to stdout/stderr so CI logs remain readable. 
+ """ + env = os.environ.copy() + env["ENGINE_DIR_OVERRIDE"] = engine_dir + + result = subprocess.run( + [ + "bash", + os.path.join(scripts_dir, "run_validation.sh"), + f"Published/{rule_id}", + python_cmd, + rules_root, + ], + env=env, + ) + return result.returncode + + +def _process_case_results(case_results_path: str) -> tuple[list[dict], list[str], bool]: + """ + Read and remove case_results.jsonl, returning: + - summary rows (public fields only) + - failure detail sections + - whether any row failed + """ + summary_rows: list[dict] = [] + details: list[str] = [] + any_failed = False + + with open(case_results_path) as fh: + raw_lines = fh.readlines() + os.remove(case_results_path) + + for raw_line in raw_lines: + raw_line = raw_line.strip() + if not raw_line: + continue + record = _parse_case_result(raw_line) + summary_rows.append({k: v for k, v in record.items() if not k.startswith("_")}) + if not record["_exec_ok"] or not record["_match_ok"]: + any_failed = True + details.append(_build_failure_detail(record)) + + return summary_rows, details, any_failed + + +def _aggregate_row( + rule_id: str, rule_exit: int, rules_root: str +) -> tuple[dict, str | None]: + """ + Build a single-row summary entry and optional failure detail + for a rule that produced no per-case JSONL output. 
+ """ + exec_ok = rule_exit == 0 + row = { + "Rule": rule_id, + "Type": "\u2014", + "Number": "\u2014", + "Expected": "\u2014", + "Actual": CHECKMARK if exec_ok else CROSS, + "Match": "\u2014", + } + if exec_ok: + return row, None + + detail = f"## {rule_id}\n\n" + report_file = os.path.join(rules_root, "validation_report.md") + if os.path.isfile(report_file): + with open(report_file) as fh: + detail += fh.read() + detail += "\n" + return row, detail + + +def _validate_one_rule( + rule_id: str, + scripts_dir: str, + rules_root: str, + engine_dir: str, + python_cmd: str, +) -> tuple[list[dict], list[str], bool]: + """ + Run validation for a single rule and return summary rows, failure details, + and whether the rule passed. + """ + print("=" * 40) + print(f" Validating {rule_id}") + print("=" * 40) + + rule_exit = _run_rule_validation( + rule_id, scripts_dir, rules_root, engine_dir, python_cmd + ) + + case_results_path = os.path.join(rules_root, "case_results.jsonl") + + if os.path.isfile(case_results_path): + summary_rows, details, row_failed = _process_case_results(case_results_path) + passed = not row_failed and rule_exit == 0 + else: + row, detail = _aggregate_row(rule_id, rule_exit, rules_root) + summary_rows = [row] + details = [detail] if detail is not None else [] + passed = rule_exit == 0 + + # Clean up any leftover report file + report_file = os.path.join(rules_root, "validation_report.md") + if os.path.isfile(report_file): + os.remove(report_file) + + return summary_rows, details, passed + + +def _write_reports( + summary_records: list[dict], + failure_details: list[str], + rule_pass: int, + rule_fail: int, + output_dir: str, +) -> None: + """Render and write summary_table.md and detail_report.md.""" + total = rule_pass + rule_fail + totals_line = ( + f"**Total:** {total} | " + f"{CHECKMARK} Passed: {rule_pass} | " + f"{CROSS} Failed: {rule_fail}" + ) + summary_md = ( + "# Published Rules Validation \u2014 Summary\n\n" + f"{totals_line}\n\n" + + 
create_md_table("Results", SUMMARY_HEADERS, summary_records) + ) + detail_body = "\n".join(failure_details) if failure_details else "_No failures._\n" + detail_md = f"# Published Rules Validation \u2014 Failure Details\n\n{detail_body}" + + os.makedirs(output_dir, exist_ok=True) + summary_path = os.path.join(output_dir, "summary_table.md") + detail_path = os.path.join(output_dir, "detail_report.md") + + with open(summary_path, "w", encoding="utf-8") as fh: + fh.write(summary_md) + with open(detail_path, "w", encoding="utf-8") as fh: + fh.write(detail_md) + + print(f"\nSummary written to : {summary_path}") + print(f"Details written to : {detail_path}") + + +def validate_all_rules( + rules_root: str, + engine_dir: str, + python_cmd: str, + output_dir: str, + core_ids: list[str] | None = None, +) -> bool: + """ + Iterate every directory under Published/, run the validation shell script, + parse results, and write the two report files. + + Args: + core_ids: Optional list of rule IDs to restrict validation to. + If None or empty, all rules are validated. + + Returns True if any rule failed, False if all passed. 
+ """ + published_dir = os.path.join(rules_root, "Published") + scripts_dir = os.path.join(rules_root, ".github", "scripts") + + if not os.path.isdir(published_dir): + print(f"ERROR: Published/ not found under {rules_root}", file=sys.stderr) + return True + + rule_ids = sorted( + entry + for entry in os.listdir(published_dir) + if os.path.isdir(os.path.join(published_dir, entry)) + ) + + if not rule_ids: + print("ERROR: No rule directories found under Published/", file=sys.stderr) + return True + + if core_ids: + unknown = [cid for cid in core_ids if cid not in rule_ids] + if unknown: + print( + f"WARNING: The following requested core IDs were not found under Published/: {unknown}", + file=sys.stderr, + ) + rule_ids = [rid for rid in rule_ids if rid in core_ids] + if not rule_ids: + print( + "ERROR: None of the requested core IDs exist under Published/", + file=sys.stderr, + ) + return True + + print(f"Found {len(rule_ids)} rule(s) under Published/") + + summary_records: list[dict] = [] + failure_details: list[str] = [] + rule_pass = rule_fail = 0 + + for rule_id in rule_ids: + rule_dir = os.path.join(published_dir, rule_id) + yml_files = [f for f in os.listdir(rule_dir) if f.endswith(".yml")] + if not yml_files: + print( + f"WARNING: Skipping {rule_id} \u2014 no .yml file found", + file=sys.stderr, + ) + continue + + rows, details, passed = _validate_one_rule( + rule_id, scripts_dir, rules_root, engine_dir, python_cmd + ) + summary_records.extend(rows) + failure_details.extend(details) + + if passed: + rule_pass += 1 + print(f" \u2192 {rule_id}: PASSED") + else: + rule_fail += 1 + print(f" \u2192 {rule_id}: FAILED") + + _write_reports(summary_records, failure_details, rule_pass, rule_fail, output_dir) + return rule_fail > 0 + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def _parse_args() -> argparse.Namespace: + parser = 
argparse.ArgumentParser( + description="Validate all Published rules from cdisc-open-rules." + ) + parser.add_argument( + "--rules-root", + required=True, + help="Absolute path to the cdisc-open-rules checkout (contains Published/).", + ) + parser.add_argument( + "--engine-dir", + required=True, + help="Absolute path to the cdisc-rules-engine checkout.", + ) + parser.add_argument( + "--python-cmd", + required=True, + help="Python executable passed through to run_validation.sh.", + ) + parser.add_argument( + "--output-dir", + default=".", + help="Directory where summary_table.md and detail_report.md are written (default: cwd).", + ) + parser.add_argument( + "--core-ids", + nargs="+", + metavar="CORE_ID", + default=None, + help="Optional list of rule IDs to validate (e.g. CORE-000001 CORE-000002). " + "If omitted, all Published rules are validated.", + ) + return parser.parse_args() + + +if __name__ == "__main__": + _args = _parse_args() + _any_failed = validate_all_rules( + rules_root=os.path.abspath(_args.rules_root), + engine_dir=os.path.abspath(_args.engine_dir), + python_cmd=_args.python_cmd, + output_dir=os.path.abspath(_args.output_dir), + core_ids=_args.core_ids, + ) + sys.exit(1 if _any_failed else 0) diff --git a/tests/QARegressionTests/test_core/test_validate.py b/tests/QARegressionTests/test_core/test_validate.py index 54fa64310..b3e6229bc 100644 --- a/tests/QARegressionTests/test_core/test_validate.py +++ b/tests/QARegressionTests/test_core/test_validate.py @@ -320,7 +320,7 @@ def test_validate_with_invalid_output_format(self): "-o", "output.json", "-of", - "csv", + "abc", ] exit_code, stdout, stderr = run_command(args, False) diff --git a/tests/unit/test_services/test_reporting/test_report_factory.py b/tests/unit/test_services/test_reporting/test_report_factory.py index 7c96680a0..f5375e9c0 100644 --- a/tests/unit/test_services/test_reporting/test_report_factory.py +++ b/tests/unit/test_services/test_reporting/test_report_factory.py @@ -2,6 +2,7 @@ from 
def _sdtm_csv_report(validation_results, elapsed_time=10.1):
    """Build an SDTMReportData with the constructor arguments shared by the CSV tests."""
    return SDTMReportData(
        [],
        ["test"],
        validation_results,
        elapsed_time,
        MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)),
    )


def test_get_csv_rows_header(mock_validation_results):
    # The header is fixed regardless of the validation results.
    columns, _ = _sdtm_csv_report(mock_validation_results).get_csv_rows()
    assert columns == ["Dataset", "Record", "Variable", "Value"]


def test_get_csv_rows_produces_one_row_per_variable(mock_validation_results):
    _, body = _sdtm_csv_report(mock_validation_results).get_csv_rows()
    # 3 errors total (2 from CORE1, 1 from CORE2), each with 2 variables → 6 rows
    assert len(body) == 6
    assert all(len(line) == 4 for line in body)


def test_get_csv_rows_row_values(mock_validation_results):
    _, body = _sdtm_csv_report(mock_validation_results).get_csv_rows()
    # Column 2 is Variable, column 1 is Record, column 3 is Value.
    assert set(line[2] for line in body) == {"AESTDY", "DOMAIN", "TTVAR1", "TTVAR2"}
    assert set(line[1] for line in body) == {"1", "9"}
    assert all(line[3] == "test" for line in body)


def test_get_csv_rows_strips_csv_suffix(mock_validation_results):
    report_data = _sdtm_csv_report(mock_validation_results)
    # Overwrite the dataset field in Issue Details so it carries a .csv suffix
    for entry in report_data.data_sheets["Issue Details"]:
        entry["dataset"] = "AE.csv"
    _, body = report_data.get_csv_rows()
    assert all(line[0] == "AE" for line in body)


def test_get_csv_rows_empty_results():
    # No validation results: the header is still returned, with no data rows.
    columns, body = _sdtm_csv_report([], 0.0).get_csv_rows()
    assert columns == ["Dataset", "Record", "Variable", "Value"]
    assert body == []
def _usdm_csv_report(validation_results, elapsed_time=10.1):
    """Build a USDMReportData with the constructor arguments shared by the CSV tests."""
    return USDMReportData(
        [],
        ["test"],
        validation_results,
        elapsed_time,
        MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)),
    )


def test_get_csv_rows_header(mock_validation_results):
    # The header is fixed regardless of the validation results.
    columns, _ = _usdm_csv_report(mock_validation_results).get_csv_rows()
    assert columns == ["path", "attribute", "value"]


def test_get_csv_rows_produces_one_row_per_attribute(mock_validation_results):
    _, body = _usdm_csv_report(mock_validation_results).get_csv_rows()
    # 3 errors total (2 from CORE1, 1 from CORE2), each with 2 attributes → 6 rows
    assert len(body) == 6
    assert all(len(line) == 3 for line in body)


def test_get_csv_rows_row_values(mock_validation_results):
    _, body = _usdm_csv_report(mock_validation_results).get_csv_rows()
    # Column 1 is attribute, column 2 is value.
    assert set(line[1] for line in body) == {"AESTDY", "DOMAIN", "TTVAR1", "TTVAR2"}
    assert all(line[2] == "test" for line in body)


def test_get_csv_rows_empty_path_when_not_set(mock_validation_results):
    _, body = _usdm_csv_report(mock_validation_results).get_csv_rows()
    # mock errors have no 'path' key, so path defaults to ""
    assert all(line[0] == "" for line in body)


def test_get_csv_rows_empty_results():
    # No validation results: the header is still returned, with no data rows.
    columns, body = _usdm_csv_report([], 0.0).get_csv_rows()
    assert columns == ["path", "attribute", "value"]
    assert body == []
def test_no_errors_when_none_value_in_one_of_the_records(mock_validation_results): # forcing None and str comparison in summary and details mock_validation_results[0].id = None