From 85b7b3cb4bc7ba697180a470365677995ce2fc01 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 21 Apr 2026 20:07:54 +0200 Subject: [PATCH 01/30] WIP: action to check engine against published rules --- .../workflows/validate-published-rules.yml | 190 ++++++++++++++++++ 1 file changed, 190 insertions(+) create mode 100644 .github/workflows/validate-published-rules.yml diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml new file mode 100644 index 000000000..0a4598995 --- /dev/null +++ b/.github/workflows/validate-published-rules.yml @@ -0,0 +1,190 @@ +# ============================================================================== +# This workflow: +# 1. Checks out cdisc-rules-engine (the engine itself) +# 2. Checks out cdisc-open-rules (rules + test data) into ./open-rules/ +# 3. Installs engine Python dependencies +# 4. Iterates every Published/ rule from cdisc-open-rules +# 5. Runs the engine against each test case +# 6. Compares output with committed results.csv baseline +# 7. Publishes a Markdown report to Job Summary and as an artifact +# ============================================================================== +name: Validate Published Rules + +on: + push: + branches: [ main ] + workflow_dispatch: + inputs: + rules_ref: + description: 'Branch/tag/SHA of cdisc-open-rules to validate against' + required: false + default: 'main' + engine_ref: + description: 'Branch/tag/SHA of cdisc-rules-engine to use' + required: false + default: 'main' + +jobs: + validate-published-rules: + runs-on: ubuntu-latest + permissions: + contents: read + + steps: + # ----------------------------------------------------------------------- + # 1. Checkout cdisc-rules-engine + # ----------------------------------------------------------------------- + - name: Checkout cdisc-rules-engine + uses: actions/checkout@v6 + with: + repository: cdisc-org/cdisc-rules-engine + ref: ${{ inputs.engine_ref || github.sha }} + path: engine + token: ${{ secrets.GITHUB_TOKEN }} + + # ----------------------------------------------------------------------- + # 2. Checkout cdisc-open-rules (rules + test data + helper scripts) + # ----------------------------------------------------------------------- + - name: Checkout cdisc-open-rules + uses: actions/checkout@v6 + with: + repository: cdisc-org/cdisc-open-rules + ref: ${{ inputs.rules_ref || 'main' }} + path: open-rules + # If cdisc-open-rules is private, add a PAT secret: + # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} + + # ----------------------------------------------------------------------- + # 3. Set up Python + # ----------------------------------------------------------------------- + - name: Set up Python 3.12 + uses: actions/setup-python@v6 + with: + python-version: '3.12' + + # ----------------------------------------------------------------------- + # 4. Install engine dependencies + # ----------------------------------------------------------------------- + - name: Install engine dependencies + run: | + python -m venv venv + ./venv/bin/pip install --upgrade pip + ./venv/bin/pip install -r engine/requirements.txt + + # ----------------------------------------------------------------------- + # 5. Run validation for every Published rule + # ----------------------------------------------------------------------- + - name: Run validation for all Published rules + id: validate + continue-on-error: true + run: | + chmod +x open-rules/.github/scripts/run_validation.sh + + PYTHON_CMD="$(pwd)/venv/bin/python" + ENGINE_DIR="$(pwd)/engine" + RULES_ROOT="$(pwd)/open-rules" + PUBLISHED_DIR="$RULES_ROOT/Published" + SCRIPTS_DIR="$RULES_ROOT/.github/scripts" + SUMMARY_REPORT="$(pwd)/validation_report.md" + + OVERALL_EXIT=0 + RULE_PASS=0 + RULE_FAIL=0 + + mapfile -t RULE_DIRS < <(find "$PUBLISHED_DIR" -mindepth 1 -maxdepth 1 -type d | sort) + + if [ ${#RULE_DIRS[@]} -eq 0 ]; then + echo "::warning::No rule directories found under Published/" + exit 0 + fi + + echo "Found ${#RULE_DIRS[@]} rule(s) under Published/" + + { + echo "# Published Rules Validation Report" + echo "" + } > "$SUMMARY_REPORT" + + for RULE_DIR in "${RULE_DIRS[@]}"; do + RULE_ID=$(basename "$RULE_DIR") + RULE_REL_PATH="Published/$RULE_ID" + + RULE_YML=$(find "$RULE_DIR" -maxdepth 1 -name "*.yml" | head -1) + if [ -z "$RULE_YML" ]; then + echo "::warning::Skipping $RULE_ID — no .yml file found" + continue + fi + + echo "========================================" + echo " Validating $RULE_ID" + echo "========================================" + + RULE_EXIT=0 + # Pass ENGINE_DIR explicitly so run_validation.sh knows where core.py is + ENGINE_DIR_OVERRIDE="$ENGINE_DIR" \ + bash "$SCRIPTS_DIR/run_validation.sh" \ + "$RULE_REL_PATH" \ + "$PYTHON_CMD" \ + "$RULES_ROOT" \ + || RULE_EXIT=$? + + if [ -f "$RULES_ROOT/validation_report.md" ]; then + cat "$RULES_ROOT/validation_report.md" >> "$SUMMARY_REPORT" + echo -e "\n---\n" >> "$SUMMARY_REPORT" + rm -f "$RULES_ROOT/validation_report.md" + fi + + if [ $RULE_EXIT -eq 0 ]; then + RULE_PASS=$((RULE_PASS + 1)) + echo " → $RULE_ID: PASSED" + else + RULE_FAIL=$((RULE_FAIL + 1)) + OVERALL_EXIT=1 + echo " → $RULE_ID: FAILED" + fi + done + + # Insert summary totals after the H1 heading + { + echo "**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" + echo "" + echo "---" + echo "" + } > /tmp/totals.md + head -2 "$SUMMARY_REPORT" > /tmp/final_report.md + cat /tmp/totals.md >> /tmp/final_report.md + tail -n +3 "$SUMMARY_REPORT" >> /tmp/final_report.md + mv /tmp/final_report.md "$SUMMARY_REPORT" + + exit $OVERALL_EXIT + + # ----------------------------------------------------------------------- + # 6. Upload report + results.json as artifacts + # ----------------------------------------------------------------------- + - name: Upload validation artifacts + if: always() + uses: actions/upload-artifact@v6 + with: + name: published-rules-validation-${{ github.run_id }} + path: | + open-rules/Published/**/results/results.json + validation_report.md + if-no-files-found: warn + + # ----------------------------------------------------------------------- + # 7. Write report to GitHub Actions Job Summary + # ----------------------------------------------------------------------- + - name: Write report to workflow summary + if: always() + run: | + [ -f validation_report.md ] && cat validation_report.md >> $GITHUB_STEP_SUMMARY || true + + # ----------------------------------------------------------------------- + # 8. Fail the job if any rule failed + # ----------------------------------------------------------------------- + - name: Check overall status + if: steps.validate.outcome == 'failure' + run: | + echo "One or more published rules failed validation — see the report above." + exit 1 + From 44da428c602e89bb0f3ef753ed347f4c5f91fc7a Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 15:43:14 +0200 Subject: [PATCH 02/30] added workflow options to test it --- .github/workflows/validate-published-rules.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 0a4598995..60bb54bfc 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -11,8 +11,11 @@ name: Validate Published Rules on: + pull_request: push: - branches: [ main ] + branches: + - main + - 798-test-against-published workflow_dispatch: inputs: rules_ref: From 77d07d75979d901c59b20f2ae161858b6732c4f4 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 15:58:10 +0200 Subject: [PATCH 03/30] debug step --- .github/workflows/validate-published-rules.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 60bb54bfc..5c89ea32c 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -57,6 +57,20 @@ jobs: # If cdisc-open-rules is private, add a PAT secret: # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} + # ----------------------------------------------------------------------- + # 2b. Debug — verify directory layout + # ----------------------------------------------------------------------- + - name: Debug — list workspace layout + run: | + echo "=== Workspace root ===" + ls -la + echo "=== open-rules/ ===" + ls -la open-rules/ || echo "open-rules/ NOT FOUND" + echo "=== open-rules/Published/ (first 10) ===" + ls open-rules/Published/ 2>/dev/null | head -10 || echo "Published/ NOT FOUND" + echo "=== engine/ ===" + ls engine/ | head -10 || echo "engine/ NOT FOUND" + # ----------------------------------------------------------------------- # 3. Set up Python # ----------------------------------------------------------------------- From f0273b7d6db83eced7b92749000eb4e3e6fc2215 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 16:07:38 +0200 Subject: [PATCH 04/30] set rules_2 as default branch for open-rules --- .github/workflows/validate-published-rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 5c89ea32c..a1eaa6d32 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -21,7 +21,7 @@ on: rules_ref: description: 'Branch/tag/SHA of cdisc-open-rules to validate against' required: false - default: 'main' + default: 'rules_2' engine_ref: description: 'Branch/tag/SHA of cdisc-rules-engine to use' required: false From 17715926b026aed802a62327520ca17cc17b1d6d Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Wed, 22 Apr 2026 17:18:30 +0200 Subject: [PATCH 05/30] set rules_2 as default branch for open-rules --- .github/workflows/validate-published-rules.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index a1eaa6d32..a561e6aad 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -21,7 +21,7 @@ on: rules_ref: description: 'Branch/tag/SHA of cdisc-open-rules to validate against' required: false - default: 'rules_2' + default: 'main' engine_ref: description: 'Branch/tag/SHA of cdisc-rules-engine to use' required: false @@ -52,7 +52,7 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-open-rules - ref: ${{ inputs.rules_ref || 'main' }} + ref: ${{ inputs.rules_ref || 'rules_2' }} path: open-rules # If cdisc-open-rules is private, add a PAT secret: # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} From 57ce680a8c5d9afd2fc379a58bbd3b228a0c9861 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 13:41:00 +0200 Subject: [PATCH 06/30] report adjustments --- .../workflows/validate-published-rules.yml | 130 +++++++++++++----- 1 file changed, 95 insertions(+), 35 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index a561e6aad..0ffd9687b 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -11,7 +11,6 @@ name: Validate Published Rules on: - pull_request: push: branches: - main @@ -22,10 +21,6 @@ on: description: 'Branch/tag/SHA of cdisc-open-rules to validate against' required: false default: 'main' - engine_ref: - description: 'Branch/tag/SHA of cdisc-rules-engine to use' - required: false - default: 'main' jobs: validate-published-rules: @@ -41,7 +36,6 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-rules-engine - ref: ${{ inputs.engine_ref || github.sha }} path: engine token: ${{ secrets.GITHUB_TOKEN }} @@ -102,7 +96,9 @@ jobs: RULES_ROOT="$(pwd)/open-rules" PUBLISHED_DIR="$RULES_ROOT/Published" SCRIPTS_DIR="$RULES_ROOT/.github/scripts" - SUMMARY_REPORT="$(pwd)/validation_report.md" + + SUMMARY_TABLE="$(pwd)/summary_table.md" + DETAIL_REPORT="$(pwd)/detail_report.md" OVERALL_EXIT=0 RULE_PASS=0 @@ -117,14 +113,22 @@ jobs: echo "Found ${#RULE_DIRS[@]} rule(s) under Published/" + # -- Initialise summary table + { + echo "# Published Rules Validation — Summary" + echo "" + echo "| Rule | Type | Number | Execution | Expected | Got | Match |" + echo "|------|------|--------|-----------|----------|-----|-------|" + } > "$SUMMARY_TABLE" + + # -- Initialise detail report { - echo "# Published Rules Validation Report" + echo "# Published Rules Validation — Failure Details" echo "" - } > "$SUMMARY_REPORT" + } > "$DETAIL_REPORT" for RULE_DIR in "${RULE_DIRS[@]}"; do RULE_ID=$(basename "$RULE_DIR") - RULE_REL_PATH="Published/$RULE_ID" RULE_YML=$(find "$RULE_DIR" -maxdepth 1 -name "*.yml" | head -1) if [ -z "$RULE_YML" ]; then @@ -137,21 +141,85 @@ jobs: echo "========================================" RULE_EXIT=0 - # Pass ENGINE_DIR explicitly so run_validation.sh knows where core.py is ENGINE_DIR_OVERRIDE="$ENGINE_DIR" \ bash "$SCRIPTS_DIR/run_validation.sh" \ - "$RULE_REL_PATH" \ + "Published/$RULE_ID" \ "$PYTHON_CMD" \ "$RULES_ROOT" \ || RULE_EXIT=$? - if [ -f "$RULES_ROOT/validation_report.md" ]; then - cat "$RULES_ROOT/validation_report.md" >> "$SUMMARY_REPORT" - echo -e "\n---\n" >> "$SUMMARY_REPORT" - rm -f "$RULES_ROOT/validation_report.md" + # -- Parse per-test-case results produced by run_validation.sh + CASE_RESULTS="$RULES_ROOT/case_results.jsonl" + RULE_ROW_FAILED=0 + + if [ -f "$CASE_RESULTS" ]; then + while IFS= read -r line; do + # Parse all fields in a single python3 call using shlex.quote + # to safely produce shell variable assignments + eval "$(echo "$line" | python3 -c " + import sys, json, shlex + d = json.load(sys.stdin) + for k, v in [ + ('CASE_RULE', d['rule']), + ('CASE_TYPE', d['type']), + ('CASE_NUM', str(d['num'])), + ('EXEC_OK', '✅' if d['exec'] else '❌'), + ('EXPECTED', str(d.get('expected', ''))), + ('GOT', str(d.get('got', ''))), + ('MATCH', '✅' if d.get('match') else '❌'), + ('DIFF_FILE', str(d.get('diff', ''))), + ('STDERR_FILE', str(d.get('stderr', ''))), + ]: + print(k + '=' + shlex.quote(v)) + ")" + + echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" + + # Collect detail only for failures + if [[ "$EXEC_OK" == "❌" || "$MATCH" == "❌" ]]; then + RULE_ROW_FAILED=1 + { + echo "## $CASE_RULE — $CASE_TYPE / $CASE_NUM" + if [[ "$EXEC_OK" == "❌" ]]; then + echo "**Execution failed.**" + if [ -f "$STDERR_FILE" ]; then + echo '```' + cat "$STDERR_FILE" + echo '```' + fi + else + echo "**Expected:** $EXPECTED **Got:** $GOT" + if [ -n "$DIFF_FILE" ] && [ -f "$DIFF_FILE" ]; then + echo '```diff' + cat "$DIFF_FILE" + echo '```' + fi + fi + echo "" + } >> "$DETAIL_REPORT" + fi + done < "$CASE_RESULTS" + rm -f "$CASE_RESULTS" + else + # write a single aggregate row + EXEC_OK=$( [ $RULE_EXIT -eq 0 ] && echo "✅" || echo "❌" ) + echo "| $RULE_ID | — | — | $EXEC_OK | — | — | — |" >> "$SUMMARY_TABLE" + if [ $RULE_EXIT -ne 0 ]; then + RULE_ROW_FAILED=1 + # Append whatever markdown run_validation.sh produced + if [ -f "$RULES_ROOT/validation_report.md" ]; then + { + echo "## $RULE_ID" + cat "$RULES_ROOT/validation_report.md" + echo "" + } >> "$DETAIL_REPORT" + fi + fi fi - if [ $RULE_EXIT -eq 0 ]; then + rm -f "$RULES_ROOT/validation_report.md" + + if [ $RULE_ROW_FAILED -eq 0 ] && [ $RULE_EXIT -eq 0 ]; then RULE_PASS=$((RULE_PASS + 1)) echo " → $RULE_ID: PASSED" else @@ -161,22 +229,14 @@ jobs: fi done - # Insert summary totals after the H1 heading - { - echo "**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" - echo "" - echo "---" - echo "" - } > /tmp/totals.md - head -2 "$SUMMARY_REPORT" > /tmp/final_report.md - cat /tmp/totals.md >> /tmp/final_report.md - tail -n +3 "$SUMMARY_REPORT" >> /tmp/final_report.md - mv /tmp/final_report.md "$SUMMARY_REPORT" + # -- Insert totals line into summary table + TOTALS="**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" + sed -i "2s|^|$TOTALS\n\n|" "$SUMMARY_TABLE" exit $OVERALL_EXIT # ----------------------------------------------------------------------- - # 6. Upload report + results.json as artifacts + # 6. Upload both reports + raw results as artifacts # ----------------------------------------------------------------------- - name: Upload validation artifacts if: always() @@ -185,16 +245,17 @@ jobs: name: published-rules-validation-${{ github.run_id }} path: | open-rules/Published/**/results/results.json - validation_report.md + summary_table.md + detail_report.md if-no-files-found: warn # ----------------------------------------------------------------------- - # 7. Write report to GitHub Actions Job Summary + # 7. Write ONLY the summary table to GitHub Actions Job Summary # ----------------------------------------------------------------------- - - name: Write report to workflow summary + - name: Write summary table to workflow summary if: always() run: | - [ -f validation_report.md ] && cat validation_report.md >> $GITHUB_STEP_SUMMARY || true + [ -f summary_table.md ] && cat summary_table.md >> $GITHUB_STEP_SUMMARY || true # ----------------------------------------------------------------------- # 8. Fail the job if any rule failed @@ -202,6 +263,5 @@ jobs: - name: Check overall status if: steps.validate.outcome == 'failure' run: | - echo "One or more published rules failed validation — see the report above." + echo "One or more published rules failed validation — see the artifacts for detail_report.md." exit 1 - From 37551aff92c3da20510d54e4b040fdcd63c8c972 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 13:53:45 +0200 Subject: [PATCH 07/30] indentation fix --- .../workflows/validate-published-rules.yml | 33 ++++++++++--------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 0ffd9687b..473c98d88 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -156,22 +156,23 @@ jobs: while IFS= read -r line; do # Parse all fields in a single python3 call using shlex.quote # to safely produce shell variable assignments - eval "$(echo "$line" | python3 -c " - import sys, json, shlex - d = json.load(sys.stdin) - for k, v in [ - ('CASE_RULE', d['rule']), - ('CASE_TYPE', d['type']), - ('CASE_NUM', str(d['num'])), - ('EXEC_OK', '✅' if d['exec'] else '❌'), - ('EXPECTED', str(d.get('expected', ''))), - ('GOT', str(d.get('got', ''))), - ('MATCH', '✅' if d.get('match') else '❌'), - ('DIFF_FILE', str(d.get('diff', ''))), - ('STDERR_FILE', str(d.get('stderr', ''))), - ]: - print(k + '=' + shlex.quote(v)) - ")" + eval "$(echo "$line" | python3 <<'PY' + import sys, json, shlex + d = json.load(sys.stdin) + for k, v in [ + ('CASE_RULE', d['rule']), + ('CASE_TYPE', d['type']), + ('CASE_NUM', str(d['num'])), + ('EXEC_OK', '✅' if d['exec'] else '❌'), + ('EXPECTED', str(d.get('expected', ''))), + ('GOT', str(d.get('got', ''))), + ('MATCH', '✅' if d.get('match') else '❌'), + ('DIFF_FILE', str(d.get('diff', ''))), + ('STDERR_FILE', str(d.get('stderr', ''))), + ]: + print(k + '=' + shlex.quote(v)) + PY + )" echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" From 118685eb47dd86b55bbd03c72983dbe4c69daac5 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 13:59:00 +0200 Subject: [PATCH 08/30] indentation fix(2) --- .github/workflows/validate-published-rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 473c98d88..9e39eeabf 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -171,7 +171,7 @@ jobs: ('STDERR_FILE', str(d.get('stderr', ''))), ]: print(k + '=' + shlex.quote(v)) - PY + 'PY' )" echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" From e9b1a69d7d625907107db9a52935b7d4d960942c Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 30 Apr 2026 14:07:15 +0200 Subject: [PATCH 09/30] indentation fix(3) -- heredoc in tmp file --- .../workflows/validate-published-rules.yml | 39 ++++++++++--------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 9e39eeabf..f230a7758 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -91,6 +91,24 @@ jobs: run: | chmod +x open-rules/.github/scripts/run_validation.sh + # Write the JSON-line parser once; called once per test case in the loop below + cat > /tmp/parse_case.py << 'PYEOF' + import sys, json, shlex + d = json.load(sys.stdin) + for k, v in [ + ('CASE_RULE', d['rule']), + ('CASE_TYPE', d['type']), + ('CASE_NUM', str(d['num'])), + ('EXEC_OK', '\u2705' if d['exec'] else '\u274c'), + ('EXPECTED', str(d.get('expected', ''))), + ('GOT', str(d.get('got', ''))), + ('MATCH', '\u2705' if d.get('match') else '\u274c'), + ('DIFF_FILE', str(d.get('diff', ''))), + ('STDERR_FILE', str(d.get('stderr', ''))), + ]: + print(k + '=' + shlex.quote(v)) + PYEOF + PYTHON_CMD="$(pwd)/venv/bin/python" ENGINE_DIR="$(pwd)/engine" RULES_ROOT="$(pwd)/open-rules" @@ -154,25 +172,8 @@ jobs: if [ -f "$CASE_RESULTS" ]; then while IFS= read -r line; do - # Parse all fields in a single python3 call using shlex.quote - # to safely produce shell variable assignments - eval "$(echo "$line" | python3 <<'PY' - import sys, json, shlex - d = json.load(sys.stdin) - for k, v in [ - ('CASE_RULE', d['rule']), - ('CASE_TYPE', d['type']), - ('CASE_NUM', str(d['num'])), - ('EXEC_OK', '✅' if d['exec'] else '❌'), - ('EXPECTED', str(d.get('expected', ''))), - ('GOT', str(d.get('got', ''))), - ('MATCH', '✅' if d.get('match') else '❌'), - ('DIFF_FILE', str(d.get('diff', ''))), - ('STDERR_FILE', str(d.get('stderr', ''))), - ]: - print(k + '=' + shlex.quote(v)) - 'PY' - )" + # Parse all fields in a single python3 call — script written once above + eval "$(echo "$line" | python3 /tmp/parse_case.py)" echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" From 7ad65fe21db0a97c6b8f48b2c5d570fd4687fa6e Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 11 May 2026 14:23:48 +0200 Subject: [PATCH 10/30] moved validation logic to python script --- .../workflows/validate-published-rules.yml | 150 +------ scripts/validate_published_rules.py | 373 ++++++++++++++++++ 2 files changed, 378 insertions(+), 145 deletions(-) create mode 100644 scripts/validate_published_rules.py diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index f230a7758..162eb2f05 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -91,151 +91,11 @@ jobs: run: | chmod +x open-rules/.github/scripts/run_validation.sh - # Write the JSON-line parser once; called once per test case in the loop below - cat > /tmp/parse_case.py << 'PYEOF' - import sys, json, shlex - d = json.load(sys.stdin) - for k, v in [ - ('CASE_RULE', d['rule']), - ('CASE_TYPE', d['type']), - ('CASE_NUM', str(d['num'])), - ('EXEC_OK', '\u2705' if d['exec'] else '\u274c'), - ('EXPECTED', str(d.get('expected', ''))), - ('GOT', str(d.get('got', ''))), - ('MATCH', '\u2705' if d.get('match') else '\u274c'), - ('DIFF_FILE', str(d.get('diff', ''))), - ('STDERR_FILE', str(d.get('stderr', ''))), - ]: - print(k + '=' + shlex.quote(v)) - PYEOF - - PYTHON_CMD="$(pwd)/venv/bin/python" - ENGINE_DIR="$(pwd)/engine" - RULES_ROOT="$(pwd)/open-rules" - PUBLISHED_DIR="$RULES_ROOT/Published" - SCRIPTS_DIR="$RULES_ROOT/.github/scripts" - - SUMMARY_TABLE="$(pwd)/summary_table.md" - DETAIL_REPORT="$(pwd)/detail_report.md" - - OVERALL_EXIT=0 - RULE_PASS=0 - RULE_FAIL=0 - - mapfile -t RULE_DIRS < <(find "$PUBLISHED_DIR" -mindepth 1 -maxdepth 1 -type d | sort) - - if [ ${#RULE_DIRS[@]} -eq 0 ]; then - echo "::warning::No rule directories found under Published/" - exit 0 - fi - - echo "Found ${#RULE_DIRS[@]} rule(s) under Published/" - - # -- Initialise summary table - { - echo "# Published Rules Validation — Summary" - echo "" - echo "| Rule | Type | Number | Execution | Expected | Got | Match |" - echo "|------|------|--------|-----------|----------|-----|-------|" - } > "$SUMMARY_TABLE" - - # -- Initialise detail report - { - echo "# Published Rules Validation — Failure Details" - echo "" - } > "$DETAIL_REPORT" - - for RULE_DIR in "${RULE_DIRS[@]}"; do - RULE_ID=$(basename "$RULE_DIR") - - RULE_YML=$(find "$RULE_DIR" -maxdepth 1 -name "*.yml" | head -1) - if [ -z "$RULE_YML" ]; then - echo "::warning::Skipping $RULE_ID — no .yml file found" - continue - fi - - echo "========================================" - echo " Validating $RULE_ID" - echo "========================================" - - RULE_EXIT=0 - ENGINE_DIR_OVERRIDE="$ENGINE_DIR" \ - bash "$SCRIPTS_DIR/run_validation.sh" \ - "Published/$RULE_ID" \ - "$PYTHON_CMD" \ - "$RULES_ROOT" \ - || RULE_EXIT=$? - - # -- Parse per-test-case results produced by run_validation.sh - CASE_RESULTS="$RULES_ROOT/case_results.jsonl" - RULE_ROW_FAILED=0 - - if [ -f "$CASE_RESULTS" ]; then - while IFS= read -r line; do - # Parse all fields in a single python3 call — script written once above - eval "$(echo "$line" | python3 /tmp/parse_case.py)" - - echo "| $CASE_RULE | $CASE_TYPE | $CASE_NUM | $EXEC_OK | $EXPECTED | $GOT | $MATCH |" >> "$SUMMARY_TABLE" - - # Collect detail only for failures - if [[ "$EXEC_OK" == "❌" || "$MATCH" == "❌" ]]; then - RULE_ROW_FAILED=1 - { - echo "## $CASE_RULE — $CASE_TYPE / $CASE_NUM" - if [[ "$EXEC_OK" == "❌" ]]; then - echo "**Execution failed.**" - if [ -f "$STDERR_FILE" ]; then - echo '```' - cat "$STDERR_FILE" - echo '```' - fi - else - echo "**Expected:** $EXPECTED **Got:** $GOT" - if [ -n "$DIFF_FILE" ] && [ -f "$DIFF_FILE" ]; then - echo '```diff' - cat "$DIFF_FILE" - echo '```' - fi - fi - echo "" - } >> "$DETAIL_REPORT" - fi - done < "$CASE_RESULTS" - rm -f "$CASE_RESULTS" - else - # write a single aggregate row - EXEC_OK=$( [ $RULE_EXIT -eq 0 ] && echo "✅" || echo "❌" ) - echo "| $RULE_ID | — | — | $EXEC_OK | — | — | — |" >> "$SUMMARY_TABLE" - if [ $RULE_EXIT -ne 0 ]; then - RULE_ROW_FAILED=1 - # Append whatever markdown run_validation.sh produced - if [ -f "$RULES_ROOT/validation_report.md" ]; then - { - echo "## $RULE_ID" - cat "$RULES_ROOT/validation_report.md" - echo "" - } >> "$DETAIL_REPORT" - fi - fi - fi - - rm -f "$RULES_ROOT/validation_report.md" - - if [ $RULE_ROW_FAILED -eq 0 ] && [ $RULE_EXIT -eq 0 ]; then - RULE_PASS=$((RULE_PASS + 1)) - echo " → $RULE_ID: PASSED" - else - RULE_FAIL=$((RULE_FAIL + 1)) - OVERALL_EXIT=1 - echo " → $RULE_ID: FAILED" - fi - done - - # -- Insert totals line into summary table - TOTALS="**Total:** $((RULE_PASS + RULE_FAIL)) | ✅ Passed: $RULE_PASS | ❌ Failed: $RULE_FAIL" - sed -i "2s|^|$TOTALS\n\n|" "$SUMMARY_TABLE" - - exit $OVERALL_EXIT + ./venv/bin/python engine/scripts/validate_published_rules.py \ + --rules-root "$(pwd)/open-rules" \ + --engine-dir "$(pwd)/engine" \ + --python-cmd "$(pwd)/venv/bin/python" \ + --output-dir "$(pwd)" # ----------------------------------------------------------------------- # 6. Upload both reports + raw results as artifacts diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py new file mode 100644 index 000000000..adb47fba4 --- /dev/null +++ b/scripts/validate_published_rules.py @@ -0,0 +1,373 @@ +#!/usr/bin/env python3 +""" +validate_published_rules.py + +Validates every Published rule from cdisc-open-rules against cdisc-rules-engine. +Intended to be called from the CI workflow (validate-published-rules.yml) instead +of the large inline bash block. + +Outputs: + /summary_table.md — per-test-case results table + /detail_report.md — failure details (stderr / diffs) + +Exit code: + 0 — all rules passed + 1 — one or more rules failed +""" + +import argparse +import json +import os +import subprocess +import sys + +SUMMARY_HEADERS = ["Rule", "Type", "Number", "Execution", "Expected", "Got", "Match"] +CHECKMARK = "\u2705" +CROSS = "\u274c" + + +# --------------------------------------------------------------------------- +# Markdown helpers +# --------------------------------------------------------------------------- + + +def create_md_table(table_name, headers, records, property_getter=None): + """ + Create a Markdown table with the given headers and records. + + Args: + table_name: The title of the table + headers: List of column headers + records: List of records to include in the table + property_getter: Optional function to extract properties from records. + If None, assumes records are dictionaries. + Returns: + String containing the formatted Markdown table + """ + title = f"### {table_name}" + header = "| " + " | ".join(headers) + " |" + underline = "| " + " | ".join(["---" for _ in headers]) + " |" + + if property_getter is None: + + def property_getter(record, prop): + return str(record.get(prop, "")) + + values = "\n".join( + "| " + " | ".join([property_getter(record, prop) for prop in headers]) + " |" + for record in records + ) + + return f"{title}\n\n{header}\n{underline}\n{values}" + + +def _parse_case_result(line: str) -> dict: + """ + Parse one JSON line from case_results.jsonl produced by run_validation.sh. + + Returns a flat dict with both display-ready values (keyed by SUMMARY_HEADERS) + and private fields (prefixed with '_') needed to build failure detail sections. + """ + d = json.loads(line) + exec_ok = bool(d["exec"]) + match_ok = bool(d.get("match", False)) + return { + # Display fields — keys match SUMMARY_HEADERS exactly + "Rule": d["rule"], + "Type": d["type"], + "Number": str(d["num"]), + "Execution": CHECKMARK if exec_ok else CROSS, + "Expected": str(d.get("expected", "")), + "Got": str(d.get("got", "")), + "Match": CHECKMARK if match_ok else CROSS, + # Private fields used when generating failure detail + "_exec_ok": exec_ok, + "_match_ok": match_ok, + "_diff_file": d.get("diff", "") or "", + "_stderr_file": d.get("stderr", "") or "", + } + + +def _build_failure_detail(record: dict) -> str: + """Return a Markdown section describing one failing test case.""" + lines = [f"## {record['Rule']} \u2014 {record['Type']} / {record['Number']}\n"] + if not record["_exec_ok"]: + lines.append("**Execution failed.**\n") + stderr_file = record["_stderr_file"] + if stderr_file and os.path.isfile(stderr_file): + lines.append("```") + with open(stderr_file) as fh: + lines.append(fh.read()) + lines.append("```") + else: + lines.append(f"**Expected:** {record['Expected']} **Got:** {record['Got']}\n") + diff_file = record["_diff_file"] + if diff_file and os.path.isfile(diff_file): + lines.append("```diff") + with open(diff_file) as fh: + lines.append(fh.read()) + lines.append("```") + lines.append("") + return "\n".join(lines) + + +def _run_rule_validation( + rule_id: str, + scripts_dir: str, + rules_root: str, + engine_dir: str, + python_cmd: str, +) -> int: + """ + Invoke run_validation.sh for a single rule directory and return its exit code. + Output is streamed directly to stdout/stderr so CI logs remain readable. + """ + env = os.environ.copy() + env["ENGINE_DIR_OVERRIDE"] = engine_dir + + result = subprocess.run( + [ + "bash", + os.path.join(scripts_dir, "run_validation.sh"), + f"Published/{rule_id}", + python_cmd, + rules_root, + ], + env=env, + ) + return result.returncode + + +def _process_case_results(case_results_path: str) -> tuple[list[dict], list[str], bool]: + """ + Read and remove case_results.jsonl, returning: + - summary rows (public fields only) + - failure detail sections + - whether any row failed + """ + summary_rows: list[dict] = [] + details: list[str] = [] + any_failed = False + + with open(case_results_path) as fh: + raw_lines = fh.readlines() + os.remove(case_results_path) + + for raw_line in raw_lines: + raw_line = raw_line.strip() + if not raw_line: + continue + record = _parse_case_result(raw_line) + summary_rows.append({k: v for k, v in record.items() if not k.startswith("_")}) + if not record["_exec_ok"] or not record["_match_ok"]: + any_failed = True + details.append(_build_failure_detail(record)) + + return summary_rows, details, any_failed + + +def _aggregate_row( + rule_id: str, rule_exit: int, rules_root: str +) -> tuple[dict, str | None]: + """ + Build a single-row summary entry and optional failure detail + for a rule that produced no per-case JSONL output. + """ + exec_ok = rule_exit == 0 + row = { + "Rule": rule_id, + "Type": "\u2014", + "Number": "\u2014", + "Execution": CHECKMARK if exec_ok else CROSS, + "Expected": "\u2014", + "Got": "\u2014", + "Match": "\u2014", + } + if exec_ok: + return row, None + + detail = f"## {rule_id}\n\n" + report_file = os.path.join(rules_root, "validation_report.md") + if os.path.isfile(report_file): + with open(report_file) as fh: + detail += fh.read() + detail += "\n" + return row, detail + + +def _validate_one_rule( + rule_id: str, + scripts_dir: str, + rules_root: str, + engine_dir: str, + python_cmd: str, +) -> tuple[list[dict], list[str], bool]: + """ + Run validation for a single rule and return summary rows, failure details, + and whether the rule passed. + """ + print("=" * 40) + print(f" Validating {rule_id}") + print("=" * 40) + + rule_exit = _run_rule_validation( + rule_id, scripts_dir, rules_root, engine_dir, python_cmd + ) + + case_results_path = os.path.join(rules_root, "case_results.jsonl") + + if os.path.isfile(case_results_path): + summary_rows, details, row_failed = _process_case_results(case_results_path) + passed = not row_failed and rule_exit == 0 + else: + row, detail = _aggregate_row(rule_id, rule_exit, rules_root) + summary_rows = [row] + details = [detail] if detail is not None else [] + passed = rule_exit == 0 + + # Clean up any leftover report file + report_file = os.path.join(rules_root, "validation_report.md") + if os.path.isfile(report_file): + os.remove(report_file) + + return summary_rows, details, passed + + +def _write_reports( + summary_records: list[dict], + failure_details: list[str], + rule_pass: int, + rule_fail: int, + output_dir: str, +) -> None: + """Render and write summary_table.md and detail_report.md.""" + total = rule_pass + rule_fail + totals_line = ( + f"**Total:** {total} | " + f"{CHECKMARK} Passed: {rule_pass} | " + f"{CROSS} Failed: {rule_fail}" + ) + summary_md = ( + "# Published Rules Validation \u2014 Summary\n\n" + f"{totals_line}\n\n" + + create_md_table("Results", SUMMARY_HEADERS, summary_records) + ) + detail_body = "\n".join(failure_details) if failure_details else "_No failures._\n" + detail_md = f"# Published Rules Validation \u2014 Failure Details\n\n{detail_body}" + + os.makedirs(output_dir, exist_ok=True) + summary_path = os.path.join(output_dir, "summary_table.md") + detail_path = os.path.join(output_dir, "detail_report.md") + + with open(summary_path, "w", encoding="utf-8") as fh: + fh.write(summary_md) + with open(detail_path, "w", encoding="utf-8") as fh: + fh.write(detail_md) + + print(f"\nSummary written to : {summary_path}") + print(f"Details written to : {detail_path}") + + +def validate_all_rules( + rules_root: str, + engine_dir: str, + python_cmd: str, + output_dir: str, +) -> bool: + """ + Iterate every directory under Published/, run the validation shell script, + parse results, and write the two report files. + + Returns True if any rule failed, False if all passed. + """ + published_dir = os.path.join(rules_root, "Published") + scripts_dir = os.path.join(rules_root, ".github", "scripts") + + if not os.path.isdir(published_dir): + print(f"WARNING: Published/ not found under {rules_root}", file=sys.stderr) + return False + + rule_ids = sorted( + entry + for entry in os.listdir(published_dir) + if os.path.isdir(os.path.join(published_dir, entry)) + ) + + if not rule_ids: + print("WARNING: No rule directories found under Published/", file=sys.stderr) + return False + + print(f"Found {len(rule_ids)} rule(s) under Published/") + + summary_records: list[dict] = [] + failure_details: list[str] = [] + rule_pass = rule_fail = 0 + + for rule_id in rule_ids: + rule_dir = os.path.join(published_dir, rule_id) + yml_files = [f for f in os.listdir(rule_dir) if f.endswith(".yml")] + if not yml_files: + print( + f"WARNING: Skipping {rule_id} \u2014 no .yml file found", + file=sys.stderr, + ) + continue + + rows, details, passed = _validate_one_rule( + rule_id, scripts_dir, rules_root, engine_dir, python_cmd + ) + summary_records.extend(rows) + failure_details.extend(details) + + if passed: + rule_pass += 1 + print(f" \u2192 {rule_id}: PASSED") + else: + rule_fail += 1 + print(f" \u2192 {rule_id}: FAILED") + + _write_reports(summary_records, failure_details, rule_pass, rule_fail, output_dir) + return rule_fail > 0 + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def _parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser( + description="Validate all Published rules from cdisc-open-rules." + ) + parser.add_argument( + "--rules-root", + required=True, + help="Absolute path to the cdisc-open-rules checkout (contains Published/).", + ) + parser.add_argument( + "--engine-dir", + required=True, + help="Absolute path to the cdisc-rules-engine checkout.", + ) + parser.add_argument( + "--python-cmd", + required=True, + help="Python executable passed through to run_validation.sh.", + ) + parser.add_argument( + "--output-dir", + default=".", + help="Directory where summary_table.md and detail_report.md are written (default: cwd).", + ) + return parser.parse_args() + + +if __name__ == "__main__": + _args = _parse_args() + _any_failed = validate_all_rules( + rules_root=os.path.abspath(_args.rules_root), + engine_dir=os.path.abspath(_args.engine_dir), + python_cmd=_args.python_cmd, + output_dir=os.path.abspath(_args.output_dir), + ) + sys.exit(1 if _any_failed else 0) From 0df39d29e4c4af0c36c69ae8834bb7d4354c0cd7 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Mon, 11 May 2026 14:27:41 +0200 Subject: [PATCH 11/30] removed trigger on feature branch push event --- .github/workflows/validate-published-rules.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 162eb2f05..2241af67a 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -14,13 +14,12 @@ on: push: branches: - main - - 798-test-against-published workflow_dispatch: inputs: rules_ref: - description: 'Branch/tag/SHA of cdisc-open-rules to validate against' + description: "Branch/tag/SHA of cdisc-open-rules to validate against" required: false - default: 'main' + default: "main" jobs: validate-published-rules: @@ -71,7 +70,7 @@ jobs: - name: Set up Python 3.12 uses: actions/setup-python@v6 with: - python-version: '3.12' + python-version: "3.12" # ----------------------------------------------------------------------- # 4. Install engine dependencies From d1179a3a293ac5093fa3b0d517b1fe751497cc60 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Tue, 12 May 2026 11:59:02 +0200 Subject: [PATCH 12/30] fix action --- .github/workflows/validate-published-rules.yml | 4 +--- scripts/validate_published_rules.py | 8 ++++---- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 2241af67a..92450d8ed 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -45,10 +45,8 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-open-rules - ref: ${{ inputs.rules_ref || 'rules_2' }} + ref: ${{ inputs.rules_ref }} path: open-rules - # If cdisc-open-rules is private, add a PAT secret: - # token: ${{ secrets.CDISC_OPEN_RULES_TOKEN }} # ----------------------------------------------------------------------- # 2b. Debug — verify directory layout diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index adb47fba4..6510dd2cd 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -284,8 +284,8 @@ def validate_all_rules( scripts_dir = os.path.join(rules_root, ".github", "scripts") if not os.path.isdir(published_dir): - print(f"WARNING: Published/ not found under {rules_root}", file=sys.stderr) - return False + print(f"ERROR: Published/ not found under {rules_root}", file=sys.stderr) + return True rule_ids = sorted( entry @@ -294,8 +294,8 @@ def validate_all_rules( ) if not rule_ids: - print("WARNING: No rule directories found under Published/", file=sys.stderr) - return False + print("ERROR: No rule directories found under Published/", file=sys.stderr) + return True print(f"Found {len(rule_ids)} rule(s) under Published/") From b05b140444f0ab1ec642b424bbc2e53e50ba0437 Mon Sep 17 00:00:00 2001 From: alexfurmenkov Date: Thu, 21 May 2026 10:08:56 +0200 Subject: [PATCH 13/30] fixed naming in report --- scripts/validate_published_rules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 6510dd2cd..7c10f2e3f 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -78,7 +78,7 @@ def _parse_case_result(line: str) -> dict: "Number": str(d["num"]), "Execution": CHECKMARK if exec_ok else CROSS, "Expected": str(d.get("expected", "")), - "Got": str(d.get("got", "")), + "Actual": str(d.get("actual", "")), "Match": CHECKMARK if match_ok else CROSS, # Private fields used when generating failure detail "_exec_ok": exec_ok, @@ -180,7 +180,7 @@ def _aggregate_row( "Number": "\u2014", "Execution": CHECKMARK if exec_ok else CROSS, "Expected": "\u2014", - "Got": "\u2014", + "Actual": "\u2014", "Match": "\u2014", } if exec_ok: From b63e1c466da14637f980ee5e006703d49adc47e4 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 2 Jun 2026 13:36:04 -0400 Subject: [PATCH 14/30] temp allow to run on branch --- .github/workflows/validate-published-rules.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 92450d8ed..ff64969e7 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -14,6 +14,7 @@ on: push: branches: - main + - 798-test-against-published # TODO: Remove this before merging to main workflow_dispatch: inputs: rules_ref: From 89ceff76a8c1164fa96db2a3eb671d0a8354ed45 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Tue, 2 Jun 2026 13:56:41 -0400 Subject: [PATCH 15/30] try to fix failure --- scripts/validate_published_rules.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 7c10f2e3f..dcdb77e1e 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -285,7 +285,7 @@ def validate_all_rules( if not os.path.isdir(published_dir): print(f"ERROR: Published/ not found under {rules_root}", file=sys.stderr) - return True + return False rule_ids = sorted( entry @@ -295,7 +295,7 @@ def validate_all_rules( if not rule_ids: print("ERROR: No rule directories found under Published/", file=sys.stderr) - return True + return False print(f"Found {len(rule_ids)} rule(s) under Published/") From 91ff691ea6cc6d1efc39d204b2a9be2f036bfe6a Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 01:28:22 -0400 Subject: [PATCH 16/30] still trying --- .github/workflows/validate-published-rules.yml | 2 +- scripts/validate_published_rules.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index ff64969e7..758907781 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -46,7 +46,7 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-open-rules - ref: ${{ inputs.rules_ref }} + ref: ${{ inputs.rules_ref || 'rules_2'}} # TODO: Remove this before merging to main path: open-rules # ----------------------------------------------------------------------- diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index dcdb77e1e..7c10f2e3f 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -285,7 +285,7 @@ def validate_all_rules( if not os.path.isdir(published_dir): print(f"ERROR: Published/ not found under {rules_root}", file=sys.stderr) - return False + return True rule_ids = sorted( entry @@ -295,7 +295,7 @@ def validate_all_rules( if not rule_ids: print("ERROR: No rule directories found under Published/", file=sys.stderr) - return False + return True print(f"Found {len(rule_ids)} rule(s) under Published/") From 24713b5237934c3bbfe601f9150c2d8e078b200c Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 01:33:25 -0400 Subject: [PATCH 17/30] got/actual update --- scripts/validate_published_rules.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 7c10f2e3f..308b83143 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -21,7 +21,7 @@ import subprocess import sys -SUMMARY_HEADERS = ["Rule", "Type", "Number", "Execution", "Expected", "Got", "Match"] +SUMMARY_HEADERS = ["Rule", "Type", "Number", "Execution", "Expected", "Actual", "Match"] CHECKMARK = "\u2705" CROSS = "\u274c" @@ -100,7 +100,9 @@ def _build_failure_detail(record: dict) -> str: lines.append(fh.read()) lines.append("```") else: - lines.append(f"**Expected:** {record['Expected']} **Got:** {record['Got']}\n") + lines.append( + f"**Expected:** {record['Expected']} **Actual:** {record['Actual']}\n" + ) diff_file = record["_diff_file"] if diff_file and os.path.isfile(diff_file): lines.append("```diff") From 352cc7e18285bb30fb11722167a1b047d4c713a1 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 10:45:56 -0400 Subject: [PATCH 18/30] more actual got fix --- scripts/validate_published_rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 308b83143..d17e73c7a 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -78,7 +78,7 @@ def _parse_case_result(line: str) -> dict: "Number": str(d["num"]), "Execution": CHECKMARK if exec_ok else CROSS, "Expected": str(d.get("expected", "")), - "Actual": str(d.get("actual", "")), + "Actual": str(d.get("got", "")), "Match": CHECKMARK if match_ok else CROSS, # Private fields used when generating failure detail "_exec_ok": exec_ok, From ba9773e4f4d095ebed9a61e45571015a7a4d992c Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 13:31:27 -0400 Subject: [PATCH 19/30] comment change --- .github/workflows/validate-published-rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 758907781..dc29ed518 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -5,7 +5,7 @@ # 3. Installs engine Python dependencies # 4. Iterates every Published/ rule from cdisc-open-rules # 5. Runs the engine against each test case -# 6. Compares output with committed results.csv baseline +# 6. Compares actual output with expected results.csv baseline # 7. Publishes a Markdown report to Job Summary and as an artifact # ============================================================================== name: Validate Published Rules From 8e19c8cd9815853753e90b173b417bed29e04c6b Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 15:22:39 -0400 Subject: [PATCH 20/30] got->actual --- scripts/validate_published_rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index d17e73c7a..308b83143 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -78,7 +78,7 @@ def _parse_case_result(line: str) -> dict: "Number": str(d["num"]), "Execution": CHECKMARK if exec_ok else CROSS, "Expected": str(d.get("expected", "")), - "Actual": str(d.get("got", "")), + "Actual": str(d.get("actual", "")), "Match": CHECKMARK if match_ok else CROSS, # Private fields used when generating failure detail "_exec_ok": exec_ok, From 6f93af6a084916d8507556ad250b898c1797534a Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 20:41:30 -0400 Subject: [PATCH 21/30] core ids arg to limit number of rules run --- .../workflows/validate-published-rules.yml | 14 ++++++++- scripts/validate_published_rules.py | 29 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index dc29ed518..2c4bdfcb0 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -21,6 +21,10 @@ on: description: "Branch/tag/SHA of cdisc-open-rules to validate against" required: false default: "main" + core_ids: + description: "Space-separated list of rule IDs to validate (e.g. CORE-000001 CORE-000002). Leave blank to validate all." + required: false + default: "" jobs: validate-published-rules: @@ -89,11 +93,19 @@ jobs: run: | chmod +x open-rules/.github/scripts/run_validation.sh + CORE_IDS_ARG="" + if [ -n "${{ inputs.core_ids }}" ]; then + CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}" + fi + + CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004" # TODO: Remove this before merging to main + ./venv/bin/python engine/scripts/validate_published_rules.py \ --rules-root "$(pwd)/open-rules" \ --engine-dir "$(pwd)/engine" \ --python-cmd "$(pwd)/venv/bin/python" \ - --output-dir "$(pwd)" + --output-dir "$(pwd)" \ + $CORE_IDS_ARG # ----------------------------------------------------------------------- # 6. Upload both reports + raw results as artifacts diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 308b83143..9a3c85741 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -275,11 +275,16 @@ def validate_all_rules( engine_dir: str, python_cmd: str, output_dir: str, + core_ids: list[str] | None = None, ) -> bool: """ Iterate every directory under Published/, run the validation shell script, parse results, and write the two report files. + Args: + core_ids: Optional list of rule IDs to restrict validation to. + If None or empty, all rules are validated. + Returns True if any rule failed, False if all passed. """ published_dir = os.path.join(rules_root, "Published") @@ -299,6 +304,21 @@ def validate_all_rules( print("ERROR: No rule directories found under Published/", file=sys.stderr) return True + if core_ids: + unknown = [cid for cid in core_ids if cid not in rule_ids] + if unknown: + print( + f"WARNING: The following requested core IDs were not found under Published/: {unknown}", + file=sys.stderr, + ) + rule_ids = [rid for rid in rule_ids if rid in core_ids] + if not rule_ids: + print( + "ERROR: None of the requested core IDs exist under Published/", + file=sys.stderr, + ) + return True + print(f"Found {len(rule_ids)} rule(s) under Published/") summary_records: list[dict] = [] @@ -361,6 +381,14 @@ def _parse_args() -> argparse.Namespace: default=".", help="Directory where summary_table.md and detail_report.md are written (default: cwd).", ) + parser.add_argument( + "--core-ids", + nargs="+", + metavar="CORE_ID", + default=None, + help="Optional list of rule IDs to validate (e.g. CORE-000001 CORE-000002). " + "If omitted, all Published rules are validated.", + ) return parser.parse_args() @@ -371,5 +399,6 @@ def _parse_args() -> argparse.Namespace: engine_dir=os.path.abspath(_args.engine_dir), python_cmd=_args.python_cmd, output_dir=os.path.abspath(_args.output_dir), + core_ids=_args.core_ids, ) sys.exit(1 if _any_failed else 0) From 4a4e62768837d90da01ac3bb5f62d1107872487c Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 20:55:54 -0400 Subject: [PATCH 22/30] cross --- .github/workflows/validate-published-rules.yml | 2 +- scripts/validate_published_rules.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 2c4bdfcb0..a327e7e68 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -98,7 +98,7 @@ jobs: CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}" fi - CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004" # TODO: Remove this before merging to main + # CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004" # TODO: Remove this before merging to main ./venv/bin/python engine/scripts/validate_published_rules.py \ --rules-root "$(pwd)/open-rules" \ diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 9a3c85741..520be5cd6 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -77,7 +77,7 @@ def _parse_case_result(line: str) -> dict: "Type": d["type"], "Number": str(d["num"]), "Execution": CHECKMARK if exec_ok else CROSS, - "Expected": str(d.get("expected", "")), + "Expected": CROSS if d.get("expected") is None else str(d["expected"]), "Actual": str(d.get("actual", "")), "Match": CHECKMARK if match_ok else CROSS, # Private fields used when generating failure detail From 0f3f781a7a1d9ee2844ef30d57f1d2d20d571c1f Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Wed, 3 Jun 2026 22:32:28 -0400 Subject: [PATCH 23/30] fix the cross --- scripts/validate_published_rules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 520be5cd6..27ba10b47 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -77,7 +77,7 @@ def _parse_case_result(line: str) -> dict: "Type": d["type"], "Number": str(d["num"]), "Execution": CHECKMARK if exec_ok else CROSS, - "Expected": CROSS if d.get("expected") is None else str(d["expected"]), + "Expected": CROSS if d.get("expected") == "" else str(d["expected"]), "Actual": str(d.get("actual", "")), "Match": CHECKMARK if match_ok else CROSS, # Private fields used when generating failure detail From 6689709ce5fb60e7dfad4112911519dfc8f36da9 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Jun 2026 10:21:24 -0400 Subject: [PATCH 24/30] change csv conversion to an engine output format --- .../workflows/validate-published-rules.yml | 4 +- cdisc_rules_engine/constants/__init__.py | 2 +- cdisc_rules_engine/enums/report_types.py | 1 + .../services/reporting/base_report_data.py | 10 ++++- .../services/reporting/csv_report.py | 43 +++++++++++++++++++ .../services/reporting/report_factory.py | 2 + .../services/reporting/sdtm_report_data.py | 12 ++++++ .../services/reporting/usdm_report_data.py | 11 +++++ docs/cli-reference.md | 16 +++---- 9 files changed, 89 insertions(+), 12 deletions(-) create mode 100644 cdisc_rules_engine/services/reporting/csv_report.py diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index a327e7e68..e80b47ada 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -98,7 +98,7 @@ jobs: CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}" fi - # CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004" # TODO: Remove this before merging to main + CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006" # TODO: Remove this before merging to main ./venv/bin/python engine/scripts/validate_published_rules.py \ --rules-root "$(pwd)/open-rules" \ @@ -116,7 +116,7 @@ jobs: with: name: published-rules-validation-${{ github.run_id }} path: | - open-rules/Published/**/results/results.json + open-rules/Published/**/results/results.csv summary_table.md detail_report.md if-no-files-found: warn diff --git a/cdisc_rules_engine/constants/__init__.py b/cdisc_rules_engine/constants/__init__.py index 4279e87b5..73c50d415 100644 --- a/cdisc_rules_engine/constants/__init__.py +++ b/cdisc_rules_engine/constants/__init__.py @@ -5,7 +5,7 @@ NULL_FLAVORS = ["", None, {}, {None}, [], [None], np.nan] -KNOWN_REPORT_EXTENSIONS = [".json", ".xlsx", ".xls"] +KNOWN_REPORT_EXTENSIONS = [".json", ".xlsx", ".xls", ".csv"] VALIDATION_FORMATS_MESSAGE = ( "SAS V5 XPT, Dataset-JSON (JSON or NDJSON), or Excel (XLSX)" diff --git a/cdisc_rules_engine/enums/report_types.py b/cdisc_rules_engine/enums/report_types.py index 3b6cfcb9e..077314af3 100644 --- a/cdisc_rules_engine/enums/report_types.py +++ b/cdisc_rules_engine/enums/report_types.py @@ -4,3 +4,4 @@ class ReportTypes(BaseEnum): XLSX = "XLSX" JSON = "JSON" + CSV = "CSV" diff --git a/cdisc_rules_engine/services/reporting/base_report_data.py b/cdisc_rules_engine/services/reporting/base_report_data.py index 1cbaa3746..f2a6fc11f 100644 --- a/cdisc_rules_engine/services/reporting/base_report_data.py +++ b/cdisc_rules_engine/services/reporting/base_report_data.py @@ -1,4 +1,4 @@ -from abc import ABC +from abc import ABC, abstractmethod from io import IOBase from typing import Iterable @@ -53,3 +53,11 @@ def process_values(values: list[str]) -> list[str]: else: processed_values.append(value) return processed_values + + @abstractmethod + def get_csv_rows(self) -> tuple[list[str], list[list[str]]]: + """ + Return (header, rows) for the CSV output format. + Each row is a list of string values matching the header columns. + """ + pass diff --git a/cdisc_rules_engine/services/reporting/csv_report.py b/cdisc_rules_engine/services/reporting/csv_report.py new file mode 100644 index 000000000..fce0c4af3 --- /dev/null +++ b/cdisc_rules_engine/services/reporting/csv_report.py @@ -0,0 +1,43 @@ +import csv +import os +from io import IOBase +from typing import override + +from cdisc_rules_engine.enums.report_types import ReportTypes +from cdisc_rules_engine.models.validation_args import Validation_args +from cdisc_rules_engine.services.reporting.base_report_data import BaseReportData + +from .base_report import BaseReport + + +class CsvReport(BaseReport): + """ + Writes a results.csv file in the format defined by the report standard, + compatible with the cdisc-open-rules test harness baselines. + """ + + def __init__( + self, + report_standard: BaseReportData, + args: Validation_args, + template: IOBase | None = None, + ): + super().__init__(report_standard, args, template) + + @property + @override + def _file_ext(self) -> str: + return ReportTypes.CSV.value.lower() + + @override + def write_report(self) -> None: + output_dir = os.path.dirname(self._output_name) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + header, rows = self._report_standard.get_csv_rows() + + with open(self._output_name, "w", newline="", encoding="utf-8") as fh: + writer = csv.writer(fh) + writer.writerow(header) + writer.writerows(rows) diff --git a/cdisc_rules_engine/services/reporting/report_factory.py b/cdisc_rules_engine/services/reporting/report_factory.py index 6228b4ae3..27c3ecf81 100644 --- a/cdisc_rules_engine/services/reporting/report_factory.py +++ b/cdisc_rules_engine/services/reporting/report_factory.py @@ -13,6 +13,7 @@ from .base_report import BaseReport from .excel_report import ExcelReport from .json_report import JsonReport +from .csv_report import CsvReport class ReportFactory: @@ -46,6 +47,7 @@ def __init__( self._output_type_service_map: dict[str, Type[BaseReport]] = { ReportTypes.XLSX.value: ExcelReport, ReportTypes.JSON.value: JsonReport, + ReportTypes.CSV.value: CsvReport, } self._standard_type_map: dict[str, Type[BaseReportData]] = { "usdm": USDMReportData, diff --git a/cdisc_rules_engine/services/reporting/sdtm_report_data.py b/cdisc_rules_engine/services/reporting/sdtm_report_data.py index 4f5180648..a8354278c 100644 --- a/cdisc_rules_engine/services/reporting/sdtm_report_data.py +++ b/cdisc_rules_engine/services/reporting/sdtm_report_data.py @@ -347,6 +347,18 @@ def _generate_error_details( ) return errors + def get_csv_rows(self) -> tuple[list[str], list[list[str]]]: + header = ["Dataset", "Record", "Variable", "Value"] + rows = [] + for issue in self.data_sheets.get("Issue Details", []): + dataset = (issue.get("dataset") or "").removesuffix(".csv") + record = str(issue.get("row", "")) + variables = issue.get("variables") or [] + values = issue.get("values") or [] + for variable, value in zip(variables, values): + rows.append([dataset, record, variable, str(value)]) + return header, rows + def get_rules_report_data(self) -> list[dict]: """ Generates the rules report data that goes into the excel export. diff --git a/cdisc_rules_engine/services/reporting/usdm_report_data.py b/cdisc_rules_engine/services/reporting/usdm_report_data.py index 0fb94ad50..98795a9dd 100644 --- a/cdisc_rules_engine/services/reporting/usdm_report_data.py +++ b/cdisc_rules_engine/services/reporting/usdm_report_data.py @@ -245,6 +245,17 @@ def _generate_error_details( ) return errors + def get_csv_rows(self) -> tuple[list[str], list[list[str]]]: + header = ["path", "attribute", "value"] + rows = [] + for issue in self.data_sheets.get("Issue Details", []): + path = issue.get("path") or "" + attributes = issue.get("attributes") or [] + values = issue.get("values") or [] + for attribute, value in zip(attributes, values): + rows.append([path, attribute, str(value)]) + return header, rows + def get_rules_report_data(self) -> list[dict]: """ Generates the rules report data that goes into the excel export. diff --git a/docs/cli-reference.md b/docs/cli-reference.md index dabe4eb17..00db2c4c2 100644 --- a/docs/cli-reference.md +++ b/docs/cli-reference.md @@ -69,14 +69,14 @@ python core.py validate --help ### Output -| Flag | Description | -| -------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- | -| `-o, --output TEXT` | Output file path (without extension). Extension is added automatically based on format. | -| `-of, --output-format [JSON\|XLSX]` | Output format. | -| `-rr, --raw-report` | Raw output format (JSON only). | -| `-mr, --max-report-rows INTEGER` | Max rows in the Issue Details tab of Excel output (default: 1000; 0 = unlimited). Also via `MAX_REPORT_ROWS` env var. | -| `-me, --max-errors-per-rule INTEGER BOOLEAN` | Limit errors per rule. Format: `-me `. See below. | -| `-rt, --report-template TEXT` | Path to a custom Excel report template. | +| Flag | Description | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- | +| `-o, --output TEXT` | Output file path (without extension). Extension is added automatically based on format. | +| `-of, --output-format [JSON\|XLSX\|CSV]` | Output format. `CSV` writes issue rows directly (Dataset, Record, Variable, Value) compatible with the open-rules test harness. | +| `-rr, --raw-report` | Raw output format (JSON only). | +| `-mr, --max-report-rows INTEGER` | Max rows in the Issue Details tab of Excel output (default: 1000; 0 = unlimited). Also via `MAX_REPORT_ROWS` env var. | +| `-me, --max-errors-per-rule INTEGER BOOLEAN` | Limit errors per rule. Format: `-me `. See below. | +| `-rt, --report-template TEXT` | Path to a custom Excel report template. | #### `--max-errors-per-rule` Detail From d84e54e8c6d7b113eb808021b84c101fa2d81ad9 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Jun 2026 10:29:45 -0400 Subject: [PATCH 25/30] let's run the entire suite --- .github/workflows/validate-published-rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index e80b47ada..6094990d4 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -98,7 +98,7 @@ jobs: CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}" fi - CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006" # TODO: Remove this before merging to main + # CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006" # TODO: Remove this before merging to main ./venv/bin/python engine/scripts/validate_published_rules.py \ --rules-root "$(pwd)/open-rules" \ From 5264865a416a5b90078139a8c99601adb0bcc855 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Jun 2026 10:50:25 -0400 Subject: [PATCH 26/30] add unit tests for csv reports --- .../test_reporting/test_report_factory.py | 22 ++++-- .../test_reporting/test_sdtm_report.py | 72 +++++++++++++++++++ .../test_reporting/test_usdm_report.py | 68 ++++++++++++++++++ 3 files changed, 156 insertions(+), 6 deletions(-) diff --git a/tests/unit/test_services/test_reporting/test_report_factory.py b/tests/unit/test_services/test_reporting/test_report_factory.py index 7c96680a0..f5375e9c0 100644 --- a/tests/unit/test_services/test_reporting/test_report_factory.py +++ b/tests/unit/test_services/test_reporting/test_report_factory.py @@ -2,6 +2,7 @@ from cdisc_rules_engine.enums.report_types import ReportTypes from cdisc_rules_engine.services.reporting import ReportFactory +from cdisc_rules_engine.services.reporting.csv_report import CsvReport from cdisc_rules_engine.services.reporting.excel_report import ExcelReport from cdisc_rules_engine.services.reporting.json_report import JsonReport @@ -23,12 +24,21 @@ def test_get_report_services(): dictionary_versions={}, ) services = factory.get_report_services() - assert len(services) == 2 + assert len(services) == 3 for service in services: - is_excel: bool = isinstance(service, ExcelReport) and not isinstance( - service, JsonReport + is_csv: bool = ( + isinstance(service, CsvReport) + and not isinstance(service, ExcelReport) + and not isinstance(service, JsonReport) ) - is_json: bool = isinstance(service, JsonReport) and not isinstance( - service, ExcelReport + is_excel: bool = ( + isinstance(service, ExcelReport) + and not isinstance(service, CsvReport) + and not isinstance(service, JsonReport) ) - assert is_excel or is_json + is_json: bool = ( + isinstance(service, JsonReport) + and not isinstance(service, CsvReport) + and not isinstance(service, ExcelReport) + ) + assert is_csv or is_excel or is_json diff --git a/tests/unit/test_services/test_reporting/test_sdtm_report.py b/tests/unit/test_services/test_reporting/test_sdtm_report.py index f144076d4..be24bd518 100644 --- a/tests/unit/test_services/test_reporting/test_sdtm_report.py +++ b/tests/unit/test_services/test_reporting/test_sdtm_report.py @@ -115,6 +115,78 @@ def test_get_summary_data(mock_validation_results): assert error == summary_data[i] +def test_get_csv_rows_header(mock_validation_results): + report = SDTMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + header, _ = report.get_csv_rows() + assert header == ["Dataset", "Record", "Variable", "Value"] + + +def test_get_csv_rows_produces_one_row_per_variable(mock_validation_results): + report = SDTMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + _, rows = report.get_csv_rows() + # 3 errors total (2 from CORE1, 1 from CORE2), each with 2 variables → 6 rows + assert len(rows) == 6 + for row in rows: + assert len(row) == 4 + + +def test_get_csv_rows_row_values(mock_validation_results): + report = SDTMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + _, rows = report.get_csv_rows() + variables = {r[2] for r in rows} + assert variables == {"AESTDY", "DOMAIN", "TTVAR1", "TTVAR2"} + records = {r[1] for r in rows} + assert records == {"1", "9"} + for row in rows: + assert row[3] == "test" + + +def test_get_csv_rows_strips_csv_suffix(mock_validation_results): + # Patch dataset field in Issue Details to include .csv suffix + report = SDTMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + for issue in report.data_sheets["Issue Details"]: + issue["dataset"] = "AE.csv" + _, rows = report.get_csv_rows() + assert all(r[0] == "AE" for r in rows) + + +def test_get_csv_rows_empty_results(): + report = SDTMReportData( + [], + ["test"], + [], + 0.0, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + header, rows = report.get_csv_rows() + assert header == ["Dataset", "Record", "Variable", "Value"] + assert rows == [] + + def test_no_errors_when_none_value_in_one_of_the_records(mock_validation_results): # forcing None and str comparison in summary and details mock_validation_results[0].id = None diff --git a/tests/unit/test_services/test_reporting/test_usdm_report.py b/tests/unit/test_services/test_reporting/test_usdm_report.py index c39da3e05..d0def4e08 100644 --- a/tests/unit/test_services/test_reporting/test_usdm_report.py +++ b/tests/unit/test_services/test_reporting/test_usdm_report.py @@ -116,6 +116,74 @@ def test_get_summary_data(mock_validation_results): assert error == summary_data[i] +def test_get_csv_rows_header(mock_validation_results): + report = USDMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + header, _ = report.get_csv_rows() + assert header == ["path", "attribute", "value"] + + +def test_get_csv_rows_produces_one_row_per_attribute(mock_validation_results): + report = USDMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + _, rows = report.get_csv_rows() + # 3 errors total (2 from CORE1, 1 from CORE2), each with 2 attributes → 6 rows + assert len(rows) == 6 + for row in rows: + assert len(row) == 3 + + +def test_get_csv_rows_row_values(mock_validation_results): + report = USDMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + _, rows = report.get_csv_rows() + attributes = {r[1] for r in rows} + assert attributes == {"AESTDY", "DOMAIN", "TTVAR1", "TTVAR2"} + for row in rows: + assert row[2] == "test" + + +def test_get_csv_rows_empty_path_when_not_set(mock_validation_results): + report = USDMReportData( + [], + ["test"], + mock_validation_results, + 10.1, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + _, rows = report.get_csv_rows() + # mock errors have no 'path' key, so path defaults to "" + assert all(r[0] == "" for r in rows) + + +def test_get_csv_rows_empty_results(): + report = USDMReportData( + [], + ["test"], + [], + 0.0, + MagicMock(define_xml_path=None, max_errors_per_rule=(None, False)), + ) + header, rows = report.get_csv_rows() + assert header == ["path", "attribute", "value"] + assert rows == [] + + def test_no_errors_when_none_value_in_one_of_the_records(mock_validation_results): # forcing None and str comparison in summary and details mock_validation_results[0].id = None From d46e92aae5789d13444d4fc1cc71805c88f4f6c1 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Jun 2026 11:04:46 -0400 Subject: [PATCH 27/30] fix regression test --- tests/QARegressionTests/test_core/test_validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/QARegressionTests/test_core/test_validate.py b/tests/QARegressionTests/test_core/test_validate.py index 54fa64310..b3e6229bc 100644 --- a/tests/QARegressionTests/test_core/test_validate.py +++ b/tests/QARegressionTests/test_core/test_validate.py @@ -320,7 +320,7 @@ def test_validate_with_invalid_output_format(self): "-o", "output.json", "-of", - "csv", + "abc", ] exit_code, stdout, stderr = run_command(args, False) From 19ec15e22429053240ad381ead5fdecec777bb64 Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Jun 2026 14:21:13 -0400 Subject: [PATCH 28/30] remove execution column and put exec fails in actual --- .github/workflows/validate-published-rules.yml | 2 +- scripts/validate_published_rules.py | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index 6094990d4..bdf944aa4 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -98,7 +98,7 @@ jobs: CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}" fi - # CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006" # TODO: Remove this before merging to main + CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006 CORE-000132" # TODO: Remove this before merging to main ./venv/bin/python engine/scripts/validate_published_rules.py \ --rules-root "$(pwd)/open-rules" \ diff --git a/scripts/validate_published_rules.py b/scripts/validate_published_rules.py index 27ba10b47..c645c163a 100644 --- a/scripts/validate_published_rules.py +++ b/scripts/validate_published_rules.py @@ -21,7 +21,7 @@ import subprocess import sys -SUMMARY_HEADERS = ["Rule", "Type", "Number", "Execution", "Expected", "Actual", "Match"] +SUMMARY_HEADERS = ["Rule", "Type", "Number", "Expected", "Actual", "Match"] CHECKMARK = "\u2705" CROSS = "\u274c" @@ -76,9 +76,8 @@ def _parse_case_result(line: str) -> dict: "Rule": d["rule"], "Type": d["type"], "Number": str(d["num"]), - "Execution": CHECKMARK if exec_ok else CROSS, "Expected": CROSS if d.get("expected") == "" else str(d["expected"]), - "Actual": str(d.get("actual", "")), + "Actual": str(d.get("actual", "")) if exec_ok else CROSS, "Match": CHECKMARK if match_ok else CROSS, # Private fields used when generating failure detail "_exec_ok": exec_ok, @@ -180,9 +179,8 @@ def _aggregate_row( "Rule": rule_id, "Type": "\u2014", "Number": "\u2014", - "Execution": CHECKMARK if exec_ok else CROSS, "Expected": "\u2014", - "Actual": "\u2014", + "Actual": CHECKMARK if exec_ok else CROSS, "Match": "\u2014", } if exec_ok: From 5d72869161b547ed2f606d1996931199d66557bd Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Jun 2026 14:32:18 -0400 Subject: [PATCH 29/30] run all again --- .github/workflows/validate-published-rules.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index bdf944aa4..ac5da6398 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -98,7 +98,7 @@ jobs: CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}" fi - CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006 CORE-000132" # TODO: Remove this before merging to main + # CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006 CORE-000132" # TODO: Remove this before merging to main ./venv/bin/python engine/scripts/validate_published_rules.py \ --rules-root "$(pwd)/open-rules" \ From c84ce479c2faf883da28d7517bbb5e8aa784a2ab Mon Sep 17 00:00:00 2001 From: Gerry Campion Date: Thu, 4 Jun 2026 23:59:29 -0400 Subject: [PATCH 30/30] remove todo's --- .github/workflows/validate-published-rules.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/validate-published-rules.yml b/.github/workflows/validate-published-rules.yml index ac5da6398..1c68f210c 100644 --- a/.github/workflows/validate-published-rules.yml +++ b/.github/workflows/validate-published-rules.yml @@ -14,7 +14,6 @@ on: push: branches: - main - - 798-test-against-published # TODO: Remove this before merging to main workflow_dispatch: inputs: rules_ref: @@ -50,7 +49,7 @@ jobs: uses: actions/checkout@v6 with: repository: cdisc-org/cdisc-open-rules - ref: ${{ inputs.rules_ref || 'rules_2'}} # TODO: Remove this before merging to main + ref: ${{ inputs.rules_ref}} path: open-rules # ----------------------------------------------------------------------- @@ -98,8 +97,6 @@ jobs: CORE_IDS_ARG="--core-ids ${{ inputs.core_ids }}" fi - # CORE_IDS_ARG="--core-ids CORE-000001 CORE-000002 CORE-000003 CORE-000004 CORE-000005 CORE-000006 CORE-000132" # TODO: Remove this before merging to main - ./venv/bin/python engine/scripts/validate_published_rules.py \ --rules-root "$(pwd)/open-rules" \ --engine-dir "$(pwd)/engine" \