CI robustness patch. Line structure restored from a whitespace-collapsed copy;
YAML indentation is inferred. Text before the first "diff --git" line is
ignored by git-apply and only documents the patch.

CI-integrationtests.yml
  * git clones use connect/low-speed timeouts and a configurable retry count.
  * CORSIKA interaction tables and the simulation-models repository are
    restored from actions/cache before cloning and used as a fallback when
    the clone fails.
CI-unittests.yml
  * The Sonar scan runs in its own job, fed by the coverage.xml artifact of
    the Python 3.13 unit-test job, with a cached scanner binary and retries.

Review changes in this revision
  * CORSIKA: clone, "git lfs install" and "git lfs pull" form one && chain,
    so an LFS failure no longer skips the retry back-off.
  * Simulation models: the cache is saved only when this run cloned fresh
    data (step output "cloned"), never when the restored copy was used.
  * Sonar: HTTP 502/503/504 are matched as standalone numbers; exclusion
    globs are quoted to prevent shell pathname expansion.

diff --git a/.github/workflows/CI-integrationtests.yml b/.github/workflows/CI-integrationtests.yml
index f9a4df6d02..8bf985fc60 100644
--- a/.github/workflows/CI-integrationtests.yml
+++ b/.github/workflows/CI-integrationtests.yml
@@ -3,6 +3,13 @@ name: CI-integrationtests
 
 permissions: {}
 
+env:
+  CLONE_ATTEMPTS: 2
+  GIT_HTTP_CONNECT_TIMEOUT: 20
+  GIT_HTTP_LOW_SPEED_LIMIT: 1000
+  GIT_HTTP_LOW_SPEED_TIME: 30
+  RETRY_SLEEP_SECONDS: 60
+
 on:
   workflow_dispatch:
     inputs:
@@ -68,18 +75,65 @@ jobs:
       CORSIKA_TABLES: "v0.1.0"
 
     steps:
+      - name: Restore cached CORSIKA interaction tables
+        id: restore_interaction_tables
+        uses: actions/cache/restore@v5
+        with:
+          path: ${{ env.IT_NAME }}
+          key: corsika-interaction-tables-${{ env.CORSIKA_TABLES }}
+          restore-keys: |
+            corsika-interaction-tables-${{ env.CORSIKA_TABLES }}-
+
       - name: Clone CORSIKA interaction tables (exclude QGSJet tables)
         run: |
           REPO_URL=https://gitlab.cta-observatory.org/cta-computing/dpps/simpipe/simulation_software/${IT_NAME}
           export GIT_LFS_SKIP_SMUDGE=1  # disable automatic downloading of all LFS files
-          for attempt in 1 2 3; do
-            git clone --depth 1 --branch "$CORSIKA_TABLES" "$REPO_URL" "$IT_NAME" && break
-            echo "Clone attempt $attempt failed. Retrying in 120 seconds..."
-            rm -rf "$IT_NAME"
-            sleep 120
+
+          CLONE_DIR="${IT_NAME}-clone"
+          CACHE_DIR="$IT_NAME"
+          rm -rf "$CLONE_DIR"
+          CLONE_SUCCESS=false
+
+          for attempt in $(seq 1 "$CLONE_ATTEMPTS"); do
+            # Clone and LFS pull succeed or fail as one unit so that every
+            # failure takes the same cleanup and back-off path below.
+            if git \
+              -c "http.connectTimeout=$GIT_HTTP_CONNECT_TIMEOUT" \
+              -c "http.lowSpeedLimit=$GIT_HTTP_LOW_SPEED_LIMIT" \
+              -c "http.lowSpeedTime=$GIT_HTTP_LOW_SPEED_TIME" \
+              clone --depth 1 --branch "$CORSIKA_TABLES" "$REPO_URL" "$CLONE_DIR" \
+              && git lfs install \
+              && (cd "$CLONE_DIR" && git lfs pull --exclude="interaction-tables/qgsdat-II-04,interaction-tables/qgsdat-III")
+            then
+              rm -rf "$CACHE_DIR"
+              mv "$CLONE_DIR" "$CACHE_DIR"
+              echo "Successfully cloned CORSIKA interaction tables $CORSIKA_TABLES."
+              CLONE_SUCCESS=true
+              break
+            fi
+
+            rm -rf "$CLONE_DIR"
+            if [[ "$attempt" -lt "$CLONE_ATTEMPTS" ]]; then
+              echo "Clone attempt $attempt failed. Retrying in $RETRY_SLEEP_SECONDS seconds..."
+              sleep "$RETRY_SLEEP_SECONDS"
+            fi
           done
-          git lfs install
-          (cd "$IT_NAME" && git lfs pull --exclude="interaction-tables/qgsdat-II-04,interaction-tables/qgsdat-III")
+
+          if [ "$CLONE_SUCCESS" = false ] && [ ! -d "$CACHE_DIR" ]; then
+            echo "Failed to clone CORSIKA interaction tables $CORSIKA_TABLES and no cache is available."
+            exit 1
+          fi
+
+          if [ "$CLONE_SUCCESS" = false ]; then
+            echo "GitLab clone failed; using cached CORSIKA interaction tables $CORSIKA_TABLES."
+          fi
+
+      - name: Save cached CORSIKA interaction tables
+        if: steps.restore_interaction_tables.outputs.cache-hit != 'true'
+        uses: actions/cache/save@v5
+        with:
+          path: ${{ env.IT_NAME }}
+          key: corsika-interaction-tables-${{ env.CORSIKA_TABLES }}
 
       - name: Upload CORSIKA interaction tables
         uses: actions/upload-artifact@v7
@@ -103,6 +157,7 @@ jobs:
       - uses: actions/checkout@v6
 
       - name: Determine simulation model branch
+        id: simulation_model_branch
         env:
           HEAD_REF: ${{ github.head_ref }}
           REF_NAME: ${{ github.ref_name }}
@@ -126,17 +181,70 @@ jobs:
             BRANCH="$SIMTOOLS_DB_SIMULATION_MODEL_VERSION"
           fi
           echo "SIMTOOLS_DB_SIMULATION_MODEL_BRANCH=$BRANCH" >> "$GITHUB_ENV"
+          echo "branch=$BRANCH" >> "$GITHUB_OUTPUT"
+
+      - name: Restore cached simulation models repository
+        if: github.event_name != 'schedule'
+        id: restore_simulation_models
+        uses: actions/cache/restore@v5
+        with:
+          path: simulation-models
+          key: simulation-models-${{ steps.simulation_model_branch.outputs.branch }}-${{ github.run_id }}
+          restore-keys: |
+            simulation-models-${{ steps.simulation_model_branch.outputs.branch }}-
 
       - name: Clone simulation models repository
+        id: clone_simulation_models
         if: github.event_name != 'schedule'
         run: |
-          for attempt in 1 2 3; do
-            git clone --depth 1 --branch "$SIMTOOLS_DB_SIMULATION_MODEL_BRANCH" "$SIM_MODELS_REPO" "simulation-models" && break
-            echo "Clone attempt $attempt failed. Retrying in 120 seconds..."
-            rm -rf simulation-models
-            sleep 120
+          CLONE_DIR="simulation-models-clone"
+          CACHE_DIR="simulation-models"
+          rm -rf "$CLONE_DIR"
+          CLONE_SUCCESS=false
+
+          for attempt in $(seq 1 "$CLONE_ATTEMPTS"); do
+            if git \
+              -c "http.connectTimeout=$GIT_HTTP_CONNECT_TIMEOUT" \
+              -c "http.lowSpeedLimit=$GIT_HTTP_LOW_SPEED_LIMIT" \
+              -c "http.lowSpeedTime=$GIT_HTTP_LOW_SPEED_TIME" \
+              clone --depth 1 --branch "$SIMTOOLS_DB_SIMULATION_MODEL_BRANCH" "$SIM_MODELS_REPO" "$CLONE_DIR"
+            then
+              rm -rf "$CACHE_DIR"
+              mv "$CLONE_DIR" "$CACHE_DIR"
+              echo "Successfully cloned simulation models branch $SIMTOOLS_DB_SIMULATION_MODEL_BRANCH."
+              CLONE_SUCCESS=true
+              echo "cloned=true" >> "$GITHUB_OUTPUT"
+              break
+            fi
+
+            rm -rf "$CLONE_DIR"
+            if [[ "$attempt" -lt "$CLONE_ATTEMPTS" ]]; then
+              echo "Clone attempt $attempt failed. Retrying in $RETRY_SLEEP_SECONDS seconds..."
+              sleep "$RETRY_SLEEP_SECONDS"
+            fi
           done
 
+          if [ "$CLONE_SUCCESS" = false ] && [ ! -d "$CACHE_DIR" ]; then
+            echo "Failed to clone simulation models branch $SIMTOOLS_DB_SIMULATION_MODEL_BRANCH and no cache is available."
+            exit 1
+          fi
+
+          if [ "$CLONE_SUCCESS" = false ]; then
+            echo "GitLab clone failed; using cached simulation models branch $SIMTOOLS_DB_SIMULATION_MODEL_BRANCH."
+          fi
+
+      # Only store a cache entry when this run produced a fresh clone; a
+      # fallback run would just re-save the restored copy under a new key.
+      - name: Save cached simulation models repository
+        if: >-
+          github.event_name != 'schedule'
+          && steps.restore_simulation_models.outputs.cache-hit != 'true'
+          && steps.clone_simulation_models.outputs.cloned == 'true'
+        uses: actions/cache/save@v5
+        with:
+          path: simulation-models
+          key: simulation-models-${{ steps.simulation_model_branch.outputs.branch }}-${{ github.run_id }}
+
       - name: Upload simulation models repository
         if: github.event_name != 'schedule'
         uses: actions/upload-artifact@v7
diff --git a/.github/workflows/CI-unittests.yml b/.github/workflows/CI-unittests.yml
index b7f86e0be5..434539e108 100644
--- a/.github/workflows/CI-unittests.yml
+++ b/.github/workflows/CI-unittests.yml
@@ -1,6 +1,6 @@
 ---
 name: CI-unittests
-# Unit tests (includes CTAO-SonarQube)
+# Unit tests and CTAO-SonarQube
 
 on:
   workflow_dispatch:
@@ -72,7 +72,7 @@ jobs:
 
           - os: ubuntu-latest
             python-version: "3.13"
-            extra-args: ["sonarqube", "random-order"]
+            extra-args: ["random-order"]
 
           - os: ubuntu-latest
             python-version: "3.14"
@@ -115,20 +115,14 @@ jobs:
           pytest --durations=10 --color=yes -n 4 --dist loadscope \
             --cov=simtools --cov-report=xml --retries 2 --retry-delay 5
 
-      # CTAO-DPPS-SonarQube
-      - uses: SonarSource/sonarqube-scan-action@v8.1.0
-        if: contains(matrix.extra-args, 'sonarqube')
-        env:
-          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+      - name: Upload coverage report
+        if: matrix.python-version == '3.13'
+        uses: actions/upload-artifact@v7
         with:
-          args: >
-            -Dsonar.projectKey=gammasim_simtools_0d23837b-8b2d-4e54-9a98-2f1bde681f14
-            -Dsonar.host.url=https://sonar-ctao.zeuthen.desy.de
-            -Dsonar.qualitygate.wait=true
-            -Dsonar.python.coverage.reportPaths=coverage.xml
-            -Dsonar.python.version=${{ matrix.python-version }}
-            -Dsonar.exclusions=**/docs/**,src/simtools/applications/**,**/__init__.py
-            -Dsonar.coverage.exclusions=**/tests/**,src/simtools/applications/**
+          name: coverage-python-3.13
+          path: coverage.xml
+          if-no-files-found: error
+          retention-days: 3
 
       - name: Random order
         if: github.event_name == 'schedule' && contains(matrix.extra-args, 'random-order')
@@ -139,3 +133,118 @@ jobs:
         run: |
           pytest --color=yes -n 4 --dist loadscope --count 5 --random-order \
             --retries 2 --retry-delay 5
+
+  sonarqube:
+    needs: unit_tests
+    runs-on: ubuntu-latest
+    env:
+      SONAR_SCANNER_VERSION: 8.1.0.6389
+      SONAR_SCANNER_PLATFORM: linux-x64
+      SONAR_SCANNER_CACHE_DIR: .sonar/sonar-scanner
+
+    defaults:
+      run:
+        shell: bash -leo pipefail {0}
+
+    steps:
+      - name: checkout
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+
+      - name: Download coverage report
+        uses: actions/download-artifact@v8
+        with:
+          name: coverage-python-3.13
+          path: .
+
+      # CTAO-DPPS-SonarQube
+      - name: Restore Sonar scanner cache
+        id: sonar_scanner_cache
+        uses: actions/cache@v5
+        with:
+          path: ${{ env.SONAR_SCANNER_CACHE_DIR }}
+          key: sonar-scanner-${{ runner.os }}-${{ runner.arch }}-${{ env.SONAR_SCANNER_VERSION }}
+
+      - name: Install Sonar scanner
+        if: steps.sonar_scanner_cache.outputs.cache-hit != 'true'
+        run: |
+          mkdir -p "$SONAR_SCANNER_CACHE_DIR"
+          curl --fail --location --retry 5 --retry-delay 20 --retry-all-errors \
+            "https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${SONAR_SCANNER_VERSION}-${SONAR_SCANNER_PLATFORM}.zip" \
+            --output sonar-scanner.zip
+          unzip -q sonar-scanner.zip -d "$SONAR_SCANNER_CACHE_DIR"
+
+      - name: Run Sonar scanner
+        id: sonar_scan
+        # Soft fail for draft PRs (unstable network connections)
+        continue-on-error: ${{ github.event.pull_request.draft == true }}
+        env:
+          SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
+        run: |
+          set +e
+          for attempt in 1 2 3; do
+            scanner_log="sonar-scanner-attempt-${attempt}.log"
+            "${SONAR_SCANNER_CACHE_DIR}/sonar-scanner-${SONAR_SCANNER_VERSION}-${SONAR_SCANNER_PLATFORM}/bin/sonar-scanner" \
+              -Dsonar.projectKey=gammasim_simtools_0d23837b-8b2d-4e54-9a98-2f1bde681f14 \
+              -Dsonar.host.url=https://sonar-ctao.zeuthen.desy.de \
+              -Dsonar.qualitygate.wait=true \
+              -Dsonar.scanner.connectTimeout=60 \
+              -Dsonar.scanner.socketTimeout=120 \
+              -Dsonar.scanner.responseTimeout=120 \
+              -Dsonar.plugins.download.timeout=600 \
+              -Dsonar.python.coverage.reportPaths=coverage.xml \
+              -Dsonar.python.version=3.13 \
+              "-Dsonar.exclusions=**/docs/**,src/simtools/applications/**,**/__init__.py" \
+              "-Dsonar.coverage.exclusions=**/tests/**,src/simtools/applications/**" \
+              2>&1 | tee "$scanner_log"
+            status=${PIPESTATUS[0]}
+
+            echo "Sonar scanner attempt $attempt exited with status $status."
+
+            if [[ "$status" -eq 0 ]]; then
+              echo "Sonar scanner succeeded."
+              exit 0
+            fi
+
+            if grep -Eqi "QUALITY GATE STATUS: FAILED|quality gate failed" "$scanner_log"; then
+              echo "Sonar quality gate failed; not retrying."
+              exit "$status"
+            fi
+
+            # Match HTTP 502/503/504 only as standalone numbers so digits inside
+            # durations, sizes or hashes do not mark a real failure as transient.
+            if ! grep -Eqi "timed out|timeout|Connection refused|Connection reset|Failed to query server version|Failed to upload report|Failed to request|temporarily unavailable|(^|[^0-9])50[234]([^0-9]|$)" "$scanner_log"; then
+              echo "Sonar scanner failed with a non-transient error; not retrying."
+              exit "$status"
+            fi
+
+            if [[ "$attempt" -lt 3 ]]; then
+              delay=$((attempt * 60))
+              echo "Sonar scanner attempt $attempt failed. Retrying in $delay seconds..."
+              sleep "$delay"
+            fi
+          done
+
+          echo "Sonar scanner failed after 3 attempts."
+          exit 1
+
+      - name: Report soft-failed Sonar scan
+        if: always() && github.event.pull_request.draft == true && steps.sonar_scan.outcome == 'failure'
+        run: |
+          echo "::warning::Sonar scan failed but was allowed to continue because this is a draft PR."
+          {
+            echo "## Sonar scan soft failure"
+            echo
+            echo "The Sonar scan failed, but the workflow continued because this is a draft PR."
+            echo "Open the \`Run Sonar scanner\` step logs for details."
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload soft-failed Sonar logs
+        if: always() && github.event.pull_request.draft == true && steps.sonar_scan.outcome == 'failure'
+        uses: actions/upload-artifact@v7
+        with:
+          name: sonar-scanner-logs
+          path: sonar-scanner-attempt-*.log
+          if-no-files-found: ignore
+          retention-days: 7
diff --git a/docs/changes/2239.maintenance.md b/docs/changes/2239.maintenance.md
new file mode 100644
index 0000000000..d76a289c85
--- /dev/null
+++ b/docs/changes/2239.maintenance.md
@@ -0,0 +1 @@
+Increase robustness of CI unit and integration tests. Add caching steps for CORSIKA interaction tables, simulation models, and sonar binary as a fallback.