@@ -15,18 +15,17 @@ concurrency:
1515 cancel-in-progress : true
1616
# Environment block (presumably workflow-level; indentation is not preserved
# in this view). This change drops OUTPUT_PATH, leaving only RESOURCE_GROUP.
1717env :
18- OUTPUT_PATH : ${{ github.workspace }}
1918 RESOURCE_GROUP : CI-gpu
2019
# Triggers: pushes and pull requests targeting main, plus manual dispatch
# (workflow_dispatch) with a single `tags` input. The edits below are
# formatting only: flow-list spacing and single -> double quotes.
2120on :
2221 push :
23- branches : [ main ]
22+ branches : [main]
2423 pull_request :
25- branches : [ main ]
24+ branches : [main]
2625 workflow_dispatch :
2726 inputs :
2827 tags :
29- description : ' Run GPU tests'
28+ description : " Run GPU tests"
3029
3130jobs :
3231 build :
@@ -38,42 +37,44 @@ jobs:
# Job matrix. fail-fast is off, so one failing combination does not cancel
# the others. `name` selects the GPU backend (NVIDIA OpenACC / AMD OpenMP).
3837 strategy :
3938 fail-fast : false
4039 matrix :
41- name : [
42- pytest-gpu-acc-nvidia,
43- pytest-gpu-omp-amd
44- ]
45- test_examples : ["examples/seismic/tti/tti_example.py examples/seismic/acoustic/acoustic_example.py examples/seismic/viscoacoustic/viscoacoustic_example.py examples/seismic/viscoelastic/viscoelastic_example.py examples/seismic/elastic/elastic_example.py"]
40+ name : [pytest-gpu-acc-nvidia, pytest-gpu-omp-amd]
# NOTE(review): test_examples changes from a one-element list (a single
# space-separated string of five scripts) to a five-element list. Every
# matrix key is an axis, so this appears to turn 2 x 1 combinations into
# 2 x 5 - confirm that five jobs per backend is intended and that the
# consumers of matrix.test_examples expect one script per job.
41+ test_examples :
42+ - examples/seismic/tti/tti_example.py
43+ - examples/seismic/acoustic/acoustic_example.py
44+ - examples/seismic/viscoacoustic/viscoacoustic_example.py
45+ - examples/seismic/viscoelastic/viscoelastic_example.py
46+ - examples/seismic/elastic/elastic_example.py
4647
# Per-backend settings keyed by `name`: pytest files, base image, runner
# label, a GPU smoke-test command and the `docker run` flags.
# The removed and re-added entries read identically in this view, so this is
# presumably an indentation-only change - verify against the raw diff.
4748 include :
48- # -------------------- NVIDIA job --------------------
49- - name : pytest-gpu-acc-nvidia
50- test_files : " tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py"
51- base : " devitocodes/bases:nvidia-nvc"
52- runner_label : nvidiagpu
53- test_drive_cmd : " nvidia-smi"
54- # Respect CUDA_VISIBLE_DEVICES and also hard-limit Docker to that device.
55- # NOTE: CUDA_VISIBLE_DEVICES must be set by the runner (systemd drop-in etc.).
56- flags : >-
57- --init --rm -t
58- --name ${CONTAINER_BASENAME}
59- --env CUDA_VISIBLE_DEVICES
60- --gpus "device=${CUDA_VISIBLE_DEVICES:-all}"
61-
62- # -------------------- AMD job -----------------------
63- - name : pytest-gpu-omp-amd
64- test_files : " tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py"
65- runner_label : amdgpu
66- base : " devitocodes/bases:amd"
67- test_drive_cmd : " rocm-smi"
68- # Unchanged, still passes through required /dev nodes etc.
69- flags : >-
70- --init --network=host
71- --device=/dev/kfd --device=/dev/dri
72- --ipc=host
73- --group-add video --group-add "$(getent group render | cut -d: -f3)"
74- --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
75- --rm -t
76- --name ${CONTAINER_BASENAME}
49+ # -------------------- NVIDIA job --------------------
50+ - name : pytest-gpu-acc-nvidia
51+ test_files : " tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openacc.py"
52+ base : " devitocodes/bases:nvidia-nvc"
53+ runner_label : nvidiagpu
54+ test_drive_cmd : " nvidia-smi"
55+ # Respect CUDA_VISIBLE_DEVICES and also hard-limit Docker to that device.
56+ # NOTE: CUDA_VISIBLE_DEVICES must be set by the runner (systemd drop-in etc.).
57+ flags : >-
58+ --init --rm -t
59+ --name ${CONTAINER_BASENAME}
60+ --env CUDA_VISIBLE_DEVICES
61+ --gpus "device=${CUDA_VISIBLE_DEVICES:-all}"
62+
63+ # -------------------- AMD job -----------------------
64+ - name : pytest-gpu-omp-amd
65+ test_files : " tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py"
66+ runner_label : amdgpu
67+ base : " devitocodes/bases:amd"
68+ test_drive_cmd : " rocm-smi"
69+ # Unchanged, still passes through required /dev nodes etc.
70+ flags : >-
71+ --init --network=host
72+ --device=/dev/kfd --device=/dev/dri
73+ --ipc=host
74+ --group-add video --group-add "$(getent group render | cut -d: -f3)"
75+ --cap-add=SYS_PTRACE --security-opt seccomp=unconfined
76+ --rm -t
77+ --name ${CONTAINER_BASENAME}
7778
7879 steps :
7980 - name : Checkout devito
@@ -84,11 +85,16 @@ jobs:
8485 echo "DOCKER_IMAGE=${{ matrix.name }}-${RUNNER_NAME// /_}" >> $GITHUB_ENV
8586 echo "CONTAINER_BASENAME=testrun-${{ matrix.name }}-${RUNNER_NAME// /_}" >> $GITHUB_ENV
8687
# New step: make sure a buildx builder named after the runner exists
# (created with the docker-container driver only when `inspect` fails),
# then select it as the current builder.
# NOTE(review): the image/container names above replace spaces in
# RUNNER_NAME with "_", but the builder name uses RUNNER_NAME as-is -
# confirm runner names are always valid buildx builder names.
88+ - name : Ensure buildx builder
89+ run : |
90+ docker buildx inspect "$RUNNER_NAME" >/dev/null 2>&1 || \
91+ docker buildx create --name "$RUNNER_NAME" --driver docker-container
92+ docker buildx use "$RUNNER_NAME"
93+
# Builds docker/Dockerfile.devito with the runner-named builder and labels
# the result with ci-run=$GITHUB_RUN_ID. This change removes the inline
# `buildx create ... || true`, invokes `docker buildx build` explicitly and
# quotes the builder name.
# NOTE(review): the builder uses the docker-container driver, where a build
# result stays in the build cache unless an output such as --load is given.
# The rest of this command lies outside the visible hunk - confirm the image
# reaches the local image store before `docker run` uses it.
8794 - name : Build docker image
8895 run : |
89- docker buildx create --use --name $RUNNER_NAME || true
90- docker build . \
91- --builder $RUNNER_NAME \
96+ docker buildx build . \
97+ --builder "$RUNNER_NAME" \
9298 --label ci-run=$GITHUB_RUN_ID \
9399 --rm --pull \
94100 --file docker/Dockerfile.devito \
@@ -138,7 +144,9 @@ jobs:
138144 -e CODECOV_TOKEN \
139145 ${NVIDIA_ENV_FLAGS} \
140146 "${DOCKER_IMAGE}" \
141- pytest -vv -ra -l -s --full-trace --maxfail=1 \
147+ pytest -vvv --capture=no --showlocals \
148+ --log-cli-level=DEBUG -o log_cli=true \
149+ --full-trace --durations=10 \
142150 --cov --cov-config=.coveragerc --cov-report=xml \
143151 ${{ matrix.test_files }}
# The pytest command above spells out -s / -l as --capture=no / --showlocals,
# raises verbosity to -vvv, turns on live DEBUG logging and reports the ten
# slowest durations.
# NOTE(review): --maxfail=1 and -ra are no longer passed, so a run continues
# past the first failure and prints no short summary - confirm intended.
144152
@@ -160,3 +168,8 @@ jobs:
160168 # Guard global prune the layers we created
161169 # (label ci-run=$GITHUB_RUN_ID)
162170 docker image prune -f --filter label=ci-run=$GITHUB_RUN_ID
171+
# Trim build cache older than 7 days (168h) from this runner's own builder.
# The builder is selected with --builder rather than a `name=` filter: prune
# filters match cache records (until, type, ...), not builder names.
172+ docker buildx prune --builder "$RUNNER_NAME" -f --filter "until=168h"
173+
174+ # May be overkill - commented out for now but left here as a reminder
175+ # docker buildx rm $RUNNER_NAME || true
0 commit comments