From 591b73c7866a06ff69ed06b8fa3b48e7cef05073 Mon Sep 17 00:00:00 2001 From: Duncan Spani Date: Wed, 13 May 2026 16:10:02 -0700 Subject: [PATCH] Remove 'exit 0' from nccl-test script Having 'exit 0' at the end of the script masks any errors generated and allows pods to exit with "success"/"completed" status instead of failing. --- gpudirect-rdma/nccl-test-a4x-max-jobset.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/gpudirect-rdma/nccl-test-a4x-max-jobset.yaml b/gpudirect-rdma/nccl-test-a4x-max-jobset.yaml index e5adf8883..7ffbb1aae 100644 --- a/gpudirect-rdma/nccl-test-a4x-max-jobset.yaml +++ b/gpudirect-rdma/nccl-test-a4x-max-jobset.yaml @@ -159,8 +159,6 @@ spec: sleep 5 done fi - - exit 0 volumeMounts: - name: nvidia mountPath: /usr/local/nvidia