diff --git a/03-H_Multi_GPU_Parallelization/.master/Makefile.in b/03-H_Multi_GPU_Parallelization/.master/Makefile.in index e15d85c..72429af 100644 --- a/03-H_Multi_GPU_Parallelization/.master/Makefile.in +++ b/03-H_Multi_GPU_Parallelization/.master/Makefile.in @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu index a9700cc..2d913d9 100644 --- a/03-H_Multi_GPU_Parallelization/.master/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/.master/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/03-H_Multi_GPU_Parallelization/solutions/Makefile b/03-H_Multi_GPU_Parallelization/solutions/Makefile index 92f033c..6d3af79 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/Makefile @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile index a6399eb..5f96c74 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/Makefile @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu index ee32ce5..ed6a7ff 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/solutions/advanced/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu index e971307..c4b542a 100644 --- a/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/solutions/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/03-H_Multi_GPU_Parallelization/tasks/Makefile b/03-H_Multi_GPU_Parallelization/tasks/Makefile index d293686..883cd2c 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/Makefile +++ b/03-H_Multi_GPU_Parallelization/tasks/Makefile @@ -24,8 +24,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME) -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu index acae736..150886a 100644 --- a/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu +++ b/03-H_Multi_GPU_Parallelization/tasks/jacobi.cu @@ -59,7 +59,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in b/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in index b7ce7a5..271d5f7 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/Makefile.in @@ -22,9 +22,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp b/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp index f2fc1e6..9200097 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp +++ b/06-H_Overlap_Communication_and_Computation_MPI/.master/jacobi.cpp @@ -56,7 +56,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile index a8ee71a..e2a83e3 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile +++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/Makefile @@ -22,9 +22,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp b/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp index 4a20af1..961171e 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp +++ b/06-H_Overlap_Communication_and_Computation_MPI/solutions/jacobi.cpp @@ -56,7 +56,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile index 36da1bf..cf4eab5 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile +++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/Makefile @@ -22,9 +22,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp b/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp index 4f9026b..9298be5 100644 --- a/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp +++ b/06-H_Overlap_Communication_and_Computation_MPI/tasks/jacobi.cpp @@ -56,7 +56,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in b/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in index ccbec37..1f7a7ae 100644 --- a/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in +++ b/08-H_NCCL_NVSHMEM/.master/NCCL/Makefile.in @@ -1,7 +1,7 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. THIS_TASK := 08H-NCCL-@@TASKSOL@@ OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') -NP ?= 1 +NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 MPICXX=mpicxx @@ -23,9 +23,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp index 5945b12..2e8142e 100644 --- a/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp +++ b/08-H_NCCL_NVSHMEM/.master/NCCL/jacobi.cpp @@ -55,7 +55,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in index 8aad9e0..a234756 100644 --- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in +++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/Makefile.in @@ -29,8 +29,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu index e4f6bcd..10da82d 100644 --- a/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu +++ b/08-H_NCCL_NVSHMEM/.master/NVSHMEM/jacobi.cu @@ -61,7 +61,7 @@ #endif // HAVE_CUB #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile b/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile index b7ff2f5..6335378 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile +++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/Makefile @@ -1,7 +1,7 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. THIS_TASK := 08H-NCCL-sol OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') -NP ?= 1 +NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 MPICXX=mpicxx @@ -23,9 +23,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp index 0c71eef..926db95 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp +++ b/08-H_NCCL_NVSHMEM/solutions/NCCL/jacobi.cpp @@ -55,7 +55,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile index 823b736..168eab2 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile +++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/Makefile @@ -29,8 +29,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu index dd55b30..6165d01 100644 --- a/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu +++ b/08-H_NCCL_NVSHMEM/solutions/NVSHMEM/jacobi.cu @@ -59,7 +59,7 @@ #endif // HAVE_CUB #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile b/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile index a6bb0dd..4069d29 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile +++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/Makefile @@ -1,7 +1,7 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. All rights reserved. THIS_TASK := 08H-NCCL-task OUTPUT_NAME := jacobi.$(THIS_TASK)__$(shell date '+%Y%m%d-%H%M') -NP ?= 1 +NP ?= 4 NVCC=nvcc JSC_SUBMIT_CMD ?= srun --cpu-bind=socket --gres=gpu:4 --ntasks-per-node 4 MPICXX=mpicxx @@ -23,9 +23,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 -I$(CUDA_HOME)/include +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp b/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp index 5bcf77e..d71f76b 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp +++ b/08-H_NCCL_NVSHMEM/tasks/NCCL/jacobi.cpp @@ -55,7 +55,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile index 7c57e3e..3376cba 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile +++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/Makefile @@ -29,8 +29,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -I$(CUDA_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu index b754207..7afbe05 100644 --- a/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu +++ b/08-H_NCCL_NVSHMEM/tasks/NVSHMEM/jacobi.cu @@ -57,7 +57,7 @@ #endif // HAVE_CUB #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in index 1917f62..5286a99 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/Makefile.in @@ -28,8 +28,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu index 9829940..92891e6 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Device-initiated_Communication_with_NVSHMEM/jacobi.cu @@ -62,7 +62,7 @@ #endif // HAVE_CUB #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in index 7aa2d9c..f75c8f6 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/Makefile.in @@ -23,9 +23,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp index 360e66a..3242061 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/.master/Using_CUDA_Graphs/jacobi.cpp @@ -51,7 +51,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile index 374e98f..c54d834 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/Makefile @@ -28,8 +28,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu index 04c3b63..a387aea 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Device-initiated_Communication_with_NVSHMEM/jacobi.cu @@ -60,7 +60,7 @@ #endif // HAVE_CUB #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile index 06003e2..b78e2cb 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/Makefile @@ -23,9 +23,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp index f692d4d..655a5f0 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/solutions/Using_CUDA_Graphs/jacobi.cpp @@ -51,7 +51,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile index 687a121..5670704 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/Makefile @@ -28,8 +28,8 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++14 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include -NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvToolsExt -lnvidia-ml +NVCC_FLAGS += -dc -Xcompiler -fopenmp -lineinfo -DUSE_NVTX -lnvToolsExt $(GENCODE_FLAGS) -std=c++17 -I$(NVSHMEM_HOME)/include -I$(MPI_HOME)/include +NVCC_LDFLAGS = -ccbin=mpic++ -L$(NVSHMEM_HOME)/lib -lnvshmem_host -lnvshmem_device -L$(MPI_HOME)/lib -lmpi -L$(CUDA_HOME)/lib64 -lcuda -lcudart -lnvidia-ml jacobi: Makefile jacobi.cu $(NVCC) $(NVCC_FLAGS) jacobi.cu -c -o jacobi.o $(NVCC) $(GENCODE_FLAGS) jacobi.o -o jacobi $(NVCC_LDFLAGS) diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu index a242329..98065fe 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Device-initiated_Communication_with_NVSHMEM/jacobi.cu @@ -58,7 +58,7 @@ #endif // HAVE_CUB #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff}; diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile index ca0ae2f..8c0d823 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/Makefile @@ -23,9 +23,9 @@ ifdef DISABLE_CUB else NVCC_FLAGS = -DHAVE_CUB endif -NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++14 -MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++14 -LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnvToolsExt -lnccl +NVCC_FLAGS += -lineinfo $(GENCODE_FLAGS) -std=c++17 +MPICXX_FLAGS = -DUSE_NVTX -I$(CUDA_HOME)/include -I$(NCCL_HOME)/include -std=c++17 +LD_FLAGS = -L$(CUDA_HOME)/lib64 -lcudart -lnccl jacobi: Makefile jacobi.cpp jacobi_kernels.o $(MPICXX) $(MPICXX_FLAGS) jacobi.cpp jacobi_kernels.o $(LD_FLAGS) -o jacobi diff --git a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp index b9197e7..b2eb0b9 100644 --- a/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp +++ b/10-H_CUDA_Graphs_and_Device-initiated_Communication_with_NVSHMEM/tasks/Using_CUDA_Graphs/jacobi.cpp @@ -51,7 +51,7 @@ #include #ifdef USE_NVTX -#include +#include const uint32_t colors[] = {0x0000ff00, 0x000000ff, 0x00ffff00, 0x00ff00ff, 0x0000ffff, 0x00ff0000, 0x00ffffff};