From 02b31f0427c546303e4004fd64a66e44aeb2fed4 Mon Sep 17 00:00:00 2001
From: Bryce Allen
Date: Fri, 7 Aug 2020 18:50:11 -0400
Subject: [PATCH] hacky multi-node support assumes 6 procs per node

---
 mpi_daxpy_nvtx.cc | 14 ++++++++++++--
 summit/job.lsf    | 16 ++++++++++++++++
 summit/run.sh     | 32 ++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 2 deletions(-)
 create mode 100644 summit/job.lsf
 create mode 100755 summit/run.sh

diff --git a/mpi_daxpy_nvtx.cc b/mpi_daxpy_nvtx.cc
index 261e8a6..2743edf 100644
--- a/mpi_daxpy_nvtx.cc
+++ b/mpi_daxpy_nvtx.cc
@@ -68,7 +68,9 @@ void set_rank_device(int n_ranks, int rank) {
 
 
 int main(int argc, char **argv) {
-    const int nall = 48*MB;
+    const int n_per_node = 48*MB;
+    int nodes = 1;
+    int nall = n_per_node;
     int n = 0;
     int world_size, world_rank;
 
@@ -99,10 +101,18 @@ int main(int argc, char **argv) {
     MPI_Comm_size(MPI_COMM_WORLD, &world_size);
     MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
 
+    // hack: assume max 6 mpi per node, so we use bigger
+    // arrays on multi-node runs
+    if (world_size > 6) {
+        nodes = (world_size + 5) / 6;
+    }
+
+    nall = nodes * n_per_node;
     n = nall / world_size;
 
     if (world_rank == 0) {
-        printf("%d ranks, %d elements each, total %d\n", world_size, n, nall);
+        printf("%d nodes, %d ranks, %d elements each, total %d\n",
+               nodes, world_size, n, nall);
     }
 
     /*
diff --git a/summit/job.lsf b/summit/job.lsf
new file mode 100644
index 0000000..19bd3b4
--- /dev/null
+++ b/summit/job.lsf
@@ -0,0 +1,16 @@
+#!/bin/bash
+#BSUB -P fus123
+#BSUB -W 0:20
+#BSUB -nnodes 2
+#BSUB -J cublas-nsys-test
+#BSUB -o cublas-nsys-test.%J
+#BSUB -q debug
+# run.sh args: um|noum profiler nodes ppn; a trailing '&' overlaps that run with the next one, 'wait' joins it.
+./run.sh noum none 6 2
+./run.sh noum nsys 6 2
+./run.sh noum none 6 1 &
+./run.sh noum nsys 6 1
+wait
+./run.sh noum none 3 1 &
+./run.sh noum nsys 3 1
+wait
diff --git a/summit/run.sh b/summit/run.sh
new file mode 100755
index 0000000..da9e91f
--- /dev/null
+++ b/summit/run.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Launch one mpi_daxpy_nvtx run through jsrun, optionally wrapped in a profiler.
+if [ $# -ne 4 ]; then
+    echo "Usage: $0 um|noum nsys|nvprof|none nodes ppn"
+    exit 1
+fi
+
+um=$1      # "um" selects the managed binary; anything else the unmanaged one
+prof=$2    # "nsys" or "nvprof"; anything else runs without a profiler
+nodes=$3   # handed to jsrun as -n (NOTE(review): jsrun counts resource sets here - confirm naming)
+ppn=$4     # handed to jsrun as -c, -g and -a
+
+tag=${um}_${prof}_${nodes}_${ppn}
+# Profiler wrapper command; quoting $prof keeps the test valid for an empty argument.
+if [ "$prof" == "nsys" ]; then
+    prof_cmd="nsys profile --kill=none -c cudaProfilerApi -o profile/${tag}.%q{PMIX_RANK}"
+elif [ "$prof" == "nvprof" ]; then
+    prof_cmd="nvprof -o profile/nvprof.%q{PMIX_RANK}.nvvp --profile-from-start off"
+else
+    prof_cmd=""
+fi
+
+if [ "$um" == "um" ]; then
+    cmd=./mpi_daxpy_nvtx_managed
+else
+    cmd=./mpi_daxpy_nvtx_unmanaged
+fi
+# $prof_cmd must stay unquoted below: it holds a multi-word command and may be empty.
+set -x   # trace the jsrun command line as it is executed
+jsrun --smpiargs="-gpu" -n$nodes -c$ppn -g$ppn -a$ppn \
+    $prof_cmd $cmd >out-${tag}.txt 2>&1
+set +x   # tracing off again once the launch has returned