hacky multi-node support

assumes 6 procs per node
2020-08-07 18:50:11 -04:00
parent c32b86422f
commit 02b31f0427
3 changed files with 60 additions and 2 deletions
--- a/mpi_daxpy_nvtx.cc
+++ b/mpi_daxpy_nvtx.cc
@@ -68,7 +68,9 @@ void set_rank_device(int n_ranks, int rank) {


 int main(int argc, char **argv) {
-    const int nall = 48*MB;
+    const int n_per_node = 48*MB;
+    int nodes = 1;
+    int nall = n_per_node;
    int n = 0;
    int world_size, world_rank;

@@ -99,10 +101,18 @@ int main(int argc, char **argv) {
    MPI_Comm_size(MPI_COMM_WORLD, &world_size);
    MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

+    // hack: assume at most 6 MPI ranks per node, so the total array
+    // size (nall) grows with the node count on multi-node runs
+    if (world_size > 6) {
+        nodes = (world_size + 5) / 6;
+    }
+
+    nall = nodes * n_per_node;
    n = nall / world_size;

    if (world_rank == 0) {
-        printf("%d ranks, %d elements each, total %d\n", world_size, n, nall);
+        printf("%d nodes, %d ranks, %d elements each, total %d\n",
+               nodes, world_size, n, nall);
    }

    /*
--- a/summit/job.lsf
+++ b/summit/job.lsf
@@ -0,0 +1,16 @@
+#!/bin/bash
+#BSUB -P fus123
+#BSUB -W 0:20
+#BSUB -nnodes 2
+#BSUB -J cublas-nsys-test
+#BSUB -o cublas-nsys-test.%J
+#BSUB -q debug
+# Run the "noum" benchmark with and without nsys for three nodes/ppn settings.
+./run.sh noum none 6 2    # args: um|noum, profiler, nodes, ppn (see run.sh usage)
+./run.sh noum nsys 6 2
+./run.sh noum none 6 1 &  # backgrounded so it runs alongside the nsys run below
+./run.sh noum nsys 6 1
+wait                      # block until the backgrounded run has finished
+./run.sh noum none 3 1 &  # same pairing: unprofiled run in the background
+./run.sh noum nsys 3 1
+wait                      # do not exit the job while a background run is active
--- a/summit/run.sh
+++ b/summit/run.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# Launch one mpi_daxpy_nvtx run under jsrun, optionally wrapped in a profiler.
+if [ $# -ne 4 ]; then
+  echo "Usage: $0 um|noum nsys|nvprof|none nodes ppn"
+  exit 1
+fi
+
+um=$1
+prof=$2
+nodes=$3
+ppn=$4
+
+tag=${um}_${prof}_${nodes}_${ppn}
+
+if [ "$prof" == "nsys" ]; then
+  prof_cmd="nsys profile --kill=none -c cudaProfilerApi -o profile/${tag}.%q{PMIX_RANK}"
+elif [ "$prof" == "nvprof" ]; then
+  prof_cmd="nvprof -o profile/nvprof.%q{PMIX_RANK}.nvvp --profile-from-start off"
+else
+  prof_cmd=""
+fi
+
+if [ "$um" == "um" ]; then
+  cmd=./mpi_daxpy_nvtx_managed
+else
+  cmd=./mpi_daxpy_nvtx_unmanaged
+fi
+# Trace only the launch line; $prof_cmd stays unquoted so it splits into words.
+set -x
+jsrun --smpiargs="-gpu" -n"$nodes" -c"$ppn" -g"$ppn" -a"$ppn" \
+   $prof_cmd $cmd >"out-${tag}.txt" 2>&1
+set +x