add jlse runners, more flexible node counter
jlse/job.pbs (new executable file, 21 lines)
@@ -0,0 +1,21 @@
#!/bin/bash
#COBALT -t 00:20:00
#COBALT -n 2
#COBALT --jobname cublas-nsys-test
#COBALT -O cublas-nsys-test
#COBALT -q gpu_v100_smx2

cd $HOME/hpc/mpi-cuda/jlse
pwd
source ./setup.sh
which mpirun
which nsys

./run.sh noum none 2 4
./run.sh noum nsys 2 4
./run.sh noum none 1 4 &
./run.sh noum nsys 1 4
wait
./run.sh noum none 1 2 &
./run.sh noum nsys 1 2
wait
jlse/run.sh (new executable file, 34 lines)
@@ -0,0 +1,34 @@
#!/bin/bash

if [ $# -ne 4 ]; then
    echo "Usage: $0 um|noum nsys|nvprof|none nodes ppn"
    exit 1
fi

um=$1
prof=$2
nodes=$3
ppn=$4

tag=${um}_${prof}_${nodes}_${ppn}

if [ $prof == "nsys" ]; then
    prof_cmd="nsys profile --kill=none -c cudaProfilerApi -o profile/${tag}.%q{PMIX_RANK}"
elif [ $prof == "nvprof" ]; then
    prof_cmd="nvprof -o profile/nvprof.%q{PMIX_RANK}.nvvp --profile-from-start off"
else
    prof_cmd=""
fi

if [ $um == "um" ]; then
    cmd=./mpi_daxpy_nvtx_managed
else
    cmd=./mpi_daxpy_nvtx_unmanaged
fi

total_procs=$((ppn * nodes))

set +x
mpirun -np $total_procs \
    $prof_cmd $cmd >out-${tag}.txt 2>&1
set -x
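Context for run.sh's profiler flags (not part of this commit's diff): both nsys's `-c cudaProfilerApi` and nvprof's `--profile-from-start off` defer capture until the application itself opens a profiling window with the CUDA profiler API. The mpi_daxpy_nvtx_* sources are not shown here, so the sketch below only illustrates that pattern; the cuBLAS daxpy call and the NVTX range name are assumptions suggested by the cublas-nsys-test job name, not the commit's actual code.

/* Illustrative sketch only -- not the commit's source.
 * Nothing is recorded until cudaProfilerStart(); recording ends at
 * cudaProfilerStop() (run.sh's --kill=none asks nsys not to signal the
 * application when collection ends).
 */
#include <cuda_runtime.h>
#include <cuda_profiler_api.h>
#include <cublas_v2.h>
#include <nvToolsExt.h>

int main(void) {
  const int n = 1 << 20;
  const double alpha = 2.0;
  double *x, *y;
  cudaMalloc((void **)&x, n * sizeof(double));
  cudaMalloc((void **)&y, n * sizeof(double));
  cudaMemset(x, 0, n * sizeof(double));
  cudaMemset(y, 0, n * sizeof(double));

  cublasHandle_t handle;
  cublasCreate(&handle);

  cublasDaxpy(handle, n, &alpha, x, 1, y, 1);   /* warm-up: not captured */
  cudaDeviceSynchronize();

  cudaProfilerStart();                          /* capture window opens */
  nvtxRangePushA("daxpy");                      /* named range on the timeline */
  cublasDaxpy(handle, n, &alpha, x, 1, y, 1);
  cudaDeviceSynchronize();
  nvtxRangePop();
  cudaProfilerStop();                           /* capture window closes */

  cublasDestroy(handle);
  cudaFree(x);
  cudaFree(y);
  return 0;
}

The `%q{PMIX_RANK}` substitution in the `-o` paths expands an environment variable set by Open MPI's PMIx launcher, so each rank writes its own report under profile/.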
jlse/setup.sh (new executable file, 5 lines)
@@ -0,0 +1,5 @@
source $HOME/fusion/spack/ivolta86/share/spack/setup-env.sh
spack load -r openmpi

module use $HOME/soft/modulefiles
module load nsight-systems
@@ -69,6 +69,19 @@ void set_rank_device(int n_ranks, int rank) {
 }
 
 
+int get_node_count(int n_ranks) {
+  int shm_size;
+  MPI_Comm shm_comm;
+
+  MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
+                      MPI_INFO_NULL, &shm_comm);
+  MPI_Comm_size(shm_comm, &shm_size);
+
+  MPI_Comm_free(&shm_comm);
+  return n_ranks / shm_size;
+}
+
+
 int main(int argc, char **argv) {
   const int n_per_node = 48*MB;
   int nodes = 1;
@@ -105,11 +118,15 @@ int main(int argc, char **argv) {
   MPI_Comm_size(MPI_COMM_WORLD, &world_size);
   MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
 
+  nodes = get_node_count(world_size);
+
   // hack: assume max 6 mpi per node, so we use bigger
   // arrays on multi-node runs
+  /*
   if (world_size > 6) {
     nodes = (world_size + 5) / 6;
   }
+  */
 
   nall = nodes * n_per_node;
   n = nall / world_size;
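The new get_node_count() derives the node count from the size of a shared-memory (per-node) communicator instead of the hard-coded "max 6 MPI ranks per node" guess, but n_ranks / shm_size is exact only when every node hosts the same number of ranks. For illustration only (not part of the commit), the standalone sketch below mirrors that routine and adds a leader-counting variant that also handles uneven layouts; the function names are hypothetical.

/* Standalone illustration: two ways to count nodes with MPI. */
#include <mpi.h>
#include <stdio.h>

/* Mirrors the commit's get_node_count(): assumes uniform ranks per node. */
static int count_nodes_uniform(int n_ranks) {
  int shm_size;
  MPI_Comm shm_comm;
  MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                      MPI_INFO_NULL, &shm_comm);
  MPI_Comm_size(shm_comm, &shm_size);
  MPI_Comm_free(&shm_comm);
  return n_ranks / shm_size;            /* exact only for uniform ppn */
}

/* Counts one leader per shared-memory communicator: works for uneven layouts. */
static int count_nodes_leaders(void) {
  int shm_rank, n_nodes;
  MPI_Comm shm_comm;
  MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                      MPI_INFO_NULL, &shm_comm);
  MPI_Comm_rank(shm_comm, &shm_rank);
  int leader = (shm_rank == 0) ? 1 : 0; /* one rank per node contributes 1 */
  MPI_Allreduce(&leader, &n_nodes, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
  MPI_Comm_free(&shm_comm);
  return n_nodes;
}

int main(int argc, char **argv) {
  int world_size, world_rank;
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);
  if (world_rank == 0)
    printf("uniform: %d nodes, leaders: %d nodes\n",
           count_nodes_uniform(world_size), count_nodes_leaders());
  MPI_Finalize();
  return 0;
}

For the 2-node, 4-ppn runs in job.pbs this gives world_size = 8 and shm_size = 4, so nodes = 2, nall = 2 * 48*MB, and each rank works on n = nall / 8 = 12*MB elements.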