Use MPI_IN_PLACE in one of the allgathers

Try to reproduce the nsys segfault seen when running GENE, whose segfault backtrace (BT) includes an in-place allgather.
2020-09-02 16:34:37 -04:00
parent cff437eace
commit 7a1d10349e
1 changed files with 5 additions and 1 deletions
--- a/mpi_daxpy_nvtx.cc
+++ b/mpi_daxpy_nvtx.cc
@@ -267,6 +267,10 @@ int main(int argc, char **argv) {
    nvtxRangePop();
    printf("%d/%d SUM = %f\n", world_rank, world_size, sum);
    nvtxRangePushA("copyPrepAllxInplace");
    cudaMemcpy(d_allx+(world_rank*n), d_x, n*sizeof(*d_x), cudaMemcpyDeviceToDevice);
    nvtxRangePop();
 #ifdef BARRIER
    b_start_time = MPI_Wtime();
    nvtxRangePushA("mpiBarrier");
@@ -278,7 +282,7 @@ int main(int argc, char **argv) {
    g_start_time = MPI_Wtime();
    nvtxRangePushA("mpiAllGather");
    nvtxRangePushA("x");
-    MPI_Allgather(d_x, n, MPI_DOUBLE, d_allx, n, MPI_DOUBLE, MPI_COMM_WORLD);
+    MPI_Allgather(MPI_IN_PLACE, n, MPI_DOUBLE, d_allx, n, MPI_DOUBLE, MPI_COMM_WORLD);
    nvtxRangePop();
    nvtxRangePushA("y");
    MPI_Allgather(d_y, n, MPI_DOUBLE, d_ally, n, MPI_DOUBLE, MPI_COMM_WORLD);