Use MPI_IN_PLACE in one of the allgathers

Try to reproduce the nsys segfault seen when running GENE, where the
backtrace (BT) of the segfault shows an in-place allgather.
main
Bryce Allen 5 years ago
parent cff437eace
commit 7a1d10349e

@ -267,6 +267,10 @@ int main(int argc, char **argv) {
nvtxRangePop(); nvtxRangePop();
printf("%d/%d SUM = %f\n", world_rank, world_size, sum); printf("%d/%d SUM = %f\n", world_rank, world_size, sum);
nvtxRangePushA("copyPrepAllxInplace");
cudaMemcpy(d_allx+(world_rank*n), d_x, n*sizeof(*d_x), cudaMemcpyDeviceToDevice);
nvtxRangePop();
#ifdef BARRIER #ifdef BARRIER
b_start_time = MPI_Wtime(); b_start_time = MPI_Wtime();
nvtxRangePushA("mpiBarrier"); nvtxRangePushA("mpiBarrier");
@ -278,7 +282,7 @@ int main(int argc, char **argv) {
g_start_time = MPI_Wtime(); g_start_time = MPI_Wtime();
nvtxRangePushA("mpiAllGather"); nvtxRangePushA("mpiAllGather");
nvtxRangePushA("x"); nvtxRangePushA("x");
MPI_Allgather(d_x, n, MPI_DOUBLE, d_allx, n, MPI_DOUBLE, MPI_COMM_WORLD); MPI_Allgather(MPI_IN_PLACE, n, MPI_DOUBLE, d_allx, n, MPI_DOUBLE, MPI_COMM_WORLD);
nvtxRangePop(); nvtxRangePop();
nvtxRangePushA("y"); nvtxRangePushA("y");
MPI_Allgather(d_y, n, MPI_DOUBLE, d_ally, n, MPI_DOUBLE, MPI_COMM_WORLD); MPI_Allgather(d_y, n, MPI_DOUBLE, d_ally, n, MPI_DOUBLE, MPI_COMM_WORLD);

Loading…
Cancel
Save