Use MPI_IN_PLACE in one of the allgathers
Try to reproduce the nsys segfault seen when running GENE, where the backtrace (BT) for the segfault shows an in-place allgather.
This commit is contained in:
@@ -267,6 +267,10 @@ int main(int argc, char **argv) {
|
|||||||
nvtxRangePop();
|
nvtxRangePop();
|
||||||
printf("%d/%d SUM = %f\n", world_rank, world_size, sum);
|
printf("%d/%d SUM = %f\n", world_rank, world_size, sum);
|
||||||
|
|
||||||
|
nvtxRangePushA("copyPrepAllxInplace");
|
||||||
|
cudaMemcpy(d_allx+(world_rank*n), d_x, n*sizeof(*d_x), cudaMemcpyDeviceToDevice);
|
||||||
|
nvtxRangePop();
|
||||||
|
|
||||||
#ifdef BARRIER
|
#ifdef BARRIER
|
||||||
b_start_time = MPI_Wtime();
|
b_start_time = MPI_Wtime();
|
||||||
nvtxRangePushA("mpiBarrier");
|
nvtxRangePushA("mpiBarrier");
|
||||||
@@ -278,7 +282,7 @@ int main(int argc, char **argv) {
|
|||||||
g_start_time = MPI_Wtime();
|
g_start_time = MPI_Wtime();
|
||||||
nvtxRangePushA("mpiAllGather");
|
nvtxRangePushA("mpiAllGather");
|
||||||
nvtxRangePushA("x");
|
nvtxRangePushA("x");
|
||||||
MPI_Allgather(d_x, n, MPI_DOUBLE, d_allx, n, MPI_DOUBLE, MPI_COMM_WORLD);
|
MPI_Allgather(MPI_IN_PLACE, n, MPI_DOUBLE, d_allx, n, MPI_DOUBLE, MPI_COMM_WORLD);
|
||||||
nvtxRangePop();
|
nvtxRangePop();
|
||||||
nvtxRangePushA("y");
|
nvtxRangePushA("y");
|
||||||
MPI_Allgather(d_y, n, MPI_DOUBLE, d_ally, n, MPI_DOUBLE, MPI_COMM_WORLD);
|
MPI_Allgather(d_y, n, MPI_DOUBLE, d_ally, n, MPI_DOUBLE, MPI_COMM_WORLD);
|
||||||
|
|||||||
Reference in New Issue
Block a user