Add MPI_Wtime timing counters (total, kernel, gather); fix `make clean` to also remove the daxpy_nvtx and mpi_daxpy_nvtx binaries
This commit is contained in:
2
Makefile
2
Makefile
@@ -21,7 +21,7 @@ mpienv: mpienv.f90
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -rf daxpy mpi_daxpy
|
||||
rm -rf daxpy mpi_daxpy daxpy_nvtx mpi_daxpy_nvtx
|
||||
|
||||
.PHONY: force
|
||||
force: clean all
|
||||
|
||||
@@ -66,12 +66,19 @@ void set_rank_device(int n_ranks, int rank) {
|
||||
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
int n = 1024;
|
||||
int n = 32*1024*1024;
|
||||
int world_size, world_rank;
|
||||
|
||||
double a = 2.0;
|
||||
double sum = 0.0;
|
||||
|
||||
double start_time = 0.0;
|
||||
double end_time = 0.0;
|
||||
double k_start_time = 0.0;
|
||||
double k_end_time = 0.0;
|
||||
double g_start_time = 0.0;
|
||||
double g_end_time = 0.0;
|
||||
|
||||
//double *x, *y, *d_x, *d_y;
|
||||
double *m_x, *m_y;
|
||||
|
||||
@@ -113,6 +120,7 @@ int main(int argc, char **argv) {
|
||||
//CHECK("setDevice", cudaSetDevice(0));
|
||||
|
||||
cudaProfilerStart();
|
||||
start_time = MPI_Wtime();
|
||||
|
||||
CHECK( "cublas", cublasCreate(&handle) );
|
||||
|
||||
@@ -155,12 +163,14 @@ int main(int argc, char **argv) {
|
||||
MEMINFO("m_x", m_x, sizeof(m_x));
|
||||
MEMINFO("m_y", m_y, sizeof(m_y));
|
||||
|
||||
k_start_time = MPI_Wtime();
|
||||
nvtxRangePushA("cublasDaxpy");
|
||||
CHECK("daxpy",
|
||||
cublasDaxpy(handle, n, &a, m_x, 1, m_y, 1) );
|
||||
|
||||
CHECK("daxpy sync", cudaDeviceSynchronize());
|
||||
nvtxRangePop();
|
||||
k_end_time = MPI_Wtime();
|
||||
|
||||
/*
|
||||
CHECK("y = d_y",
|
||||
@@ -182,6 +192,7 @@ int main(int argc, char **argv) {
|
||||
nvtxRangePop();
|
||||
printf("%d/%d SUM = %f\n", world_rank, world_size, sum);
|
||||
|
||||
g_start_time = MPI_Wtime();
|
||||
nvtxRangePushA("allGather");
|
||||
nvtxRangePushA("x");
|
||||
MPI_Allgather(m_x, n, MPI_DOUBLE, m_allx, n, MPI_DOUBLE, MPI_COMM_WORLD);
|
||||
@@ -190,6 +201,7 @@ int main(int argc, char **argv) {
|
||||
MPI_Allgather(m_y, n, MPI_DOUBLE, m_ally, n, MPI_DOUBLE, MPI_COMM_WORLD);
|
||||
nvtxRangePop();
|
||||
nvtxRangePop();
|
||||
g_end_time = MPI_Wtime();
|
||||
|
||||
sum = 0.0;
|
||||
nvtxRangePushA("allSum");
|
||||
@@ -211,10 +223,15 @@ int main(int argc, char **argv) {
|
||||
|
||||
nvtxRangePop();
|
||||
|
||||
end_time = MPI_Wtime();
|
||||
cudaProfilerStop();
|
||||
|
||||
cublasDestroy(handle);
|
||||
MPI_Finalize();
|
||||
|
||||
printf("total time: %0.3f\n", end_time-start_time);
|
||||
printf("kernel time: %0.3f\n", k_end_time-k_start_time);
|
||||
printf("gather time: %0.3f\n", g_end_time-g_start_time);
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user