/* * ===================================================================================== * * Filename: daxpy_nvtx.c * * Description: Test cublas DAXPY, specifically to verify usage on * summit with GPUMPS and all 6 GPUs shared over 42 procs. * * Version: 1.0 * Created: 05/20/2019 10:33:30 AM * Revision: none * Compiler: gcc * * Author: YOUR NAME (), * Organization: * * ===================================================================================== */ #include #include #include "cublas_v2.h" #include "cuda_runtime_api.h" #include "nvToolsExt.h" #include "cuda_profiler_api.h" #define GPU_CHECK_CALLS #include "cuda_error.h" // column major #define IDX2C(i,j,ld) (((j)*(ld))+(i)) static cublasHandle_t handle; int main(int argc, char **argv) { int n = 1024; double a = 2.0; double sum = 0.0; double *x, *y, *d_x, *d_y; x = (double *)malloc(n*sizeof(*x)); if (x == NULL) { printf("host malloc(x) failed\n"); return EXIT_FAILURE; } y = (double *)malloc(n*sizeof(*y)); if (x == NULL) { printf("host malloc(y) failed\n"); return EXIT_FAILURE; } for (int i=0; i