/*
 * =====================================================================================
 *
 *       Filename:  mpi_daxpy_gt.c
 *
 *    Description:  Port to gtensor / gt-blas
 *
 *        Version:  1.0
 *        Created:  05/20/2019 10:33:30 AM
 *       Revision:  none
 *       Compiler:  gcc
 *
 *         Author:  YOUR NAME (),
 *   Organization:
 *
 * =====================================================================================
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#include <mpi.h>

#include "gtensor/gtensor.h"
#include "gt-blas/blas.h"

/* Map each MPI rank to a GPU device. Ranks are spread evenly across the
 * available devices; when there are more ranks than devices, the rank count
 * must be a multiple of the device count. */
void set_rank_device(int n_ranks, int rank)
{
  int n_devices, device, ranks_per_device;

  n_devices = gt::backend::clib::device_get_count();

  if (n_ranks > n_devices) {
    if (n_ranks % n_devices != 0) {
      printf("ERROR: Number of ranks (%d) not a multiple of number of GPUs (%d)\n",
             n_ranks, n_devices);
      exit(EXIT_FAILURE);
    }
    ranks_per_device = n_ranks / n_devices;
    device = rank / ranks_per_device;
  } else {
    ranks_per_device = 1;
    device = rank;
  }

  gt::backend::clib::device_set(device);
}

int main(int argc, char **argv)
{
  int n = 1024;
  int world_size, world_rank, device_id;
  uint32_t vendor_id;
  double a = 2.0;
  double sum = 0.0;

  MPI_Init(NULL, NULL);
  MPI_Comm_size(MPI_COMM_WORLD, &world_size);
  MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);

  set_rank_device(world_size, world_rank);

  /* Host and device work arrays for the daxpy (double elements). */
  auto x = gt::empty<double>({n});
  auto y = gt::empty<double>({n});
  auto d_x = gt::empty_device<double>({n});
  auto d_y = gt::empty_device<double>({n});
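
  /*
   * The remainder of main() is a minimal sketch. The initialization values,
   * the device-report queries (gt::backend::clib::device_get,
   * device_get_vendor_id), and the gt-blas handle/axpy calls below are
   * assumptions for illustration, not a definitive implementation.
   */

  /* Report which device this rank ended up on (assumed clib query API). */
  device_id = gt::backend::clib::device_get();
  vendor_id = gt::backend::clib::device_get_vendor_id(device_id);
  printf("Rank %d of %d using device %d (vendor id 0x%08x)\n",
         world_rank, world_size, device_id, (unsigned)vendor_id);

  /* Initialize host inputs (values assumed; any data would do). */
  for (int i = 0; i < n; i++) {
    x(i) = 1.0;
    y(i) = 2.0;
  }

  /* Copy host data to the device. */
  gt::copy(x, d_x);
  gt::copy(y, d_y);

  /* d_y = a * d_x + d_y on the device via gt-blas (assumed handle API). */
  gt::blas::handle_t* h = gt::blas::create();
  gt::blas::axpy(h, a, d_x, d_y);
  gt::blas::destroy(h);

  /* Copy the result back and accumulate a per-rank checksum. */
  gt::copy(d_y, y);
  for (int i = 0; i < n; i++) {
    sum += y(i);
  }
  /* With a = 2.0, x(i) = 1.0, y(i) = 2.0, each element is 4.0. */
  printf("Rank %d: sum = %f (expected %f)\n", world_rank, sum, 4.0 * n);

  MPI_Finalize();

  return EXIT_SUCCESS;
}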