diff --git a/CMakeLists.txt b/CMakeLists.txt
index 930bd2a..bb149a7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,9 +17,11 @@ find_package(MPI REQUIRED)
 add_executable(mpi_daxpy_gt)
 target_sources(mpi_daxpy_gt PRIVATE mpi_daxpy_gt.cc)
 target_link_libraries(mpi_daxpy_gt gtensor::gtensor)
+target_link_libraries(mpi_daxpy_gt gtensor::blas)
 target_link_libraries(mpi_daxpy_gt MPI::MPI_CXX)
 
 if ("${GTENSOR_DEVICE}" STREQUAL "cuda") 
+  enable_language(CUDA)
   set_source_files_properties(mpi_daxpy_gt.cc
                               TARGET_DIRECTORY mpi_daxpy_gt
                               PROPERTIES LANGUAGE CUDA)
diff --git a/mpi_daxpy_gt.cc b/mpi_daxpy_gt.cc
index b42715d..d9d53a6 100644
--- a/mpi_daxpy_gt.cc
+++ b/mpi_daxpy_gt.cc
@@ -25,9 +25,8 @@
 
 void set_rank_device(int n_ranks, int rank) {
     int n_devices, device, ranks_per_device;
-    size_t memory_per_rank;
 
-    n_devices = gt::backend::device_get_count();
+    n_devices = gt::backend::clib::device_get_count();
 
     if (n_ranks > n_devices) {
         if (n_ranks % n_devices != 0) {
@@ -42,14 +41,14 @@ void set_rank_device(int n_ranks, int rank) {
         device = rank;
     }
 
-
-    gt::backend::device_set(device);
+    gt::backend::clib::device_set(device);
 }
 
 
 int main(int argc, char **argv) {
     int n = 1024;
     int world_size, world_rank;
+    uint32_t vendor_id;
 
     double a = 2.0;
     double sum = 0.0;
@@ -70,6 +69,7 @@ int main(int argc, char **argv) {
     }
 
     set_rank_device(world_size, world_rank);
+    vendor_id = gt::backend::clib::device_get_vendor_id(gt::backend::clib::device_get());
 
     gt::blas::handle_t* h = gt::blas::create();
 
@@ -87,7 +87,7 @@ int main(int argc, char **argv) {
         //printf("%f\n", y[i]);
         sum += y[i];
     }
-    printf("%d/%d SUM = %f\n", world_rank, world_size, sum);
+    printf("%d/%d [%x] SUM = %f\n", world_rank, world_size, vendor_id, sum);
 
     MPI_Finalize();