MODULE mpipara

  INTEGER,SAVE :: mpi_rank
  INTEGER,SAVE :: mpi_size
  INTEGER,SAVE :: mpi_threading_mode
  INTEGER,SAVE :: comm_icosa
  INTEGER,SAVE :: ierr
  LOGICAL,SAVE :: using_mpi
  LOGICAL,SAVE :: is_mpi_root
  LOGICAL,SAVE :: is_mpi_master
  INTEGER,SAVE :: mpi_master
  INTEGER,SAVE :: id_mpi     ! id for profiling
  INTEGER,SAVE :: device_id

  ! Generic interfaces for MPI_ALLOC_MEM-backed communication buffers
  INTERFACE allocate_mpi_buffer
    MODULE PROCEDURE allocate_mpi_buffer_r2, allocate_mpi_buffer_r3, allocate_mpi_buffer_r4
  END INTERFACE allocate_mpi_buffer

  INTERFACE free_mpi_buffer
    MODULE PROCEDURE free_mpi_buffer_r2, free_mpi_buffer_r3, free_mpi_buffer_r4
  END INTERFACE free_mpi_buffer

  PRIVATE :: getin

CONTAINS

  SUBROUTINE getin(name,value)
  ! Copied from getin.f90 to avoid circular dependency
    USE ioipsl, ONLY : getin_=>getin
    USE transfert_omp_mod
    USE omp_para
    IMPLICIT NONE
    CHARACTER(LEN=*) :: name
    CHARACTER(LEN=*) :: value

!$OMP MASTER
    CALL getin_(name,value)
    IF (is_mpi_root) PRINT *,'GETIN ',TRIM(name),' = ',TRIM(value)
!$OMP END MASTER
    IF (omp_in_parallel()) CALL bcast_omp(value)

  END SUBROUTINE getin

  SUBROUTINE init_mpipara
    USE mpi_mod
#ifdef CPP_USING_XIOS
    USE xios
#endif
    USE abort_mod
    IMPLICIT NONE
    CHARACTER(LEN=256) :: required_mode_str
    INTEGER :: required_mode

    using_mpi=.FALSE.
#ifdef CPP_USING_MPI
    using_mpi=.TRUE.
#endif

    IF (using_mpi) THEN

      required_mode_str='funneled'
      CALL getin('mpi_threading_mode',required_mode_str)
      SELECT CASE(TRIM(required_mode_str))
        CASE ('single')
          required_mode=MPI_THREAD_SINGLE
        CASE ('funneled')
          required_mode=MPI_THREAD_FUNNELED
        CASE ('serialized')
          required_mode=MPI_THREAD_SERIALIZED
        CASE ('multiple')
          required_mode=MPI_THREAD_MULTIPLE
        CASE DEFAULT
          PRINT *,'Bad selector for variable mpi_threading_mode : <', TRIM(required_mode_str), &
                  '> => options are <single>, <funneled>, <serialized>, <multiple>'
          STOP
      END SELECT

      IF (required_mode==MPI_THREAD_SERIALIZED .OR. required_mode==MPI_THREAD_MULTIPLE) THEN
        CALL abort_acc("mpi_threading_mode /= 'single' .AND. mpi_threading_mode /= 'funneled'")
      ENDIF

      IF (required_mode==MPI_THREAD_SINGLE)     PRINT *,'MPI_INIT_THREAD : MPI_THREAD_SINGLE required'
      IF (required_mode==MPI_THREAD_FUNNELED)   PRINT *,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED required'
      IF (required_mode==MPI_THREAD_SERIALIZED) PRINT *,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED required'
      IF (required_mode==MPI_THREAD_MULTIPLE)   PRINT *,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE required'

      CALL MPI_INIT_THREAD(required_mode,mpi_threading_mode,ierr)

      IF (mpi_threading_mode==MPI_THREAD_SINGLE)     PRINT *,'MPI_INIT_THREAD : MPI_THREAD_SINGLE provided'
      IF (mpi_threading_mode==MPI_THREAD_FUNNELED)   PRINT *,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED provided'
      IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) PRINT *,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED provided'
      IF (mpi_threading_mode==MPI_THREAD_MULTIPLE)   PRINT *,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE provided'

      ! never use a higher threading level than the one that was requested
      IF (mpi_threading_mode > required_mode) mpi_threading_mode=required_mode

      IF (mpi_threading_mode==MPI_THREAD_SINGLE) THEN
        PRINT *,'MPI_INIT_THREAD : MPI_THREAD_SINGLE used : Warning : OpenMP is not guaranteed to work'
      ENDIF
      IF (mpi_threading_mode==MPI_THREAD_FUNNELED)   PRINT *,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED used'
      IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) PRINT *,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED used'
      IF (mpi_threading_mode==MPI_THREAD_MULTIPLE)   PRINT *,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE used'

#ifdef CPP_USING_XIOS
      CALL xios_initialize("icosagcm",return_comm=comm_icosa)
#else
      comm_icosa=MPI_COMM_WORLD
#endif

      CALL MPI_COMM_SIZE(comm_icosa,mpi_size,ierr)
      CALL MPI_COMM_RANK(comm_icosa,mpi_rank,ierr)
      PRINT *, 'MPI Process ', mpi_rank, '/', mpi_size

    ELSE

      comm_icosa=-1
      mpi_size=1
      mpi_rank=0

    ENDIF

    mpi_master=0
    IF (mpi_rank==0) THEN
      is_mpi_root=.TRUE.
      is_mpi_master=.TRUE.
    ELSE
      is_mpi_root=.FALSE.
      is_mpi_master=.FALSE.
    ENDIF

#ifdef _OPENACC
    device_id = setDevice(mpi_size, mpi_rank)
    PRINT *, 'GPU device ', device_id
#else
    device_id = -1
#endif

  END SUBROUTINE init_mpipara

  SUBROUTINE finalize_mpipara
    USE mpi_mod
#ifdef CPP_USING_XIOS
    USE xios
#endif
    IMPLICIT NONE

#ifdef CPP_USING_XIOS
    CALL xios_finalize
#endif
    IF (using_mpi) CALL MPI_FINALIZE(ierr)

  END SUBROUTINE finalize_mpipara

  SUBROUTINE allocate_mpi_buffer_r2(buffer,length)
    USE ISO_C_BINDING
    USE mpi_mod
    USE prec
    IMPLICIT NONE
    REAL(rstd), POINTER :: buffer(:)
    INTEGER,INTENT(IN)  :: length
    TYPE(C_PTR) :: base_ptr
    INTEGER(KIND=MPI_ADDRESS_KIND) :: real_size,lb,size
    INTEGER :: ierr

    ! allocate the buffer with MPI_ALLOC_MEM and associate the Fortran pointer with it
    CALL MPI_Type_get_extent(MPI_REAL8, lb, real_size, ierr)
    size=length*real_size
    CALL MPI_ALLOC_MEM(size,MPI_INFO_NULL,base_ptr,ierr)
    CALL C_F_POINTER(base_ptr, buffer, (/ length /))

  END SUBROUTINE allocate_mpi_buffer_r2

  SUBROUTINE free_mpi_buffer_r2(buffer)
    USE ISO_C_BINDING
    USE mpi_mod
    USE prec
    IMPLICIT NONE
    REAL(rstd), POINTER :: buffer(:)

    CALL MPI_FREE_MEM(buffer,ierr)

  END SUBROUTINE free_mpi_buffer_r2

  SUBROUTINE allocate_mpi_buffer_r3(buffer,length,dim3)
    USE ISO_C_BINDING
    USE mpi_mod
    USE prec
    IMPLICIT NONE
    REAL(rstd), POINTER :: buffer(:,:)
    INTEGER,INTENT(IN)  :: length
    INTEGER,INTENT(IN)  :: dim3
    TYPE(C_PTR) :: base_ptr
    INTEGER(KIND=MPI_ADDRESS_KIND) :: real_size,lb,size
    INTEGER :: ierr

    CALL MPI_Type_get_extent(MPI_REAL8, lb, real_size, ierr)
    size=length*real_size*dim3
    CALL MPI_ALLOC_MEM(size,MPI_INFO_NULL,base_ptr,ierr)
    CALL C_F_POINTER(base_ptr, buffer, (/ length, dim3 /))

  END SUBROUTINE allocate_mpi_buffer_r3

  SUBROUTINE free_mpi_buffer_r3(buffer)
    USE ISO_C_BINDING
    USE mpi_mod
    USE prec
    IMPLICIT NONE
    REAL(rstd), POINTER :: buffer(:,:)

    CALL MPI_FREE_MEM(buffer,ierr)

  END SUBROUTINE free_mpi_buffer_r3

  SUBROUTINE allocate_mpi_buffer_r4(buffer,length,dim3,dim4)
    USE ISO_C_BINDING
    USE mpi_mod
    USE prec
    IMPLICIT NONE
    REAL(rstd), POINTER :: buffer(:,:,:)
    INTEGER,INTENT(IN)  :: length
    INTEGER,INTENT(IN)  :: dim3
    INTEGER,INTENT(IN)  :: dim4
    TYPE(C_PTR) :: base_ptr
    INTEGER(KIND=MPI_ADDRESS_KIND) :: real_size,lb,size
    INTEGER :: ierr

    CALL MPI_Type_get_extent(MPI_REAL8, lb, real_size, ierr)
    size=length*real_size*dim3*dim4
    CALL MPI_ALLOC_MEM(size,MPI_INFO_NULL,base_ptr,ierr)
    CALL C_F_POINTER(base_ptr, buffer, (/ length, dim3, dim4 /))

  END SUBROUTINE allocate_mpi_buffer_r4

  SUBROUTINE free_mpi_buffer_r4(buffer)
    USE ISO_C_BINDING
    USE mpi_mod
    USE prec
    IMPLICIT NONE
    REAL(rstd), POINTER :: buffer(:,:,:)

    CALL MPI_FREE_MEM(buffer,ierr)

  END SUBROUTINE free_mpi_buffer_r4

#ifdef _OPENACC
  ! Choose an OpenACC device for this MPI rank: ranks sharing a node are
  ! distributed over that node's GPUs, round-robin if ranks outnumber GPUs.
  FUNCTION setDevice(nprocs, myrank)
    use iso_c_binding
    use openacc
    USE mpi_mod
    implicit none

    interface
      function gethostid() bind(C)
        use iso_c_binding
        integer(C_INT) :: gethostid
      end function gethostid
    end interface

    integer, intent(in) :: nprocs, myrank
    integer :: hostids(nprocs), localprocs(nprocs)
    integer :: hostid, ierr, numdev, mydev, i, numlocal
    integer :: setDevice

    ! get the hostids so we can determine what other processes are on this node
    hostid = gethostid()
    call mpi_allgather(hostid,1,MPI_INTEGER,hostids,1,MPI_INTEGER,MPI_COMM_WORLD,ierr)

    ! determine which processes are on this node
    numlocal = 0
    localprocs(:) = 0
    do i = 1, nprocs
      if (hostid == hostids(i)) then
        localprocs(i) = numlocal
        numlocal = numlocal + 1
      end if
    end do

    ! get the number of devices on this node
    numdev = acc_get_num_devices(ACC_DEVICE_NVIDIA)

    if (numdev < 1) then
      print *, "Error: there are no devices available on this host. ABORTING", myrank
      stop
    end if

    ! print a warning if the number of devices is less than the number of processes on this node.
    ! Having multiple processes share a device is not recommended.
    if (numdev < numlocal) then
      if (localprocs(myrank+1) == 1) then
        ! print warning message only once per node
        print *, "WARNING: the number of processes is greater than the number of GPUs.", myrank
      end if
      mydev = mod(localprocs(myrank+1), numdev)
    else
      mydev = localprocs(myrank+1)
    end if

    call acc_set_device_num(mydev, ACC_DEVICE_NVIDIA)
    call acc_init(ACC_DEVICE_NVIDIA)
    setDevice = acc_get_device_num(ACC_DEVICE_NVIDIA)

  END FUNCTION setDevice
#endif

END MODULE mpipara
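
! ---------------------------------------------------------------------------
! Illustrative usage sketch (kept as comments so this file still builds as a
! library): how a caller might obtain and release an MPI_ALLOC_MEM-backed
! exchange buffer through the generic interfaces above. The program name and
! the 1000 x 2 buffer shape are hypothetical, not values taken from this file.
! ---------------------------------------------------------------------------
! PROGRAM mpipara_usage_sketch
!   USE prec
!   USE mpipara
!   IMPLICIT NONE
!   REAL(rstd), POINTER :: buf(:,:)
!
!   CALL init_mpipara                       ! MPI (and, if enabled, XIOS) start-up
!   CALL allocate_mpi_buffer(buf, 1000, 2)  ! resolves to allocate_mpi_buffer_r3
!   buf(:,:) = 0.
!   CALL free_mpi_buffer(buf)               ! resolves to free_mpi_buffer_r3
!   CALL finalize_mpipara                   ! XIOS finalize + MPI_FINALIZE
! END PROGRAM mpipara_usage_sketch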