[26] | 1 | MODULE mpipara |
---|
| 2 | |
---|
! Module-wide state describing the MPI environment of the model.
! All variables below are assigned by init_mpipara unless stated otherwise.
implicit none

! Rank of this process and number of processes in comm_icosa
! (0 and 1 respectively when the code is built without CPP_USING_MPI).
integer,save :: mpi_rank
integer,save :: mpi_size
! Thread-support level returned by MPI_INIT_THREAD, capped to the level
! that was requested through the mpi_threading_mode run parameter.
integer,save :: mpi_threading_mode

! Communicator used by the model: the one returned by xios_initialize when
! built with CPP_USING_XIOS, MPI_COMM_WORLD otherwise, -1 without MPI.
integer,save :: comm_icosa
! Status argument shared by the MPI calls of init_mpipara and finalize_mpipara.
integer,save :: ierr
! .TRUE. when the code is built with CPP_USING_MPI.
logical,save :: using_mpi
! Both flags are .TRUE. on the process whose mpi_rank is 0.
logical,save :: is_mpi_root
logical,save :: is_mpi_master
! Rank designated as master; init_mpipara sets it to 0.
integer,save :: mpi_master

integer,save :: id_mpi ! id for profiling

! Accelerator number returned by setDevice in OpenACC builds, -1 otherwise.
integer, save :: device_id

! Generic allocation of a REAL(rstd) pointer buffer of rank 1, 2 or 3
! in memory obtained from MPI_ALLOC_MEM.
interface allocate_mpi_buffer
  module procedure allocate_mpi_buffer_r2, allocate_mpi_buffer_r3,allocate_mpi_buffer_r4
end interface allocate_mpi_buffer

! Generic release of a buffer obtained from allocate_mpi_buffer.
interface free_mpi_buffer
  module procedure free_mpi_buffer_r2, free_mpi_buffer_r3, free_mpi_buffer_r4
end interface free_mpi_buffer

! getin is a local copy of the routine from getin.f90 and is kept out of the
! public interface of this module.
private :: getin
---|
| 27 | |
---|
[26] | 28 | CONTAINS |
---|
| 29 | |
---|
! Read the run-time parameter "name" into "value".
! Copied from getin.f90 to avoid a circular module dependency.
!
!   name  : key handed to the ioipsl getin routine
!   value : passed to the ioipsl routine and printed afterwards; the caller
!           is expected to preset it (init_mpipara presets a default string)
!
! Only the OpenMP master thread calls the ioipsl reader and prints the result
! (the print is further restricted to processes with is_mpi_root set).
! When called from inside a parallel region, value is then handed to
! bcast_omp (presumably to share the master value with the other threads;
! bcast_omp is defined in transfert_omp_mod, not visible here).
subroutine getin(name,value)
  use omp_para
  use transfert_omp_mod
  use ioipsl, only : getin_=>getin
  implicit none
  character(len=*) :: name
  character(len=*) :: value

  !$OMP MASTER
  call getin_(name,value)
  if (is_mpi_root) then
    print *,'GETIN ',trim(name),' = ', trim(value)
  end if
  !$OMP END MASTER

  if (omp_in_parallel()) then
    call bcast_omp(value)
  end if
end subroutine getin
---|
| 44 | |
---|
! Initialise the parallel environment and fill the module variables.
!
! With CPP_USING_MPI:
!   1. read the run parameter mpi_threading_mode (default "funneled") and
!      translate it to an MPI thread-support constant; an unknown value
!      prints a message and stops the program;
!   2. call abort_acc when "serialized" or "multiple" is requested
!      (abort_acc lives in abort_mod; what it does is not visible here);
!   3. call MPI_INIT_THREAD and report the requested, provided and used levels
!      (the used level is never higher than the requested one);
!   4. obtain comm_icosa from XIOS (CPP_USING_XIOS) or use MPI_COMM_WORLD,
!      then query mpi_size and mpi_rank on it.
! Without CPP_USING_MPI: comm_icosa=-1, mpi_size=1, mpi_rank=0.
!
! In both cases rank 0 becomes root and master, and device_id is set from
! setDevice in OpenACC builds or to -1 otherwise.
subroutine init_mpipara
  use mpi_mod
#ifdef CPP_USING_XIOS
  use xios
#endif
  use abort_mod
  implicit none
  character(len=256) :: required_mode_str
  integer :: required_mode

  using_mpi=.false.
#ifdef CPP_USING_MPI
  using_mpi=.true.
#endif

  ! getin tests is_mpi_root, but the rank is only known after MPI has been
  ! initialised further down. Give the flag a defined value so that the early
  ! getin call below does not read an undefined logical. The real value is
  ! assigned at the end of this routine.
  ! NOTE(review): this assumes nothing sets is_mpi_root before init_mpipara.
  is_mpi_root=.false.

  if (using_mpi) then

    required_mode_str='funneled'
    call getin('mpi_threading_mode',required_mode_str)

    select case(trim(required_mode_str))
      case ('single')
        required_mode=MPI_THREAD_SINGLE
      case ('funneled')
        required_mode=MPI_THREAD_FUNNELED
      case ('serialized')
        required_mode=MPI_THREAD_SERIALIZED
      case ('multiple')
        required_mode=MPI_THREAD_MULTIPLE
      case default
        print*,'Bad selector for variable mpi_threading_mode : <', trim(required_mode_str), &
               '> => options are <single>, <funneled>, <serialized>, <multiple>'
        stop
    end select

    ! Levels above funneled are handed to abort_acc before MPI is started.
    if (required_mode==MPI_THREAD_SERIALIZED .or. required_mode==MPI_THREAD_MULTIPLE) then
      call abort_acc("mpi_threading_mode /= 'single' .AND. mpi_threading_mode /= 'funneled'")
    endif

    ! Report the level that is going to be requested.
    if (required_mode==MPI_THREAD_SINGLE)     print*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD required'
    if (required_mode==MPI_THREAD_FUNNELED)   print*,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED required'
    if (required_mode==MPI_THREAD_SERIALIZED) print*,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED required'
    if (required_mode==MPI_THREAD_MULTIPLE)   print*,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE required'

    call MPI_INIT_THREAD(required_mode,mpi_threading_mode,ierr)

    ! Report the level granted by the MPI library.
    if (mpi_threading_mode==MPI_THREAD_SINGLE)     print*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD provided'
    if (mpi_threading_mode==MPI_THREAD_FUNNELED)   print*,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED provided'
    if (mpi_threading_mode==MPI_THREAD_SERIALIZED) print*,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED provided'
    if (mpi_threading_mode==MPI_THREAD_MULTIPLE)   print*,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE provided'

    ! Never use more than what was asked for, even if the library grants it.
    if (mpi_threading_mode > required_mode) mpi_threading_mode=required_mode

    ! Report the level the model will actually rely on.
    if (mpi_threading_mode==MPI_THREAD_SINGLE) then
      print*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD used : Warning : openMP is not garanted to work'
    endif
    if (mpi_threading_mode==MPI_THREAD_FUNNELED)   print*,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED used'
    if (mpi_threading_mode==MPI_THREAD_SERIALIZED) print*,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED used'
    if (mpi_threading_mode==MPI_THREAD_MULTIPLE)   print*,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE used'

    ! Model communicator: supplied by XIOS when it is compiled in.
#ifdef CPP_USING_XIOS
    call xios_initialize("icosagcm",return_comm=comm_icosa)
#else
    comm_icosa=MPI_COMM_WORLD
#endif
    call MPI_COMM_SIZE(comm_icosa,mpi_size,ierr)
    call MPI_COMM_RANK(comm_icosa,mpi_rank,ierr)
    print *, 'MPI Process ', mpi_rank, '/', mpi_size
  else
    ! Sequential build: behave as a single process of rank 0.
    comm_icosa=-1
    mpi_size=1
    mpi_rank=0
  endif

  ! Rank 0 is both root and master.
  mpi_master=0
  is_mpi_root=(mpi_rank==0)
  is_mpi_master=is_mpi_root

#ifdef _OPENACC
  device_id = setDevice(mpi_size, mpi_rank)
  print *, 'GPU device ', device_id
#else
  device_id = -1
#endif

end subroutine init_mpipara
---|
| 137 | |
---|
! Shut down the parallel environment started by init_mpipara.
! XIOS, when compiled in, is finalized first; MPI_FINALIZE is then called
! only if the code was built with MPI (using_mpi).
subroutine finalize_mpipara
  use mpi_mod
#ifdef CPP_USING_XIOS
  use xios
#endif
  implicit none

#ifdef CPP_USING_XIOS
  call xios_finalize
#endif

  if (using_mpi) then
    call MPI_FINALIZE(ierr)
  end if

end subroutine finalize_mpipara
---|
| 151 | |
---|
[151] | 152 | |
---|
! Rank-1 specific of allocate_mpi_buffer.
! Obtains length elements from MPI_ALLOC_MEM and associates buffer with them.
!
!   buffer : pointer that is associated with the new memory on return
!   length : number of elements
!
! The status returned by the MPI calls is not examined.
! NOTE(review): the element size is the extent of MPI_REAL8 whatever the
! kind rstd is; confirm that rstd is an 8-byte real in every build.
subroutine allocate_mpi_buffer_r2(buffer,length)
  use prec
  use mpi_mod
  use iso_c_binding
  implicit none
  real(rstd), pointer :: buffer(:)
  integer,intent(in) :: length

  integer(kind=MPI_ADDRESS_KIND) :: lower_bound, elem_extent, nbytes
  type(c_ptr) :: raw_mem
  integer :: ierr

  ! Bytes occupied by one element, then by the whole buffer.
  call MPI_Type_get_extent(MPI_REAL8, lower_bound, elem_extent, ierr)
  nbytes=length*elem_extent

  ! Let MPI allocate the memory and view it as a Fortran array.
  call MPI_ALLOC_MEM(nbytes,MPI_INFO_NULL,raw_mem,ierr)
  call c_f_pointer(raw_mem, buffer, [ length ])

end subroutine allocate_mpi_buffer_r2
---|
[151] | 172 | |
---|
! Rank-1 specific of free_mpi_buffer.
! Returns to MPI the memory that buffer points to (memory obtained from
! allocate_mpi_buffer) and leaves buffer disassociated.
!
!   buffer : pointer to release; nothing is done if it is not associated
!
! The MPI status goes to a local variable, as in the allocate routines, so
! the module-level ierr is not modified by this call.
subroutine free_mpi_buffer_r2(buffer)
  use mpi_mod
  use prec
  implicit none
  real(rstd), pointer :: buffer(:)
  integer :: ierr

  ! Nothing to release.
  if (.not. associated(buffer)) return

  call MPI_FREE_MEM(buffer,ierr)
  ! Do not leave a dangling pointer behind.
  nullify(buffer)

end subroutine free_mpi_buffer_r2
---|
| 183 | |
---|
! Rank-2 specific of allocate_mpi_buffer.
! Obtains length*dim3 elements from MPI_ALLOC_MEM and associates buffer
! with them, shaped (length,dim3).
!
!   buffer : pointer that is associated with the new memory on return
!   length : extent of the first dimension
!   dim3   : extent of the second dimension
!
! The status returned by the MPI calls is not examined.
! NOTE(review): the element size is the extent of MPI_REAL8 whatever the
! kind rstd is; confirm that rstd is an 8-byte real in every build.
subroutine allocate_mpi_buffer_r3(buffer,length,dim3)
  use prec
  use mpi_mod
  use iso_c_binding
  implicit none
  real(rstd), pointer :: buffer(:,:)
  integer,intent(in) :: length
  integer,intent(in) :: dim3

  integer(kind=MPI_ADDRESS_KIND) :: lower_bound, elem_extent, nbytes
  type(c_ptr) :: raw_mem
  integer :: ierr

  ! Bytes occupied by one element, then by the whole buffer.
  call MPI_Type_get_extent(MPI_REAL8, lower_bound, elem_extent, ierr)
  nbytes=length*elem_extent*dim3

  ! Let MPI allocate the memory and view it as a Fortran array.
  call MPI_ALLOC_MEM(nbytes,MPI_INFO_NULL,raw_mem,ierr)
  call c_f_pointer(raw_mem, buffer, [ length, dim3 ])

end subroutine allocate_mpi_buffer_r3
---|
[151] | 204 | |
---|
! Rank-2 specific of free_mpi_buffer.
! Returns to MPI the memory that buffer points to (memory obtained from
! allocate_mpi_buffer) and leaves buffer disassociated.
!
!   buffer : pointer to release; nothing is done if it is not associated
!
! The MPI status goes to a local variable, as in the allocate routines, so
! the module-level ierr is not modified by this call.
subroutine free_mpi_buffer_r3(buffer)
  use mpi_mod
  use prec
  implicit none
  real(rstd), pointer :: buffer(:,:)
  integer :: ierr

  ! Nothing to release.
  if (.not. associated(buffer)) return

  call MPI_FREE_MEM(buffer,ierr)
  ! Do not leave a dangling pointer behind.
  nullify(buffer)

end subroutine free_mpi_buffer_r3
---|
| 215 | |
---|
! Rank-3 specific of allocate_mpi_buffer.
! Obtains length*dim3*dim4 elements from MPI_ALLOC_MEM and associates buffer
! with them, shaped (length,dim3,dim4).
!
!   buffer : pointer that is associated with the new memory on return
!   length : extent of the first dimension
!   dim3   : extent of the second dimension
!   dim4   : extent of the third dimension
!
! The status returned by the MPI calls is not examined.
! NOTE(review): the element size is the extent of MPI_REAL8 whatever the
! kind rstd is; confirm that rstd is an 8-byte real in every build.
subroutine allocate_mpi_buffer_r4(buffer,length,dim3,dim4)
  use prec
  use mpi_mod
  use iso_c_binding
  implicit none
  real(rstd), pointer :: buffer(:,:,:)
  integer,intent(in) :: length
  integer,intent(in) :: dim3
  integer,intent(in) :: dim4

  integer(kind=MPI_ADDRESS_KIND) :: lower_bound, elem_extent, nbytes
  type(c_ptr) :: raw_mem
  integer :: ierr

  ! Bytes occupied by one element, then by the whole buffer.
  call MPI_Type_get_extent(MPI_REAL8, lower_bound, elem_extent, ierr)
  nbytes=length*elem_extent*dim3*dim4

  ! Let MPI allocate the memory and view it as a Fortran array.
  call MPI_ALLOC_MEM(nbytes,MPI_INFO_NULL,raw_mem,ierr)
  call c_f_pointer(raw_mem, buffer, [ length, dim3, dim4 ])

end subroutine allocate_mpi_buffer_r4
---|
[186] | 237 | |
---|
! Rank-3 specific of free_mpi_buffer.
! Returns to MPI the memory that buffer points to (memory obtained from
! allocate_mpi_buffer) and leaves buffer disassociated.
!
!   buffer : pointer to release; nothing is done if it is not associated
!
! The MPI status goes to a local variable, as in the allocate routines, so
! the module-level ierr is not modified by this call.
subroutine free_mpi_buffer_r4(buffer)
  use mpi_mod
  use prec
  implicit none
  real(rstd), pointer :: buffer(:,:,:)
  integer :: ierr

  ! Nothing to release.
  if (.not. associated(buffer)) return

  call MPI_FREE_MEM(buffer,ierr)
  ! Do not leave a dangling pointer behind.
  nullify(buffer)

end subroutine free_mpi_buffer_r4
---|
[26] | 248 | |
---|
[953] | 249 | #ifdef _OPENACC |
---|
! Select and initialise the NVIDIA accelerator used by this process.
!
!   nprocs : number of processes in the model communicator (comm_icosa)
!   myrank : rank of this process in that communicator, counted from 0
!   result : device number reported by acc_get_device_num after selection
!
! Processes are grouped per node by comparing the value of gethostid.
! Each process takes the device whose number equals its position among the
! processes of its node, wrapping around when a node has fewer devices than
! processes. The program stops if no device is available.
!
! The host ids are gathered on comm_icosa, the communicator that nprocs and
! myrank refer to, so that the array sizes and the rank ordering agree even
! when comm_icosa is not MPI_COMM_WORLD. Without MPI no gather is done.
function setDevice(nprocs, myrank)
  use iso_c_binding
  use openacc
  use mpi_mod
  implicit none

  ! NOTE(review): POSIX declares gethostid as returning long; this interface
  ! reads it as a C int. Confirm this is adequate on the target platforms.
  interface
    function gethostid() bind(C)
      use iso_c_binding
      integer(C_INT) :: gethostid
    end function gethostid
  end interface

  integer, intent(in) :: nprocs, myrank
  integer :: hostids(nprocs), localprocs(nprocs)
  integer :: hostid, ierr, numdev, mydev, i, numlocal
  integer :: setDevice

  ! get the hostids so we can determine what other processes are on this node
  hostid = gethostid()
  if (using_mpi) then
    call mpi_allgather(hostid,1,MPI_INTEGER,hostids,1,MPI_INTEGER, comm_icosa, ierr)
  else
    hostids(:) = hostid
  end if

  ! determine which processes are on this node:
  ! localprocs(i) is the position on the node of the process of rank i-1
  numlocal = 0
  localprocs(:) = 0
  do i = 1, nprocs
    if (hostid == hostids(i)) then
      localprocs(i) = numlocal
      numlocal = numlocal + 1
    end if
  end do

  ! get the number of devices on this node
  numdev = acc_get_num_devices(ACC_DEVICE_NVIDIA)

  if (numdev < 1) then
    print *, "Error: there are no devices available on this host.  ABORTING", myrank
    stop
  end if

  ! print a warning if the number of devices is less than the number of
  ! processes on this node. Having multiple processes share a device is
  ! not recommended
  if (numdev < numlocal) then
    if (localprocs(myrank+1) == 1) then
      ! print warning message only once per node
      print *, "WARNING: the number of process is greater than the number of GPUs.", myrank
    end if
    mydev = mod(localprocs(myrank+1), numdev)
  else
    mydev = localprocs(myrank+1)
  end if

  call acc_set_device_num(mydev,ACC_DEVICE_NVIDIA)
  call acc_init(ACC_DEVICE_NVIDIA)
  setDevice = acc_get_device_num(ACC_DEVICE_NVIDIA)
end function setDevice
---|
| 305 | |
---|
| 306 | #endif |
---|
| 307 | |
---|
| 308 | |
---|
| 309 | |
---|
[26] | 310 | END MODULE mpipara |
---|