1 | MODULE mpipara |
---|
2 | |
---|
3 | INTEGER,SAVE :: mpi_rank |
---|
4 | INTEGER,SAVE :: mpi_size |
---|
5 | INTEGER,SAVE :: mpi_threading_mode |
---|
6 | |
---|
7 | INTEGER,SAVE :: comm_icosa |
---|
8 | INTEGER,SAVE :: ierr |
---|
9 | LOGICAL,SAVE :: using_mpi |
---|
10 | LOGICAL,SAVE :: is_mpi_root |
---|
11 | LOGICAL,SAVE :: is_mpi_master |
---|
12 | INTEGER,SAVE :: mpi_master |
---|
13 | |
---|
14 | INTEGER,SAVE :: id_mpi ! id for profiling |
---|
15 | |
---|
16 | INTEGER, SAVE :: device_id |
---|
17 | |
---|
18 | INTERFACE allocate_mpi_buffer |
---|
19 | MODULE PROCEDURE allocate_mpi_buffer_r2, allocate_mpi_buffer_r3,allocate_mpi_buffer_r4 |
---|
20 | END INTERFACE allocate_mpi_buffer |
---|
21 | |
---|
22 | INTERFACE free_mpi_buffer |
---|
23 | MODULE PROCEDURE free_mpi_buffer_r2, free_mpi_buffer_r3, free_mpi_buffer_r4 |
---|
24 | END INTERFACE free_mpi_buffer |
---|
25 | |
---|
26 | PRIVATE :: getin |
---|
27 | |
---|
28 | CONTAINS |
---|
29 | |
---|
30 | SUBROUTINE getin(name,value) ! Copied from getin.f90 to avoid circular dependency |
---|
31 | USE ioipsl, ONLY : getin_=>getin |
---|
32 | USE transfert_omp_mod |
---|
33 | USE omp_para |
---|
34 | IMPLICIT NONE |
---|
35 | CHARACTER(LEN=*) :: name |
---|
36 | CHARACTER(LEN=*) :: value |
---|
37 | |
---|
38 | !$OMP MASTER |
---|
39 | CALL getin_(name,value) |
---|
40 | IF(is_mpi_root) PRINT *,'GETIN ',TRIM(name),' = ', TRIM(value) |
---|
41 | !$OMP END MASTER |
---|
42 | IF (omp_in_parallel()) CALL bcast_omp(value) |
---|
43 | END SUBROUTINE getin |
---|
44 | |
---|
45 | SUBROUTINE init_mpipara |
---|
46 | USE mpi_mod |
---|
47 | #ifdef CPP_USING_XIOS |
---|
48 | USE xios |
---|
49 | #endif |
---|
50 | USE abort_mod |
---|
51 | IMPLICIT NONE |
---|
52 | CHARACTER(LEN=256) :: required_mode_str |
---|
53 | INTEGER :: required_mode |
---|
54 | |
---|
55 | using_mpi=.FALSE. |
---|
56 | #ifdef CPP_USING_MPI |
---|
57 | using_mpi=.TRUE. |
---|
58 | #endif |
---|
59 | |
---|
60 | IF (using_mpi) THEN |
---|
61 | |
---|
62 | required_mode_str='funneled' |
---|
63 | CALL getin('mpi_threading_mode',required_mode_str) |
---|
64 | |
---|
65 | SELECT CASE(TRIM(required_mode_str)) |
---|
66 | CASE ('single') |
---|
67 | required_mode=MPI_THREAD_SINGLE |
---|
68 | CASE ('funneled') |
---|
69 | required_mode=MPI_THREAD_FUNNELED |
---|
70 | CASE ('serialized') |
---|
71 | required_mode=MPI_THREAD_SERIALIZED |
---|
72 | CASE ('multiple') |
---|
73 | required_mode=MPI_THREAD_MULTIPLE |
---|
74 | CASE DEFAULT |
---|
75 | PRINT*,'Bad selector for variable mpi_threading_mode : <', TRIM(required_mode_str), & |
---|
76 | '> => options are <single>, <funneled>, <serialized>, <multiple>' |
---|
77 | STOP |
---|
78 | END SELECT |
---|
79 | |
---|
80 | IF (required_mode==MPI_THREAD_SERIALIZED .OR. required_mode==MPI_THREAD_MULTIPLE) THEN |
---|
81 | CALL abort_acc("mpi_threading_mode /= 'single' .AND. mpi_threading_mode /= 'funneled'") |
---|
82 | ENDIF |
---|
83 | |
---|
84 | IF (required_mode==MPI_THREAD_SINGLE) PRINT*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD required' |
---|
85 | IF (required_mode==MPI_THREAD_FUNNELED) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED required' |
---|
86 | IF (required_mode==MPI_THREAD_SERIALIZED) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED required' |
---|
87 | IF (required_mode==MPI_THREAD_MULTIPLE) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE required' |
---|
88 | |
---|
89 | CALL MPI_INIT_THREAD(required_mode,mpi_threading_mode,ierr) |
---|
90 | |
---|
91 | IF (mpi_threading_mode==MPI_THREAD_SINGLE) PRINT*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD provided' |
---|
92 | IF (mpi_threading_mode==MPI_THREAD_FUNNELED) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED provided' |
---|
93 | IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED provided' |
---|
94 | IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE provided' |
---|
95 | |
---|
96 | IF (mpi_threading_mode > required_mode) mpi_threading_mode=required_mode |
---|
97 | |
---|
98 | IF (mpi_threading_mode==MPI_THREAD_SINGLE) THEN |
---|
99 | PRINT*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD used : Warning : openMP is not garanted to work' |
---|
100 | ENDIF |
---|
101 | IF (mpi_threading_mode==MPI_THREAD_FUNNELED) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_FUNNELED used' |
---|
102 | IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_SERIALIZED used' |
---|
103 | IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) PRINT*,'MPI_INIT_THREAD : MPI_THREAD_MULTIPLE used' |
---|
104 | |
---|
105 | #ifdef CPP_USING_XIOS |
---|
106 | CALL xios_initialize("icosagcm",return_comm=comm_icosa) |
---|
107 | #else |
---|
108 | comm_icosa=MPI_COMM_WORLD |
---|
109 | #endif |
---|
110 | CALL MPI_COMM_SIZE(comm_icosa,mpi_size,ierr) |
---|
111 | CALL MPI_COMM_RANK(comm_icosa,mpi_rank,ierr) |
---|
112 | PRINT *, 'MPI Process ', mpi_rank, '/', mpi_size |
---|
113 | ELSE |
---|
114 | comm_icosa=-1 |
---|
115 | mpi_size=1 |
---|
116 | mpi_rank=0 |
---|
117 | ENDIF |
---|
118 | |
---|
119 | mpi_master=0 |
---|
120 | IF (mpi_rank==0) THEN |
---|
121 | is_mpi_root=.TRUE. |
---|
122 | is_mpi_master=.TRUE. |
---|
123 | ELSE |
---|
124 | is_mpi_root=.FALSE. |
---|
125 | is_mpi_master=.FALSE. |
---|
126 | ENDIF |
---|
127 | |
---|
128 | |
---|
129 | #ifdef _OPENACC |
---|
130 | device_id = setDevice(mpi_size, mpi_rank) |
---|
131 | PRINT *, 'GPU device ', device_id |
---|
132 | #else |
---|
133 | device_id = -1 |
---|
134 | #endif |
---|
135 | |
---|
136 | END SUBROUTINE init_mpipara |
---|
137 | |
---|
138 | SUBROUTINE finalize_mpipara |
---|
139 | USE mpi_mod |
---|
140 | #ifdef CPP_USING_XIOS |
---|
141 | USE xios |
---|
142 | #endif |
---|
143 | IMPLICIT NONE |
---|
144 | |
---|
145 | #ifdef CPP_USING_XIOS |
---|
146 | CALL xios_finalize |
---|
147 | #endif |
---|
148 | IF (using_mpi) CALL MPI_FINALIZE(ierr) |
---|
149 | |
---|
150 | END SUBROUTINE finalize_mpipara |
---|
151 | |
---|
152 | |
---|
153 | SUBROUTINE allocate_mpi_buffer_r2(buffer,length) |
---|
154 | USE ISO_C_BINDING |
---|
155 | USE mpi_mod |
---|
156 | USE prec |
---|
157 | IMPLICIT NONE |
---|
158 | REAL(rstd), POINTER :: buffer(:) |
---|
159 | INTEGER,INTENT(IN) :: length |
---|
160 | |
---|
161 | TYPE(C_PTR) :: base_ptr |
---|
162 | INTEGER(KIND=MPI_ADDRESS_KIND) :: real_size,lb,size |
---|
163 | INTEGER :: ierr |
---|
164 | |
---|
165 | CALL MPI_Type_get_extent(MPI_REAL8, lb, real_size, ierr) |
---|
166 | size=length*real_size |
---|
167 | |
---|
168 | CALL MPI_ALLOC_MEM(size,MPI_INFO_NULL,base_ptr,ierr) |
---|
169 | CALL C_F_POINTER(base_ptr, buffer, (/ length /)) |
---|
170 | |
---|
171 | END SUBROUTINE allocate_mpi_buffer_r2 |
---|
172 | |
---|
173 | SUBROUTINE free_mpi_buffer_r2(buffer) |
---|
174 | USE ISO_C_BINDING |
---|
175 | USE mpi_mod |
---|
176 | USE prec |
---|
177 | IMPLICIT NONE |
---|
178 | REAL(rstd), POINTER :: buffer(:) |
---|
179 | |
---|
180 | CALL MPI_FREE_MEM(buffer,ierr) |
---|
181 | |
---|
182 | END SUBROUTINE free_mpi_buffer_r2 |
---|
183 | |
---|
184 | SUBROUTINE allocate_mpi_buffer_r3(buffer,length,dim3) |
---|
185 | USE ISO_C_BINDING |
---|
186 | USE mpi_mod |
---|
187 | USE prec |
---|
188 | IMPLICIT NONE |
---|
189 | REAL(rstd), POINTER :: buffer(:,:) |
---|
190 | INTEGER,INTENT(IN) :: length |
---|
191 | INTEGER,INTENT(IN) :: dim3 |
---|
192 | |
---|
193 | TYPE(C_PTR) :: base_ptr |
---|
194 | INTEGER(KIND=MPI_ADDRESS_KIND) :: real_size,lb,size |
---|
195 | INTEGER :: ierr |
---|
196 | |
---|
197 | CALL MPI_Type_get_extent(MPI_REAL8, lb, real_size, ierr) |
---|
198 | size=length*real_size*dim3 |
---|
199 | |
---|
200 | CALL MPI_ALLOC_MEM(size,MPI_INFO_NULL,base_ptr,ierr) |
---|
201 | CALL C_F_POINTER(base_ptr, buffer, (/ length,dim3 /)) |
---|
202 | |
---|
203 | END SUBROUTINE allocate_mpi_buffer_r3 |
---|
204 | |
---|
205 | SUBROUTINE free_mpi_buffer_r3(buffer) |
---|
206 | USE ISO_C_BINDING |
---|
207 | USE mpi_mod |
---|
208 | USE prec |
---|
209 | IMPLICIT NONE |
---|
210 | REAL(rstd), POINTER :: buffer(:,:) |
---|
211 | |
---|
212 | CALL MPI_FREE_MEM(buffer,ierr) |
---|
213 | |
---|
214 | END SUBROUTINE free_mpi_buffer_r3 |
---|
215 | |
---|
216 | SUBROUTINE allocate_mpi_buffer_r4(buffer,length,dim3,dim4) |
---|
217 | USE ISO_C_BINDING |
---|
218 | USE mpi_mod |
---|
219 | USE prec |
---|
220 | IMPLICIT NONE |
---|
221 | REAL(rstd), POINTER :: buffer(:,:,:) |
---|
222 | INTEGER,INTENT(IN) :: length |
---|
223 | INTEGER,INTENT(IN) :: dim3 |
---|
224 | INTEGER,INTENT(IN) :: dim4 |
---|
225 | |
---|
226 | TYPE(C_PTR) :: base_ptr |
---|
227 | INTEGER(KIND=MPI_ADDRESS_KIND) :: real_size,lb,size |
---|
228 | INTEGER :: ierr |
---|
229 | |
---|
230 | CALL MPI_Type_get_extent(MPI_REAL8, lb, real_size, ierr) |
---|
231 | size=length*real_size*dim3*dim4 |
---|
232 | |
---|
233 | CALL MPI_ALLOC_MEM(size,MPI_INFO_NULL,base_ptr,ierr) |
---|
234 | CALL C_F_POINTER(base_ptr, buffer, (/ length, dim3, dim4 /)) |
---|
235 | |
---|
236 | END SUBROUTINE allocate_mpi_buffer_r4 |
---|
237 | |
---|
238 | SUBROUTINE free_mpi_buffer_r4(buffer) |
---|
239 | USE ISO_C_BINDING |
---|
240 | USE mpi_mod |
---|
241 | USE prec |
---|
242 | IMPLICIT NONE |
---|
243 | REAL(rstd), POINTER :: buffer(:,:,:) |
---|
244 | |
---|
245 | CALL MPI_FREE_MEM(buffer,ierr) |
---|
246 | |
---|
247 | END SUBROUTINE free_mpi_buffer_r4 |
---|
248 | |
---|
249 | #ifdef _OPENACC |
---|
250 | FUNCTION setDevice(nprocs, myrank) |
---|
251 | use iso_c_binding |
---|
252 | use openacc |
---|
253 | USE mpi_mod |
---|
254 | implicit none |
---|
255 | |
---|
256 | interface |
---|
257 | function gethostid() bind(C) |
---|
258 | use iso_c_binding |
---|
259 | integer(C_INT) :: gethostid |
---|
260 | end function gethostid |
---|
261 | end interface |
---|
262 | |
---|
263 | integer, intent(in) :: nprocs, myrank |
---|
264 | integer :: hostids(nprocs), localprocs(nprocs) |
---|
265 | integer :: hostid, ierr, numdev, mydev, i, numlocal |
---|
266 | integer :: setDevice |
---|
267 | |
---|
268 | ! get the hostids so we can determine what other processes are on this node |
---|
269 | hostid = gethostid() |
---|
270 | call mpi_allgather(hostid,1,MPI_INTEGER,hostids,1,MPI_INTEGER, MPI_COMM_WORLD, ierr) |
---|
271 | |
---|
272 | ! determine which processors are on this node |
---|
273 | numlocal = 0 |
---|
274 | localprocs(:) = 0 |
---|
275 | do i = 1, nprocs |
---|
276 | if (hostid == hostids(i)) then |
---|
277 | localprocs(i) = numlocal |
---|
278 | numlocal = numlocal + 1 |
---|
279 | end if |
---|
280 | end do |
---|
281 | |
---|
282 | ! get the number of device on this node |
---|
283 | numdev = acc_get_num_devices(ACC_DEVICE_NVIDIA) |
---|
284 | |
---|
285 | if (numdev < 1) then |
---|
286 | print *, "Error: there are no devices available on this host. ABORTING", myrank |
---|
287 | stop |
---|
288 | end if |
---|
289 | |
---|
290 | ! print a warning if the number of devices is less than the number of processes on this node. Having multiple processes share a devices is not recommended |
---|
291 | if (numdev < numlocal) then |
---|
292 | if (localprocs(myrank+1) == 1) then |
---|
293 | ! print warning message only once per node |
---|
294 | print *, "WARNING: the number of process is greater than the number of GPUs.", myrank |
---|
295 | end if |
---|
296 | mydev = mod(localprocs(myrank+1), numdev) |
---|
297 | else |
---|
298 | mydev = localprocs(myrank+1) |
---|
299 | end if |
---|
300 | |
---|
301 | call acc_set_device_num(mydev,ACC_DEVICE_NVIDIA) |
---|
302 | call acc_init(ACC_DEVICE_NVIDIA) |
---|
303 | setDevice = acc_get_device_num(ACC_DEVICE_NVIDIA) |
---|
304 | END FUNCTION setDevice |
---|
305 | |
---|
306 | #endif |
---|
307 | |
---|
308 | |
---|
309 | |
---|
310 | END MODULE mpipara |
---|