! Module for MPI communication of field halos
! This module uses Fortran 2003 features: the move_alloc intrinsic and allocatable array assignment
module transfert_mpi_mod
  use abort_mod, only : dynamico_abort
  use profiling_mod, only : register_id
  use field_mod, only : t_field
  use transfert_types_mod
  use transfert_request_mod
  implicit none
  private

  public :: t_message, t_request, &
    & req_i1, req_e1_scal, req_e1_vect, &
    & req_i0, req_e0_scal, req_e0_vect, &
    & req_z1_scal, &
    & send_message, &
    & wait_message, &
    & test_message, &
    & init_transfert, &
    & init_message, &
    & finalize_message

contains

  ! Initialize transfers: must be called before any other transfert_mpi routine
  subroutine init_transfert
    use mpi_mod, only : MPI_THREAD_SINGLE, MPI_THREAD_FUNNELED
    use mpipara, only : mpi_threading_mode
    use profiling_mod, only : register_id

    !$omp master
    ! Check requested thread support
    if( mpi_threading_mode /= MPI_THREAD_SINGLE .and. mpi_threading_mode /= MPI_THREAD_FUNNELED ) &
      call dynamico_abort("Only single and funneled threading modes are supported.")

    ! Register profiling ids
    call register_id("MPI", profile_mpi)
    if( profile_mpi_detail ) then
      call register_id("MPI_copies", profile_mpi_copies)
      call register_id("MPI_waitall", profile_mpi_waitall)
      call register_id("MPI_omp_barrier", profile_mpi_barrier)
    else
      profile_mpi_copies = profile_mpi
      profile_mpi_waitall = profile_mpi
      profile_mpi_barrier = profile_mpi
    endif

    ! Initialize requests
    call init_all_requests()
    !$omp end master
    !$omp barrier
  end subroutine init_transfert
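
  ! Typical call sequence (sketch only: f_theta is a hypothetical t_field array,
  ! and the send_message/wait_message signatures are assumed, as those routines
  ! are defined elsewhere in this module):
  !   type(t_message) :: msg
  !   call init_transfert()                             ! once, at startup
  !   call init_message(f_theta, req_i1, msg, "theta")  ! build the halo exchange plan
  !   call send_message(f_theta, msg)                   ! post MPI sends/receives
  !   call wait_message(msg)                            ! complete the exchange
  !   call finalize_message(msg)                        ! free buffers and MPI requests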

  subroutine init_message(field, request, message, name)
    use mpi_mod
    use mpipara
    use domain_mod, only : ndomain_glo, domloc_glo_ind, domglo_loc_ind, domglo_rank
    type(t_field), pointer, intent(in) :: field(:)
    type(t_request), pointer, intent(inout) :: request(:)
    type(t_message), target, intent(out) :: message ! Needs intent(out) for call to finalize_message
    character(len=*), intent(in), optional :: name

    !$omp barrier
    !$omp master
    ! Initialize off-device
    message%ondevice = .false.
    message%send_seq = 0
    message%wait_seq = 0

    if( request(1)%field_type /= field(1)%field_type ) call dynamico_abort( "init_message : field_type/request mismatch" )

    message%field => field
    message%request => request

    ! Fortran 2003: assignment to an allocatable array allocates (or reallocates) it to fit the r.h.s.
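    !   e.g. for integer, allocatable :: a(:), the statement a = (/ 1, 2, 3 /)
    !   automatically allocates a with size 3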
    if(request(1)%has_message) then
      message%message_in = request(1)%message_in
      message%message_out = request(1)%message_out
      message%message_local = request(1)%message_local
    else
      call init_message_in_out_local(message)
      request(1)%message_in = message%message_in
      request(1)%message_out = message%message_out
      request(1)%message_local = message%message_local
      request(1)%has_message = .true.
    end if

    call message_alloc_mpi_buf(size(field(1)%rval4d,2) * size(field(1)%rval4d,3), message)
    call message_create_MPI_request(message)
    call message_compactify(message)
    !$omp end master
    !$omp barrier
  end subroutine init_message

  subroutine init_message_in_out_local(message)
    ! Create the lists of inbound/outbound/local submessages
    use mpi_mod
    use mpipara
    use domain_mod, only : ndomain_glo, domloc_glo_ind, domglo_loc_ind, domglo_rank
    type(t_message), target, intent(inout) :: message

    type(t_local_submessage), allocatable :: message_local_tmp(:)
    type(t_submessage), allocatable :: message_in_tmp(:), message_out_tmp(:)
    type(t_request), pointer :: request(:)
    type(t_submessage) :: submessage_in, submessage_out
    type(t_local_submessage) :: submessage_local
    integer, parameter :: INITIAL_ALLOC_SIZE = 10
    integer :: field_type, ind_loc, remote_ind_glo, loc_ind_glo
    integer :: message_in_size, message_out_size, message_local_size

    request => message%request
    field_type = request(1)%field_type

    allocate(message_in_tmp(INITIAL_ALLOC_SIZE))
    message_in_size = 0
    allocate(message_out_tmp(INITIAL_ALLOC_SIZE))
    message_out_size = 0
    allocate(message_local_tmp(INITIAL_ALLOC_SIZE))
    message_local_size = 0
    do loc_ind_glo = 1, ndomain_glo
      do remote_ind_glo = 1, ndomain_glo
        if(domglo_rank(loc_ind_glo) == mpi_rank) then
          ind_loc = domglo_loc_ind(loc_ind_glo)
          if( domglo_rank(remote_ind_glo) == mpi_rank ) then ! If sending to a local domain
            if(request(ind_loc)%points_HtoB(remote_ind_glo)%npoints > 0 ) then ! Add only non-empty messages
              ! Add local message ind_loc -> remote_ind_glo, aggregating submessage_in and submessage_out into submessage_local
              submessage_out = make_submessage( field_type, &
                                                request(ind_loc)%points_HtoB(remote_ind_glo), &
                                                ind_loc, remote_ind_glo, request(1)%vector )
              submessage_in = make_submessage( field_type, &
                                               request(domglo_loc_ind(remote_ind_glo))%points_BtoH(domloc_glo_ind(ind_loc)), &
                                               domglo_loc_ind(remote_ind_glo), domloc_glo_ind(ind_loc), request(1)%vector )
              submessage_local%src_ind_loc = ind_loc
              submessage_local%dest_ind_loc = domglo_loc_ind(remote_ind_glo)
              submessage_local%npoints = submessage_out%npoints
              submessage_local%displ_src = submessage_out%displs
              submessage_local%displ_dest = submessage_in%displs
              submessage_local%sign = submessage_in%sign
              ! Add to the local message list
              call array_append_local_submessage( message_local_tmp, message_local_size, submessage_local )
            endif
          else ! If remote domain
            ! When there is data to send to the remote domain, add a submessage to message%message_out
            if( request(ind_loc)%points_HtoB(remote_ind_glo)%npoints > 0 ) then
              submessage_out = make_submessage( field_type, request(ind_loc)%points_HtoB(remote_ind_glo), &
                                                ind_loc, remote_ind_glo, request(1)%vector )
              call array_append_submessage( message_out_tmp, message_out_size, submessage_out )
            end if
          end if
        end if
      end do
    end do
    ! Recv and send submessages are transposed so that both sides traverse them in the same order.
    ! We iterate over the global domain index to match each send with its receive
    ! (local domains are not ordered the same way as global domains).
    do remote_ind_glo = 1, ndomain_glo
      do loc_ind_glo = 1, ndomain_glo
        if( (domglo_rank(loc_ind_glo) == mpi_rank) .and. (domglo_rank(remote_ind_glo) /= mpi_rank) ) then
          ind_loc = domglo_loc_ind(loc_ind_glo)
          if( request(ind_loc)%points_BtoH(remote_ind_glo)%npoints > 0 ) then
            submessage_in = make_submessage( field_type, request(ind_loc)%points_BtoH(remote_ind_glo), &
                                             ind_loc, remote_ind_glo, request(1)%vector )
            call array_append_submessage( message_in_tmp, message_in_size, submessage_in )
          end if
        end if
      end do
    end do

    ! Trim message_xx_tmp and put it in message%message_xx
    ! Fortran 2003: assignment to an allocatable array allocates it to fit the r.h.s.
    message%message_in = message_in_tmp(:message_in_size)
    message%message_out = message_out_tmp(:message_out_size)
    message%message_local = message_local_tmp(:message_local_size)

  end subroutine init_message_in_out_local

  ! Generate submessage from points
  function make_submessage(field_type, points, ind_loc, remote_ind_glo, vector) result(submessage)
    use dimensions, only : swap_dimensions, iim, u_pos, z_pos
    use domain_mod, only : domain, domglo_rank
    use field_mod, only : field_T, field_U, field_Z
    integer, intent(in) :: field_type
    type(t_points), intent(in) :: points
    integer, intent(in) :: ind_loc, remote_ind_glo
    logical, intent(in) :: vector
    integer :: k
    type(t_submessage) :: submessage

    call swap_dimensions(ind_loc)
    submessage%ind_loc = ind_loc
    submessage%remote_ind_glo = remote_ind_glo
    submessage%remote_rank = domglo_rank(remote_ind_glo)
    submessage%npoints = points%npoints
    submessage%mpi_buffer_displ = -1 ! Buffers not allocated yet
    allocate( submessage%displs( points%npoints ) )
    submessage%displs(:) = points%i + (points%j-1)*iim
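    ! e.g. with iim = 10, the point (i=3, j=2) gets base displacement 3 + (2-1)*10 = 13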
    if(field_type == field_U) submessage%displs = submessage%displs + u_pos( points%elt )
    if(field_type == field_Z) submessage%displs = submessage%displs + z_pos( points%elt )
    allocate(submessage%sign( points%npoints ))
    if( vector ) then ! For U fields only
      submessage%sign(:) = (/( domain(ind_loc)%edge_assign_sign(points%elt(k)-1, points%i(k), points%j(k)), k=1, points%npoints )/)
    else
      submessage%sign(:) = 1
    endif
  end function make_submessage

  ! Add element to array, and reallocate if necessary
  subroutine array_append_submessage( a, a_size, elt )
    type(t_submessage), allocatable, intent(inout) :: a(:)
    integer, intent(inout) :: a_size
    type(t_submessage), intent(in) :: elt
    type(t_submessage), allocatable :: a_tmp(:)
    integer, parameter :: GROW_FACTOR = 2

    if( size( a ) <= a_size ) then
      allocate( a_tmp( a_size * GROW_FACTOR ) )
      a_tmp(1:a_size) = a(1:a_size)
      call move_alloc(a_tmp, a)
    end if
    a_size = a_size + 1
    a(a_size) = elt
  end subroutine array_append_submessage
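
  ! Note: a_size tracks the number of valid elements while size(a) is the allocated
  ! capacity; doubling the capacity on overflow keeps appends amortized O(1).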

  ! Add element to array, and reallocate if necessary (t_local_submessage variant)
  subroutine array_append_local_submessage( a, a_size, elt )
    type(t_local_submessage), allocatable, intent(inout) :: a(:)
    integer, intent(inout) :: a_size
    type(t_local_submessage), intent(in) :: elt
    type(t_local_submessage), allocatable :: a_tmp(:)
    integer, parameter :: GROW_FACTOR = 2

    if( size( a ) <= a_size ) then
      allocate( a_tmp( a_size * GROW_FACTOR ) )
      a_tmp(1:a_size) = a(1:a_size)
      call move_alloc(a_tmp, a)
    end if
    a_size = a_size + 1
    a(a_size) = elt
  end subroutine array_append_local_submessage

  subroutine finalize_message(message)
    use mpi_mod
    use mpipara, only : mpi_size
    type(t_message), intent(inout) :: message
    integer :: i, ierr

    !$omp barrier
    !$omp master
    if(message%send_seq /= message%wait_seq) call dynamico_abort("No matching wait_message before finalization")

    if(message%ondevice) call message_delete_ondevice(message)
    deallocate(message%message_in)
    deallocate(message%message_out)
    deallocate(message%message_local)
    do i = 0, mpi_size-1
      if(message%mpi_requests_in(i) /= MPI_REQUEST_NULL) call MPI_Request_free(message%mpi_requests_in(i), ierr)
      if(message%mpi_requests_out(i) /= MPI_REQUEST_NULL) call MPI_Request_free(message%mpi_requests_out(i), ierr)
      deallocate(message%mpi_buffer_in(i)%buff)
      deallocate(message%mpi_buffer_out(i)%buff)
    end do
    deallocate(message%mpi_buffer_in)
    deallocate(message%mpi_buffer_out)
    deallocate(message%mpi_requests_in)
    deallocate(message%mpi_requests_out)
    deallocate(message%message_in_compact)
    deallocate(message%message_out_compact)
    deallocate(message%message_local_compact)
    !$omp end master
    !$omp barrier
  end subroutine finalize_message
269 | |
---|
270 | end module transfert_mpi_mod |
---|