[963] | 1 | ! Module for MPI communication of field halos |
---|
| 2 | ! This module uses Fortran 2003 features : move_alloc intrinsic, pointer bounds remapping, allocatable type fields |
---|
| 3 | module transfert_mpi_mod |
---|
| 4 | use abort_mod, only : dynamico_abort, abort_acc |
---|
| 5 | use profiling_mod, only : enter_profile, exit_profile, register_id |
---|
| 6 | use domain_mod, only : ndomain, ndomain_glo, domain, domain_glo, domloc_glo_ind, domglo_rank, domglo_loc_ind |
---|
[965] | 7 | use field_mod, only : t_field, field_T, field_U, field_Z |
---|
[963] | 8 | use transfert_request_mod |
---|
| 9 | implicit none |
---|
| 10 | private |
---|
[186] | 11 | |
---|
[963] | 12 | ! Describes how to pack/unpack a message from a local domain to another |
---|
| 13 | type t_local_submessage |
---|
| 14 | integer :: src_ind_loc, dest_ind_loc ! index of local and remote domain |
---|
| 15 | integer, allocatable :: displ_src(:) ! List of indexes to copy from domain src_ind_loc |
---|
| 16 | integer, allocatable :: displ_dest(:) ! List of indexes to copy to domain dest_ind_loc |
---|
| 17 | integer, allocatable :: sign(:) ! Sign change to be applied for vector requests |
---|
| 18 | end type |
---|
[266] | 19 | |
---|
[963] | 20 | ! Describes how to pack/unpack a message from a domain to another, and contains MPI buffer |
---|
| 21 | type t_submessage |
---|
| 22 | integer :: ind_loc, remote_ind_glo ! index of local and remote domain |
---|
| 23 | integer, allocatable :: displs(:) ! List of indexes to copy from field to buffer for each level |
---|
| 24 | integer, allocatable :: sign(:) ! Sign change to be applied for vector requests |
---|
| 25 | real, allocatable :: buff(:,:,:) ! MPI buffer buff(iim*jjm[*3],dim3,dim4) |
---|
| 26 | end type |
---|
[266] | 27 | |
---|
[963] | 28 | ! Describes how to exchange data for a field. |
---|
| 29 | type t_message |
---|
| 30 | type (t_field), pointer :: field(:) => null() ! Field to exchange |
---|
| 31 | type (t_request), pointer :: request(:) => null() ! Type of message to send |
---|
| 32 | type (t_local_submessage), pointer :: message_local(:) ! Local halo copies |
---|
| 33 | type (t_submessage), pointer :: message_in(:) ! Messages to recieve from remote ranks and to copy back to the field |
---|
| 34 | type (t_submessage), pointer :: message_out(:) ! Halos to copy to MPI buffer and to send to remote ranks |
---|
| 35 | integer, pointer :: mpi_requests_in(:) ! MPI requests used for message_in. |
---|
| 36 | integer, pointer :: mpi_requests_out(:) ! MPI requests used for message_out. |
---|
| 37 | ! NOTE : requests are persistant requests initialized in init_message. MPI_Start and MPI_Wait are then used to initiate and complete communications. |
---|
| 38 | ! ex : Give mpi_requests_in(i) to MPI_Start to send the buffer contained in message_in(i) |
---|
| 39 | integer :: send_seq ! Sequence number : send_seq is incremented each time send_message is called |
---|
| 40 | integer :: wait_seq ! Sequence number : wait_seq is incremented each time wait_message is called |
---|
| 41 | logical :: ondevice ! Ready to transfer ondevice field |
---|
| 42 | end type t_message |
---|
[266] | 43 | |
---|
[963] | 44 | public :: t_message, t_request, & |
---|
| 45 | req_i1, req_e1_scal, req_e1_vect, & |
---|
| 46 | req_i0, req_e0_scal, req_e0_vect, & |
---|
| 47 | req_z1_scal, & |
---|
| 48 | init_transfert, & |
---|
| 49 | init_message, & |
---|
| 50 | finalize_message, & |
---|
| 51 | send_message, & |
---|
| 52 | wait_message, & |
---|
| 53 | test_message |
---|
[962] | 54 | |
---|
[963] | 55 | ! ---- Private variables ---- |
---|
| 56 | ! Profiling id for mpi |
---|
| 57 | integer :: profile_mpi, profile_mpi_copies, profile_mpi_waitall, profile_mpi_barrier |
---|
| 58 | contains |
---|
| 59 | ! Initialize transfert : must be called before any other transfert_mpi routines |
---|
| 60 | subroutine init_transfert |
---|
| 61 | use mpi_mod, only : MPI_THREAD_SINGLE, MPI_THREAD_FUNNELED |
---|
| 62 | use mpipara, only : mpi_threading_mode |
---|
| 63 | use profiling_mod, only : register_id |
---|
| 64 | logical, parameter :: profile_mpi_detail = .true. |
---|
[667] | 65 | |
---|
[963] | 66 | !$omp master |
---|
| 67 | ! Check requested threads support |
---|
| 68 | if( mpi_threading_mode /= MPI_THREAD_SINGLE .and. mpi_threading_mode /= MPI_THREAD_FUNNELED ) call dynamico_abort("Only single and funneled threading mode are supported.") |
---|
[667] | 69 | |
---|
[963] | 70 | ! Register profiling ids |
---|
| 71 | call register_id("MPI", profile_mpi) |
---|
| 72 | if( profile_mpi_detail ) then |
---|
| 73 | call register_id("MPI_copies", profile_mpi_copies) |
---|
| 74 | call register_id("MPI_waitall", profile_mpi_waitall) |
---|
| 75 | call register_id("MPI_omp_barrier", profile_mpi_barrier) |
---|
| 76 | else |
---|
| 77 | profile_mpi_copies = profile_mpi |
---|
| 78 | profile_mpi_waitall = profile_mpi |
---|
| 79 | profile_mpi_barrier = profile_mpi |
---|
| 80 | endif |
---|
[26] | 81 | |
---|
[963] | 82 | ! Initialize requests |
---|
| 83 | call init_all_requests() |
---|
| 84 | !$omp end master |
---|
| 85 | !$omp barrier |
---|
| 86 | end subroutine |
---|
[26] | 87 | |
---|
[963] | 88 | subroutine init_message(field, request, message, name) |
---|
| 89 | use mpi_mod |
---|
| 90 | use mpipara |
---|
| 91 | type(t_field), pointer, intent(in) :: field(:) |
---|
| 92 | type(t_request),pointer, intent(in) :: request(:) |
---|
| 93 | type(t_message), target, intent(out) :: message ! Needs intent out for call to finalize_message |
---|
| 94 | character(len=*), intent(in),optional :: name |
---|
| 95 | integer, parameter :: INITIAL_ALLOC_SIZE = 10, GROW_FACTOR = 2 |
---|
[26] | 96 | |
---|
[963] | 97 | type(t_submessage) :: submessage_in, submessage_out |
---|
| 98 | type(t_local_submessage) :: submessage_local |
---|
| 99 | integer :: dim3, dim4 |
---|
| 100 | integer :: ind, ind_loc, remote_ind_glo, i |
---|
| 101 | integer :: message_in_size, message_out_size, message_local_size |
---|
| 102 | type(t_local_submessage), allocatable :: message_local_tmp(:) |
---|
| 103 | type(t_submessage), allocatable :: message_in_tmp(:), message_out_tmp(:) |
---|
| 104 | type(t_submessage), pointer :: submessage |
---|
[965] | 105 | integer :: field_type |
---|
[148] | 106 | |
---|
[963] | 107 | !$omp barrier |
---|
| 108 | !$omp master |
---|
[953] | 109 | !init off-device |
---|
| 110 | message%ondevice=.false. |
---|
[963] | 111 | message%send_seq = 0 |
---|
| 112 | message%wait_seq = 0 |
---|
[965] | 113 | |
---|
| 114 | if( request(1)%field_type /= field(1)%field_type ) call dynamico_abort( "init_message : field_type/request mismatch" ) |
---|
| 115 | field_type = request(1)%field_type |
---|
| 116 | |
---|
[963] | 117 | ! Set field%rval4d pointer to always use 4d array |
---|
| 118 | do ind = 1, ndomain |
---|
| 119 | if( field(ind)%ndim == 2 ) field(ind)%rval4d(1:size(field(ind)%rval2d,1),1:1,1:1) => field(ind)%rval2d |
---|
| 120 | ! This is Fortran 2008 : can be avoided by using a subroutine with rval3d as a 1D dummy argument |
---|
| 121 | ! (/!\ : using a subroutine might generate a temporary contiguous array) |
---|
| 122 | if( field(ind)%ndim == 3 ) field(ind)%rval4d(1:size(field(ind)%rval3d,1), & |
---|
| 123 | 1:size(field(ind)%rval3d,2), 1:1) => field(ind)%rval3d |
---|
| 124 | end do |
---|
| 125 | dim3 = size(field(1)%rval4d,2) |
---|
| 126 | dim4 = size(field(1)%rval4d,3) |
---|
| 127 | message%field => field |
---|
| 128 | message%request => request |
---|
| 129 | ! Create list of inbound/outbound/local messages |
---|
| 130 | allocate(message_in_tmp(INITIAL_ALLOC_SIZE)) |
---|
| 131 | message_in_size=0 |
---|
| 132 | allocate(message_out_tmp(INITIAL_ALLOC_SIZE)) |
---|
| 133 | message_out_size=0 |
---|
| 134 | allocate(message_local_tmp(INITIAL_ALLOC_SIZE)) |
---|
| 135 | message_local_size=0 |
---|
| 136 | do ind_loc = 1, ndomain |
---|
| 137 | do remote_ind_glo = 1, ndomain_glo |
---|
| 138 | if( domglo_rank(remote_ind_glo) == mpi_rank ) then ! If sending to local domain |
---|
| 139 | if(request(ind_loc)%points_HtoB(remote_ind_glo)%npoints > 0 ) then ! Add only non-empty messages |
---|
| 140 | ! Add local message ind_loc -> remote_ind_glo, aggregarting submessage_in and submessage_out into submessage_local |
---|
[965] | 141 | submessage_out = make_submessage( field_type, request(ind_loc)%points_HtoB(remote_ind_glo), & |
---|
[963] | 142 | ind_loc, remote_ind_glo, dim3, dim4, request(1)%vector ) |
---|
[965] | 143 | submessage_in = make_submessage( field_type, request(domglo_loc_ind(remote_ind_glo))%points_BtoH(domloc_glo_ind(ind_loc)), & |
---|
[963] | 144 | domglo_loc_ind(remote_ind_glo), domloc_glo_ind(ind_loc), dim3, dim4, request(1)%vector) |
---|
| 145 | submessage_local%src_ind_loc = ind_loc |
---|
| 146 | submessage_local%dest_ind_loc = domglo_loc_ind(remote_ind_glo) |
---|
| 147 | submessage_local%displ_src = submessage_out%displs |
---|
| 148 | submessage_local%displ_dest = submessage_in%displs |
---|
| 149 | submessage_local%sign = submessage_in%sign |
---|
| 150 | ! Add to local message list |
---|
| 151 | call array_append_local_submessage( message_local_tmp, message_local_size, submessage_local) |
---|
| 152 | endif |
---|
| 153 | else ! If remote domain |
---|
| 154 | ! When data to send to remote_domain, add submessage in message%message_out |
---|
| 155 | if( request(ind_loc)%points_HtoB(remote_ind_glo)%npoints > 0 ) then |
---|
[965] | 156 | submessage_out = make_submessage( field_type, request(ind_loc)%points_HtoB(remote_ind_glo), & |
---|
[963] | 157 | ind_loc, remote_ind_glo, dim3, dim4, request(1)%vector ) |
---|
| 158 | call array_append_submessage( message_out_tmp, message_out_size, submessage_out ) |
---|
| 159 | end if |
---|
| 160 | if( request(ind_loc)%points_BtoH(remote_ind_glo)%npoints > 0 ) then |
---|
[965] | 161 | submessage_in = make_submessage( field_type, request(ind_loc)%points_BtoH(remote_ind_glo), & |
---|
[963] | 162 | ind_loc, remote_ind_glo, dim3, dim4, request(1)%vector ) |
---|
| 163 | call array_append_submessage( message_in_tmp, message_in_size, submessage_in ) |
---|
| 164 | end if |
---|
| 165 | end if |
---|
| 166 | end do |
---|
| 167 | end do |
---|
| 168 | ! Trim message_xx_tmp and put it in message%message_xx |
---|
| 169 | allocate(message%message_in(message_in_size)); message%message_in(:) = message_in_tmp(:message_in_size) |
---|
| 170 | allocate(message%message_out(message_out_size)); message%message_out(:) = message_out_tmp(:message_out_size) |
---|
| 171 | allocate(message%message_local(message_local_size)); message%message_local(:) = message_local_tmp(:message_local_size) |
---|
[953] | 172 | |
---|
[963] | 173 | ! Create MPI Persistant Send/Recv requests |
---|
| 174 | allocate( message%mpi_requests_in(size(message%message_in)) ) |
---|
| 175 | allocate( message%mpi_requests_out(size(message%message_out)) ) |
---|
| 176 | do i=1, size(message%message_out) |
---|
| 177 | submessage => message%message_out(i) |
---|
| 178 | call MPI_Send_init( submessage%buff, size(submessage%buff), MPI_DOUBLE, domglo_rank(submessage%remote_ind_glo), & |
---|
| 179 | submessage%remote_ind_glo+ndomain_glo*domloc_glo_ind(submessage%ind_loc), & |
---|
| 180 | comm_icosa, message%mpi_requests_out(i), ierr ) |
---|
| 181 | end do |
---|
| 182 | do i=1, size(message%message_in) |
---|
| 183 | submessage => message%message_in(i) |
---|
| 184 | call MPI_Recv_init( submessage%buff, size(submessage%buff), MPI_DOUBLE, domglo_rank(submessage%remote_ind_glo), & |
---|
| 185 | domloc_glo_ind(submessage%ind_loc)+ndomain_glo*submessage%remote_ind_glo, & |
---|
| 186 | comm_icosa, message%mpi_requests_in(i), ierr ) |
---|
| 187 | end do |
---|
| 188 | !$omp end master |
---|
| 189 | !$omp barrier |
---|
| 190 | contains |
---|
| 191 | ! Generate submessage from points |
---|
[965] | 192 | function make_submessage(field_type, points, ind_loc, remote_ind_glo, dim3, dim4, vector) result(submessage) |
---|
| 193 | use dimensions, only : swap_dimensions, iim, u_pos, z_pos |
---|
| 194 | integer, intent(in) :: field_type |
---|
[963] | 195 | type(t_points), intent(in) :: points |
---|
| 196 | integer, intent(in) :: ind_loc, remote_ind_glo, dim3, dim4 |
---|
| 197 | logical, intent(in) :: vector |
---|
| 198 | integer :: k |
---|
| 199 | type(t_submessage) :: submessage |
---|
[186] | 200 | |
---|
[963] | 201 | call swap_dimensions(ind_loc) |
---|
| 202 | submessage%ind_loc = ind_loc |
---|
| 203 | submessage%remote_ind_glo = remote_ind_glo |
---|
| 204 | allocate( submessage%buff( points%npoints, dim3, dim4 ) ) |
---|
| 205 | allocate( submessage%displs( points%npoints ) ) |
---|
| 206 | submessage%displs(:) = points%i + (points%j-1)*iim |
---|
[965] | 207 | if(field_type == field_U) submessage%displs = submessage%displs + u_pos( points%elt ) |
---|
| 208 | if(field_type == field_Z) submessage%displs = submessage%displs + z_pos( points%elt ) |
---|
[963] | 209 | allocate(submessage%sign( points%npoints )) |
---|
[965] | 210 | if( vector ) then ! For U fields only |
---|
| 211 | submessage%sign(:) = (/( domain(ind_loc)%edge_assign_sign(points%elt(k)-1, points%i(k), points%j(k)) ,k=1,points%npoints)/) |
---|
[963] | 212 | else |
---|
| 213 | submessage%sign(:) = 1 |
---|
| 214 | endif |
---|
| 215 | end function |
---|
[186] | 216 | |
---|
[963] | 217 | ! Add element to array, and reallocate if necessary |
---|
| 218 | subroutine array_append_submessage( a, a_size, elt ) |
---|
| 219 | type(t_submessage), allocatable, intent(inout) :: a(:) |
---|
| 220 | integer, intent(inout) :: a_size |
---|
| 221 | type(t_submessage), intent(in) :: elt |
---|
| 222 | type(t_submessage), allocatable :: a_tmp(:) |
---|
| 223 | integer, parameter :: GROW_FACTOR = 2 |
---|
[364] | 224 | |
---|
[963] | 225 | if( size( a ) <= a_size ) then |
---|
| 226 | allocate( a_tmp ( a_size * GROW_FACTOR ) ) |
---|
| 227 | a_tmp(1:a_size) = a(1:a_size) |
---|
| 228 | call move_alloc(a_tmp, a) |
---|
| 229 | end if |
---|
| 230 | a_size = a_size + 1 |
---|
| 231 | a(a_size) = elt; |
---|
| 232 | end subroutine |
---|
| 233 | ! Add element to array, and reallocate if necessary |
---|
| 234 | subroutine array_append_local_submessage( a, a_size, elt ) |
---|
| 235 | type(t_local_submessage), allocatable, intent(inout) :: a(:) |
---|
| 236 | integer, intent(inout) :: a_size |
---|
| 237 | type(t_local_submessage), intent(in) :: elt |
---|
| 238 | type(t_local_submessage), allocatable :: a_tmp(:) |
---|
| 239 | integer, parameter :: GROW_FACTOR = 2 |
---|
[186] | 240 | |
---|
[963] | 241 | if( size( a ) <= a_size ) then |
---|
| 242 | allocate( a_tmp ( a_size * GROW_FACTOR ) ) |
---|
| 243 | a_tmp(1:a_size) = a(1:a_size) |
---|
| 244 | call move_alloc(a_tmp, a) |
---|
| 245 | end if |
---|
| 246 | a_size = a_size + 1 |
---|
| 247 | a(a_size) = elt; |
---|
| 248 | end subroutine |
---|
| 249 | ! Je demande pardon au dieu du copier-coller car j'ai péché |
---|
| 250 | end subroutine |
---|
[186] | 251 | |
---|
[963] | 252 | subroutine message_create_ondevice(message) |
---|
| 253 | use mpi_mod |
---|
| 254 | use mpipara, only : comm_icosa |
---|
| 255 | type(t_message), intent(inout) :: message |
---|
| 256 | type(t_submessage), pointer :: submessage |
---|
| 257 | integer :: i, ierr |
---|
[151] | 258 | |
---|
[963] | 259 | if( message%ondevice ) call dynamico_abort("Message already on device") |
---|
[186] | 260 | |
---|
[963] | 261 | !$acc enter data copyin(message) async |
---|
| 262 | !$acc enter data copyin(message%message_in(:)) async |
---|
| 263 | do i = 1, size( message%message_in ) |
---|
| 264 | !$acc enter data copyin(message%message_in(i)%buff(:,:,:)) async |
---|
| 265 | !$acc enter data copyin(message%message_in(i)%displs(:)) async |
---|
| 266 | !$acc enter data copyin(message%message_in(i)%sign(:)) async |
---|
| 267 | end do |
---|
| 268 | !$acc enter data copyin(message%message_out(:)) async |
---|
| 269 | do i = 1, size( message%message_out ) |
---|
| 270 | !$acc enter data copyin(message%message_out(i)%buff(:,:,:)) async |
---|
| 271 | !$acc enter data copyin(message%message_out(i)%displs(:)) async |
---|
| 272 | !!$acc enter data copyin(message%message_out(i)%sign(:)) async |
---|
| 273 | end do |
---|
| 274 | !$acc enter data copyin(message%message_local(:)) async |
---|
| 275 | do i = 1, size( message%message_local ) |
---|
| 276 | !$acc enter data copyin(message%message_local(i)%displ_src(:)) async |
---|
| 277 | !$acc enter data copyin(message%message_local(i)%displ_dest(:)) async |
---|
| 278 | !$acc enter data copyin(message%message_local(i)%sign(:)) async |
---|
| 279 | end do |
---|
| 280 | !$acc enter data copyin(message%field(:)) async |
---|
| 281 | do i = 1, ndomain |
---|
| 282 | !$acc enter data copyin(message%field(i)%rval4d(:,:,:)) async |
---|
| 283 | end do |
---|
[186] | 284 | |
---|
[963] | 285 | do i=1, size(message%message_out) |
---|
| 286 | submessage => message%message_out(i) |
---|
| 287 | call MPI_Request_free(message%mpi_requests_out(i), ierr) |
---|
| 288 | !$acc host_data use_device(submessage%buff) |
---|
| 289 | call MPI_Send_init( submessage%buff, size(submessage%buff), MPI_DOUBLE, domglo_rank(submessage%remote_ind_glo), & |
---|
| 290 | submessage%remote_ind_glo+ndomain_glo*domloc_glo_ind(submessage%ind_loc), & |
---|
| 291 | comm_icosa, message%mpi_requests_out(i), ierr ) |
---|
| 292 | !$acc end host_data |
---|
| 293 | end do |
---|
| 294 | do i=1, size(message%message_in) |
---|
| 295 | submessage => message%message_in(i) |
---|
| 296 | call MPI_Request_free(message%mpi_requests_in(i), ierr) |
---|
| 297 | !$acc host_data use_device(submessage%buff) |
---|
| 298 | call MPI_Recv_init( submessage%buff, size(submessage%buff), MPI_DOUBLE, domglo_rank(submessage%remote_ind_glo), & |
---|
| 299 | domloc_glo_ind(submessage%ind_loc)+ndomain_glo*submessage%remote_ind_glo, & |
---|
| 300 | comm_icosa, message%mpi_requests_in(i), ierr ) |
---|
| 301 | !$acc end host_data |
---|
| 302 | end do |
---|
[151] | 303 | |
---|
[963] | 304 | message%ondevice=.true. |
---|
| 305 | !!$acc update device(message%ondevice) |
---|
| 306 | end subroutine |
---|
[186] | 307 | |
---|
[963] | 308 | subroutine message_delete_ondevice(message) |
---|
| 309 | type(t_message), intent(inout) :: message |
---|
| 310 | integer :: i |
---|
[186] | 311 | |
---|
[963] | 312 | if( .not. message%ondevice ) call dynamico_abort("Message not on device") |
---|
[186] | 313 | |
---|
[963] | 314 | do i = 1, size( message%message_in ) |
---|
| 315 | !$acc exit data delete(message%message_in(i)%buff(:,:,:)) async |
---|
| 316 | !$acc exit data delete(message%message_in(i)%displs(:)) async |
---|
| 317 | !$acc exit data delete(message%message_in(i)%sign(:)) async |
---|
| 318 | end do |
---|
| 319 | !$acc exit data delete(message%message_in(:)) async |
---|
| 320 | do i = 1, size( message%message_out ) |
---|
| 321 | !$acc exit data delete(message%message_out(i)%buff(:,:,:)) async |
---|
| 322 | !$acc exit data delete(message%message_out(i)%displs(:)) async |
---|
| 323 | !!$acc exit data delete(message%message_out(i)%sign(:)) async |
---|
| 324 | end do |
---|
| 325 | !$acc exit data delete(message%message_out(:)) async |
---|
| 326 | do i = 1, size( message%message_local ) |
---|
| 327 | !$acc exit data delete(message%message_local(i)%displ_src(:)) async |
---|
| 328 | !$acc exit data delete(message%message_local(i)%displ_dest(:)) async |
---|
| 329 | !$acc exit data delete(message%message_local(i)%sign(:)) async |
---|
| 330 | end do |
---|
| 331 | !$acc exit data delete(message%message_local(:)) async |
---|
[186] | 332 | |
---|
[963] | 333 | do i = 1, ndomain |
---|
| 334 | !$acc exit data delete(message%field(i)%rval4d(:,:,:)) async |
---|
| 335 | end do |
---|
| 336 | !$acc exit data delete(message%field(:)) async |
---|
| 337 | !$acc exit data delete(message) async |
---|
| 338 | message%ondevice=.false. |
---|
| 339 | end subroutine |
---|
[953] | 340 | |
---|
[963] | 341 | subroutine finalize_message(message) |
---|
| 342 | type(t_message), intent(inout) :: message |
---|
| 343 | integer :: i, ierr |
---|
[953] | 344 | |
---|
[963] | 345 | !$omp barrier |
---|
| 346 | !$omp master |
---|
| 347 | if(message%send_seq /= message%wait_seq) call dynamico_abort("No matching wait_message before finalization") |
---|
[186] | 348 | |
---|
[963] | 349 | if(message%ondevice) call message_delete_ondevice(message) |
---|
| 350 | deallocate(message%message_in) |
---|
| 351 | deallocate(message%message_out) |
---|
| 352 | deallocate(message%message_local) |
---|
| 353 | do i=1, size(message%mpi_requests_in) |
---|
| 354 | call MPI_Request_free(message%mpi_requests_in(i), ierr) |
---|
[953] | 355 | end do |
---|
[963] | 356 | deallocate(message%mpi_requests_in) |
---|
| 357 | do i=1, size(message%mpi_requests_out) |
---|
| 358 | call MPI_Request_free(message%mpi_requests_out(i), ierr) |
---|
| 359 | end do |
---|
| 360 | deallocate(message%mpi_requests_out) |
---|
| 361 | !$omp end master |
---|
| 362 | !$omp barrier |
---|
| 363 | end subroutine |
---|
[953] | 364 | |
---|
[963] | 365 | subroutine send_message(field, message) |
---|
| 366 | use mpi_mod |
---|
| 367 | use domain_mod, only : assigned_domain |
---|
| 368 | use omp_para, only : distrib_level |
---|
| 369 | type(t_field),pointer :: field(:) |
---|
| 370 | type(t_message), target :: message |
---|
| 371 | integer :: i, k, d3, d4 |
---|
| 372 | integer :: ierr, d3_begin, d3_end, dim4 |
---|
[151] | 373 | |
---|
[963] | 374 | call enter_profile(profile_mpi) |
---|
[186] | 375 | |
---|
[963] | 376 | ! Needed because rval4d is set in init_message |
---|
| 377 | if( .not. associated( message%field, field ) ) & |
---|
| 378 | call dynamico_abort("send_message must be called with the same field used in init_message") |
---|
[667] | 379 | |
---|
[953] | 380 | !Prepare 'message' for on-device copies if field is on device |
---|
[963] | 381 | !$omp master |
---|
| 382 | if( field(1)%ondevice .and. .not. message%ondevice ) call message_create_ondevice(message) |
---|
| 383 | if( field(1)%ondevice .neqv. message%ondevice ) call dynamico_abort("send_message : internal device/host memory synchronization error") |
---|
| 384 | ! Check if previous message has been waited |
---|
| 385 | if(message%send_seq /= message%wait_seq) & |
---|
| 386 | call dynamico_abort("No matching wait_message before new send_message") |
---|
| 387 | message%send_seq = message%send_seq + 1 |
---|
| 388 | !$omp end master |
---|
[965] | 389 | |
---|
[963] | 390 | call enter_profile(profile_mpi_barrier) |
---|
| 391 | !$omp barrier |
---|
| 392 | call exit_profile(profile_mpi_barrier) |
---|
[186] | 393 | |
---|
[963] | 394 | dim4 = size(message%field(1)%rval4d, 3) |
---|
| 395 | CALL distrib_level( 1, size(message%field(1)%rval4d, 2), d3_begin, d3_end ) |
---|
[151] | 396 | |
---|
[963] | 397 | call enter_profile(profile_mpi_copies) |
---|
| 398 | !$acc data present(message) async if(message%ondevice) |
---|
[151] | 399 | |
---|
[963] | 400 | ! Halo to Buffer : copy outbound message to MPI buffers |
---|
| 401 | !$acc parallel loop default(none) present(message, assigned_domain) async if(message%ondevice) |
---|
| 402 | do i = 1, size( message%message_out ) |
---|
| 403 | if( assigned_domain( message%message_out(i)%ind_loc ) ) then |
---|
| 404 | !$acc loop collapse(2) |
---|
| 405 | do d4 = 1, dim4 |
---|
| 406 | do d3 = d3_begin, d3_end |
---|
| 407 | !$acc loop |
---|
| 408 | do k = 1, size(message%message_out(i)%displs) |
---|
| 409 | message%message_out(i)%buff(k,d3,d4) = message%field(message%message_out(i)%ind_loc)%rval4d(message%message_out(i)%displs(k),d3, d4) |
---|
| 410 | end do |
---|
| 411 | end do |
---|
| 412 | end do |
---|
| 413 | endif |
---|
| 414 | end do |
---|
[151] | 415 | |
---|
[963] | 416 | ! Halo to Halo : copy local messages from source field to destination field |
---|
| 417 | !$acc parallel loop default(none) present(message, assigned_domain) async if(message%ondevice) |
---|
| 418 | do i = 1, size( message%message_local ) |
---|
| 419 | if( assigned_domain( message%message_local(i)%dest_ind_loc ) ) then |
---|
[965] | 420 | !$acc loop collapse(2) |
---|
[963] | 421 | do d4 = 1, dim4 |
---|
| 422 | do d3 = d3_begin, d3_end |
---|
| 423 | ! Cannot collapse because size(displ_dest) varies with i |
---|
| 424 | !$acc loop vector |
---|
| 425 | do k = 1, size(message%message_local(i)%displ_dest) |
---|
| 426 | message%field(message%message_local(i)%dest_ind_loc)%rval4d(message%message_local(i)%displ_dest(k),d3,d4) = & |
---|
| 427 | message%message_local(i)%sign(k)*message%field(message%message_local(i)%src_ind_loc)%rval4d(message%message_local(i)%displ_src(k),d3,d4) |
---|
| 428 | end do |
---|
| 429 | end do |
---|
| 430 | end do |
---|
| 431 | endif |
---|
| 432 | end do |
---|
[186] | 433 | |
---|
[963] | 434 | !$acc end data |
---|
| 435 | call exit_profile(profile_mpi_copies) |
---|
[151] | 436 | |
---|
[962] | 437 | |
---|
[963] | 438 | call enter_profile(profile_mpi_barrier) |
---|
| 439 | !$acc wait |
---|
| 440 | !$omp barrier |
---|
| 441 | call exit_profile(profile_mpi_barrier) |
---|
[478] | 442 | |
---|
[963] | 443 | !$omp master |
---|
| 444 | call MPI_Startall( size(message%mpi_requests_out), message%mpi_requests_out, ierr ) |
---|
| 445 | call MPI_Startall( size(message%mpi_requests_in), message%mpi_requests_in, ierr ) |
---|
| 446 | !$omp end master |
---|
[962] | 447 | |
---|
[963] | 448 | call exit_profile(profile_mpi) |
---|
| 449 | end subroutine |
---|
[962] | 450 | |
---|
[963] | 451 | subroutine test_message(message) |
---|
| 452 | use mpi_mod |
---|
| 453 | type(t_message) :: message |
---|
| 454 | integer :: ierr |
---|
| 455 | logical :: completed |
---|
[478] | 456 | |
---|
[963] | 457 | !$omp master |
---|
| 458 | call MPI_Testall( size(message%mpi_requests_out), message%mpi_requests_out, completed, MPI_STATUSES_IGNORE, ierr ) |
---|
| 459 | call MPI_Testall( size(message%mpi_requests_in), message%mpi_requests_in, completed, MPI_STATUSES_IGNORE, ierr ) |
---|
| 460 | !$omp end master |
---|
| 461 | end subroutine |
---|
[186] | 462 | |
---|
[963] | 463 | subroutine wait_message(message) |
---|
| 464 | use mpi_mod |
---|
| 465 | use domain_mod, only : assigned_domain |
---|
| 466 | use omp_para, only : distrib_level |
---|
| 467 | type(t_message), target :: message |
---|
| 468 | integer :: d3_begin, d3_end, dim4 |
---|
| 469 | integer :: i, ierr, k, d3, d4 |
---|
[186] | 470 | |
---|
[963] | 471 | ! Check if message has been sent and not recieved yet |
---|
| 472 | ! note : barrier needed between this and send_seq increment, and this and wait_seq increment |
---|
| 473 | ! note : watch out for integer overflow a = b+1 doesn't imply a>b |
---|
| 474 | if(message%send_seq /= message%wait_seq+1) then |
---|
| 475 | print*, "WARNING : wait_message called multiple times for one send_message, skipping" |
---|
| 476 | return ! Don't recieve message if already recieved |
---|
| 477 | end if |
---|
[186] | 478 | |
---|
[963] | 479 | call enter_profile(profile_mpi) |
---|
[186] | 480 | |
---|
[963] | 481 | CALL distrib_level( 1, size(message%field(1)%rval4d, 2), d3_begin, d3_end ) |
---|
[186] | 482 | |
---|
[963] | 483 | call enter_profile(profile_mpi_waitall) |
---|
| 484 | !$omp master |
---|
| 485 | call MPI_Waitall( size(message%mpi_requests_out), message%mpi_requests_out, MPI_STATUSES_IGNORE, ierr ) |
---|
| 486 | call MPI_Waitall( size(message%mpi_requests_in), message%mpi_requests_in, MPI_STATUSES_IGNORE, ierr ) |
---|
| 487 | !$omp end master |
---|
| 488 | call exit_profile(profile_mpi_waitall) |
---|
[186] | 489 | |
---|
[963] | 490 | call enter_profile(profile_mpi_barrier) |
---|
| 491 | !$omp barrier |
---|
| 492 | call exit_profile(profile_mpi_barrier) |
---|
[667] | 493 | |
---|
[963] | 494 | call enter_profile(profile_mpi_copies) |
---|
| 495 | !$acc data present(message) async if(message%ondevice) |
---|
[151] | 496 | |
---|
[963] | 497 | dim4 = size(message%field(1)%rval4d, 3) |
---|
| 498 | ! Buffer to Halo : copy inbound message to field |
---|
| 499 | !$acc parallel loop default(none) present(message, assigned_domain) async if(message%ondevice) |
---|
| 500 | do i = 1, size( message%message_in ) |
---|
| 501 | if( assigned_domain( message%message_in(i)%ind_loc ) ) then |
---|
| 502 | !$acc loop collapse(2) |
---|
| 503 | do d4 = 1, dim4 |
---|
| 504 | do d3 = d3_begin, d3_end |
---|
| 505 | !$acc loop |
---|
| 506 | do k = 1, size(message%message_in(i)%displs) |
---|
| 507 | message%field(message%message_in(i)%ind_loc)%rval4d(message%message_in(i)%displs(k),d3,d4) = message%message_in(i)%sign(k)*message%message_in(i)%buff(k,d3,d4) |
---|
| 508 | end do |
---|
| 509 | end do |
---|
| 510 | end do |
---|
| 511 | endif |
---|
| 512 | end do |
---|
[151] | 513 | |
---|
[963] | 514 | !$acc end data |
---|
| 515 | call exit_profile(profile_mpi_copies) |
---|
[151] | 516 | |
---|
[963] | 517 | !$omp master |
---|
| 518 | message%wait_seq = message%wait_seq + 1 |
---|
| 519 | !$omp end master |
---|
[151] | 520 | |
---|
[963] | 521 | call enter_profile(profile_mpi_barrier) |
---|
| 522 | !$omp barrier |
---|
| 523 | call exit_profile(profile_mpi_barrier) |
---|
| 524 | call exit_profile(profile_mpi) |
---|
| 525 | end subroutine |
---|
| 526 | end module |
---|