New URL for NEMO forge!   http://forge.nemo-ocean.eu

Since March 2022 along with NEMO 4.2 release, the code development moved to a self-hosted GitLab.
This present forge is now archived and remained online for history.
lbc_lnk_pt2pt_async.h90 in NEMO/branches/2021/dev_r14447_HPC-07_Irrmann_try_new_pt2pt/src/OCE/LBC – NEMO

source: NEMO/branches/2021/dev_r14447_HPC-07_Irrmann_try_new_pt2pt/src/OCE/LBC/lbc_lnk_pt2pt_async.h90 @ 14899

Last change on this file since 14899 was 14899, checked in by girrmann, 3 years ago

Asynchronous communications now works in time splitting with nncomm == 3 (newpt2pt communication) and yiels identical results but is not compatible with other schemes. Under investigation...

File size: 16.7 KB
Line 
1
2#if ! defined BLOCK_ISEND && ! defined BLOCK_IRECV && ! defined BLOCK_FILL
3   SUBROUTINE lbc_lnk_async_/**/PRECISION( cdname, ptab, cd_nat, psgn, kfld, kfillmode, pfillval, khls, lsend, lrecv, ld4only, loop_fct )
4      CHARACTER(len=*)              , INTENT(in   ) ::   cdname      ! name of the calling subroutine
5
6      TYPE(PTR_4d_/**/PRECISION),  DIMENSION(:), INTENT(inout) ::   ptab        ! pointer of arrays on which apply the b.c.
7      CHARACTER(len=1), DIMENSION(:), INTENT(in   ) ::   cd_nat      ! nature of array grid-points
8      REAL(PRECISION),  DIMENSION(:), INTENT(in   ) ::   psgn        ! sign used across the north fold boundary
9      INTEGER                       , INTENT(in   ) ::   kfld        ! number of pt3d arrays
10      INTEGER ,             OPTIONAL, INTENT(in   ) ::   kfillmode   ! filling method for halo over land (default = constant)
11      REAL(PRECISION),      OPTIONAL, INTENT(in   ) ::   pfillval    ! background value (used at closed boundaries)
12      INTEGER ,             OPTIONAL, INTENT(in   ) ::   khls        ! halo size, default = nn_hls
13      LOGICAL, DIMENSION(4),OPTIONAL, INTENT(in   ) ::   lsend, lrecv  ! communication with other 4 proc
14      LOGICAL,              OPTIONAL, INTENT(in   ) ::   ld4only     ! if .T., do only 4-neighbour comm (ignore corners)
15      interface
16         subroutine loop_fct(i0, i1, j0, j1, k0, k1, buf)
17           integer, intent(in) :: i0, i1, j0, j1, k0, k1
18           REAL*8, dimension(:,:,:,:,:,:), optional, intent(out) :: buf
19         end subroutine loop_fct
20      end interface
21      optional :: loop_fct
22      !
23      INTEGER  ::     ji,   jj,  jk,  jl,  jf, jn     ! dummy loop indices
24      INTEGER  ::    ipi,  ipj, ipk, ipl, ipf         ! dimension of the input array
25      INTEGER  ::   ip0i, ip1i, im0i, im1i
26      INTEGER  ::   ip0j, ip1j, im0j, im1j
27      INTEGER  ::   ishti, ishtj, ishti2, ishtj2
28      INTEGER  ::   ifill_nfd, icomm, ierr
29      INTEGER  ::   ihls, idxs, idxr, iszS, iszR
30      INTEGER, DIMENSION(4)  ::   iwewe, issnn
31      INTEGER, DIMENSION(8)  ::   isizei, ishtSi, ishtRi, ishtPi
32      INTEGER, DIMENSION(8)  ::   isizej, ishtSj, ishtRj, ishtPj
33      INTEGER, DIMENSION(8)  ::   ifill, iszall, ishtS, ishtR
34      INTEGER, DIMENSION(8)  ::   ireq             ! mpi_request id
35      INTEGER, DIMENSION(8)  ::   iStag, iRtag     ! Send and Recv mpi_tag id
36      REAL(PRECISION) ::   zland
37      LOGICAL, DIMENSION(8)  ::   llsend, llrecv
38      LOGICAL  ::   ll4only                                        ! default: 8 neighbourgs
39      LOGICAL  ::   ll_IdoNFold
40      !!----------------------------------------------------------------------
41      !
42      ! ----------------------------------------- !
43      !     1. local variables initialization     !
44      ! ----------------------------------------- !
45      !
46      ipi = SIZE(ptab(1)%pt4d,1)
47      ipj = SIZE(ptab(1)%pt4d,2)
48      ipk = SIZE(ptab(1)%pt4d,3)
49      ipl = SIZE(ptab(1)%pt4d,4)
50      ipf = kfld
51      !
52      IF( narea == 1 .AND. numcom == -1 ) CALL mpp_report( cdname, ipk, ipl, ipf, ld_lbc = .TRUE. )
53      !
54      idxs = 1   ! initalize index for send buffer
55      idxr = 1   ! initalize index for recv buffer
56      icomm = mpi_comm_oce        ! shorter name
57      !
58      ! take care of optional parameters
59      !
60      ihls = nn_hls       ! default definition
61      IF( PRESENT( khls ) )   ihls = khls
62      IF( ihls > n_hlsmax ) THEN
63         WRITE(ctmp1,*) TRIM(cdname), '  is calling lbc_lnk with khls > n_hlsmax : ', khls, '>', n_hlsmax
64         CALL ctl_stop( 'STOP', ctmp1 )
65      ENDIF
66      IF( ipi /= Ni_0+2*ihls ) THEN
67         WRITE(ctmp1,*) TRIM(cdname), ' is calling lbc_lnk with an input array which does not match ihls along i: ', ipi, ihls, Ni_0
68         CALL ctl_stop( 'STOP', ctmp1 )
69      ENDIF
70      IF( ipj /= Nj_0+2*ihls ) THEN
71         WRITE(ctmp1,*) TRIM(cdname), ' is calling lbc_lnk with an input array which does not match ihls along j:', ipj, ihls , Nj_0
72         CALL ctl_stop( 'STOP', ctmp1 )
73      ENDIF
74      !
75      ll4only = .FALSE.    ! default definition
76      IF( PRESENT(ld4only) )   ll4only = ld4only
77      !
78      zland = 0._wp                                     ! land filling value: zero by default
79      IF( PRESENT( pfillval ) )   zland = pfillval      ! set land value
80      !
81      ! define llsend and llrecv: logicals which say if mpi-neibourgs for send or receive exist or not.
82      IF     ( PRESENT(lsend) .AND. PRESENT(lrecv) ) THEN   ! localy defined neighbourgs
83         llsend(1:4) = lsend(1:4)   ;   llrecv(1:4) = lrecv(1:4)
84      ELSE IF( PRESENT(lsend) .OR.  PRESENT(lrecv) ) THEN
85         WRITE(ctmp1,*) TRIM(cdname), '  is calling lbc_lnk with only one of the two arguments lsend or lrecv'
86         CALL ctl_stop( 'STOP', ctmp1 )
87      ELSE                                              ! default neighbours
88         llsend(:) = mpiSnei(ihls,:) >= 0
89         IF( ll4only )   llsend(5:8) = .FALSE.          ! exclude corners
90         llrecv(:) = mpiRnei(ihls,:) >= 0
91         IF( ll4only )   llrecv(5:8) = .FALSE.          ! exclude corners
92      ENDIF
93      !
94      ! define ifill: which method should be used to fill each parts (sides+corners) of the halos
95      ! default definition
96      DO jn = 1, 8
97         IF(             llrecv(jn) ) THEN   ;   ifill(jn) = jpfillmpi    ! with an mpi communication
98         ELSEIF(    l_SelfPerio(jn) ) THEN   ;   ifill(jn) = jpfillperio  ! with self-periodicity
99         ELSEIF( PRESENT(kfillmode) ) THEN   ;   ifill(jn) = kfillmode    ! localy defined
100         ELSE                                ;   ifill(jn) = jpfillcst    ! constant value (zland)
101         ENDIF
102      END DO
103      DO jn = 5, 8
104         IF(             llrecv(jn) ) THEN   ;   ifill(jn) = jpfillmpi    ! with an mpi communication
105         ELSE                                ;   ifill(jn) = jpfillnothing! do nothing
106         ENDIF
107      END DO
108      ! north fold treatment
109      ll_IdoNFold = l_IdoNFold .AND. ifill(jpno) /= jpfillnothing
110      IF( ll_IdoNFold ) THEN
111         ifill_nfd = ifill(jpno)             ! if we are here, this means llrecv(jpno) = .false. and l_SelfPerio(jpno) = .false.
112         ifill( (/jpno/) ) = jpfillnothing   ! we do north fold -> do nothing for northern halo
113      ENDIF
114     
115      ! We first define the localization and size of the parts of the array that will be sent (s), received (r)
116      ! or used for periodocity (p). The localization is defined as "the bottom left corner - 1" in i and j directions.
117      ! This is a shift that will be applied later in the do loops to pick-up the appropriate part of the array
118      !
119      ! all definitions bellow do not refer to N[ij][se]0 so we can use it with any local value of ihls
120      !                   !                       ________________________
121      ip0i =          0   !          im0j = inner |__|__|__________|__|__|
122      ip1i =       ihls   !   im1j = inner - halo |__|__|__________|__|__|
123      im1i = ipi-2*ihls   !                       |  |  |          |  |  |
124      im0i = ipi - ihls   !                       |  |  |          |  |  |
125      ip0j =          0   !                       |  |  |          |  |  |
126      ip1j =       ihls   !                       |__|__|__________|__|__|
127      im1j = ipj-2*ihls   !           ip1j = halo |__|__|__________|__|__|
128      im0j = ipj - ihls   !              ip0j = 0 |__|__|__________|__|__|
129      !                   !                    ip0i ip1i        im1i im0i
130      !
131      iwewe(:) = (/ jpwe,jpea,jpwe,jpea /)   ;   issnn(:) = (/ jpso,jpso,jpno,jpno /)
132      !     sides:     west  east south north      ;   corners: so-we, so-ea, no-we, no-ea
133      isizei(1:4) = (/ ihls, ihls, Ni_0, Ni_0 /)   ;   isizei(5:8) = ihls              ! i- count
134      isizej(1:4) = (/ Nj_0, Nj_0, ihls, ihls /)   ;   isizej(5:8) = ihls              ! j- count
135      ishtSi(1:4) = (/ ip1i, im1i, ip1i, ip1i /)   ;   ishtSi(5:8) = ishtSi( iwewe )   ! i- shift send data
136      ishtSj(1:4) = (/ ip1j, ip1j, ip1j, im1j /)   ;   ishtSj(5:8) = ishtSj( issnn )   ! j- shift send data
137      ishtRi(1:4) = (/ ip0i, im0i, ip1i, ip1i /)   ;   ishtRi(5:8) = ishtRi( iwewe )   ! i- shift received data location
138      ishtRj(1:4) = (/ ip1j, ip1j, ip0j, im0j /)   ;   ishtRj(5:8) = ishtRj( issnn )   ! j- shift received data location
139      ishtPi(1:4) = (/ im1i, ip1i, ip1i, ip1i /)   ;   ishtPi(5:8) = ishtPi( iwewe )   ! i- shift data used for periodicity
140      ishtPj(1:4) = (/ ip1j, ip1j, im1j, ip1j /)   ;   ishtPj(5:8) = ishtPj( issnn )   ! j- shift data used for periodicity
141      !
142      ! -------------------------------- !
143      !     2. Prepare MPI exchanges     !
144      ! -------------------------------- !
145      !
146      iStag = (/ 1, 2, 3, 4, 5, 6, 7, 8 /)   ! any value but each one must be different
147      ! define iRtag with the corresponding iStag, e.g. data received at west where sent at east.
148      iRtag(jpwe) = iStag(jpea)   ;   iRtag(jpea) = iStag(jpwe)   ;   iRtag(jpso) = iStag(jpno)   ;   iRtag(jpno) = iStag(jpso)
149      iRtag(jpsw) = iStag(jpne)   ;   iRtag(jpse) = iStag(jpnw)   ;   iRtag(jpnw) = iStag(jpse)   ;   iRtag(jpne) = iStag(jpsw)
150      !
151      ! Allocate local temporary arrays to be sent/received.
152      iszall(:) = isizei(:) * isizej(:) * ipk * ipl * ipf
153      ishtS(1) = 0
154      DO jn = 2, 8
155         ishtS(jn) = ishtS(jn-1) + iszall(jn-1) * COUNT( (/llsend(jn-1)/) )
156      END DO
157      ishtR(1) = 0
158      DO jn = 2, 8
159         ishtR(jn) = ishtR(jn-1) + iszall(jn-1) * COUNT( (/llrecv(jn-1)/) )
160      END DO
161     
162      ! Allocate buffer arrays to be sent/received if needed
163      iszS = SUM(iszall, mask = llsend)                             ! send buffer size
164      IF( ALLOCATED(BUFFSND) ) THEN
165         CALL mpi_waitall(8, nreq_p2p, MPI_STATUSES_IGNORE, ierr)   ! wait for Isend from the PREVIOUS call
166         IF( SIZE(BUFFSND) < iszS )    DEALLOCATE(BUFFSND)          ! send buffer is too small
167      ENDIF
168      IF( .NOT. ALLOCATED(BUFFSND) )   ALLOCATE( BUFFSND(iszS) )
169      iszR = SUM(iszall, mask = llrecv)                             ! recv buffer size
170      IF( ALLOCATED(BUFFRCV) ) THEN
171         IF( SIZE(BUFFRCV) < iszR )    DEALLOCATE(BUFFRCV)          ! recv buffer is too small
172      ENDIF
173      IF( .NOT. ALLOCATED(BUFFRCV) )   ALLOCATE( BUFFRCV(iszR) )
174      !
175      ! default definition when no communication is done. understood by mpi_waitall
176      nreq_p2p(:) = MPI_REQUEST_NULL   ! WARNING: Must be done after the call to mpi_waitall just above
177      ireq(:) = MPI_REQUEST_NULL
178      !
179      ! ------------------------------------------------------------- !
180      !     3. Do east, west, south and north MPI_Irecv if needed     !
181      ! ------------------------------------------------------------- !
182      !
183      DO jn = 1, 8 ! TODO maybe to 8 for corners
184#define BLOCK_IRECV
185#  include "lbc_lnk_pt2pt_async.h90"
186#undef BLOCK_IRECV
187      END DO
188
189      ! ------------------------------------------------------------- !
190      !     4. Compute whole domain                                   !
191      ! ------------------------------------------------------------- !
192      !
193      call loop_fct( 2, jpi-1 &
194                   , 2, jpj-1 & !
195                   , 1, jpkm1 & ! TODO check if always jpkm1
196                   )
197      !
198      ! ------------------------------------------------------------- !
199      !     3. Do east, west, south and north MPI_Isend if needed     !
200      ! ------------------------------------------------------------- !
201      !
202      DO jn = 1, 8
203#define BLOCK_ISEND
204#  include "lbc_lnk_pt2pt_async.h90"
205#undef BLOCK_ISEND
206      END DO
207
208      !
209      ! ----------------------------------- !
210      !     5. Fill east and west halos     !
211      ! ----------------------------------- !
212      !
213      DO jn = 1, 8
214#define BLOCK_FILL
215#  include "lbc_lnk_pt2pt_async.h90"
216#undef BLOCK_FILL
217      END DO
218      !
219      ! ------------------------------- !
220      !     7. north fold treatment     !
221      ! ------------------------------- !
222      !
223      ! Must be done after receiving data from East/West neighbourgs (as it is coded in mpp_nfd, to be changed one day...)
224      ! Do it after MPI_iSend to south/north neighbourgs so they won't wait (too much) to receive their data
225      ! Do if before MPI_Recv from south/north neighbourgs so we have more time to receive data
226      !
227      IF( ll_IdoNFold ) THEN
228         IF( jpni == 1 )  THEN   ;   CALL lbc_nfd( ptab, cd_nat, psgn                  , ihls, ipf )   ! self NFold
229         ELSE                    ;   CALL mpp_nfd( ptab, cd_nat, psgn, ifill_nfd, zland, ihls, ipf )   ! mpi  NFold
230         ENDIF
231      ENDIF
232      !
233      ! ------------------------------------- !
234      !     7. Fill south and north halos     !
235      ! ------------------------------------- !
236      !
237!       DO jn = 3, 4
238! #define BLOCK_FILL
239! #  include "lbc_lnk_pt2pt_async.h90"
240! #undef BLOCK_FILL
241!       END DO
242!!      !
243!!      ! ----------------------------------------------- !
244!!      !     8. Specific problem in corner treatment     !
245!!      !              ( very rate case... )              !
246!!      ! ----------------------------------------------- !
247!!      !
248!!      DO jn = 5, 8
249!!#define BLOCK_ISEND
250!!#  include "lbc_lnk_pt2pt_async.h90"
251!!#undef BLOCK_ISEND
252!!      END DO
253!!      DO jn = 5, 8
254!!#define BLOCK_FILL
255!!#  include "lbc_lnk_pt2pt_async.h90"
256!!#undef BLOCK_FILL
257!!      END DO
258      !
259      ! -------------------------------------------- !
260      !     9. deallocate local temporary arrays     !
261      !        if they areg larger than jpi*jpj      !  <- arbitrary max size...
262      ! -------------------------------------------- !
263      !
264      IF( iszR > jpi*jpj )   DEALLOCATE(BUFFRCV)                    ! blocking receive -> can directly deallocate
265      IF( iszS > jpi*jpj ) THEN
266         CALL mpi_waitall(8, nreq_p2p, MPI_STATUSES_IGNORE, ierr)   ! must wait before deallocate send buffer
267         DEALLOCATE(BUFFSND)
268      ENDIF
269      !
270   END SUBROUTINE lbc_lnk_async_/**/PRECISION
271#endif
272
273#if defined BLOCK_ISEND
274   IF( llsend(jn) ) THEN
275      ishti = ishtSi(jn)
276      ishtj = ishtSj(jn)
277      DO jf = 1, ipf  ;  DO jl = 1, ipl  ;  DO jk = 1, ipk  ;  DO jj = 1,isizej(jn)  ;  DO ji = 1,isizei(jn)
278         BUFFSND(idxs) = ptab(jf)%pt4d(ishti+ji,ishtj+jj,jk,jl)
279         idxs = idxs + 1
280      END DO   ;   END DO   ;   END DO   ;   END DO   ;   END DO
281#if ! defined key_mpi_off
282      IF( ln_timing ) CALL tic_tac(.TRUE.)
283      ! non-blocking send of the west/east side using local buffer
284      CALL MPI_ISEND( BUFFSND(ishtS(jn)+1), iszall(jn), MPI_TYPE, mpiSnei(ihls,jn), iStag(jn), icomm, nreq_p2p(jn), ierr )
285      IF( ln_timing ) CALL tic_tac(.FALSE.)
286#endif
287   ENDIF
288#endif
289
290#if defined BLOCK_IRECV
291   ishti = ishtRi(jn)
292   ishtj = ishtRj(jn)
293   SELECT CASE ( ifill(jn) )
294   CASE ( jpfillnothing )               ! no filling
295   CASE ( jpfillmpi   )                 ! fill with data received by MPI
296#if ! defined key_mpi_off
297      IF( ln_timing ) CALL tic_tac(.TRUE.)
298      CALL MPI_IRECV( BUFFRCV(ishtR(jn)+1), iszall(jn), MPI_TYPE, mpiRnei(ihls,jn), iRtag(jn), icomm, ireq(jn), ierr )
299      IF( ln_timing ) CALL tic_tac(.FALSE.)
300#endif
301END SELECT
302#endif
303
304#if defined BLOCK_FILL
305   ishti = ishtRi(jn)
306   ishtj = ishtRj(jn)
307   SELECT CASE ( ifill(jn) )
308   CASE ( jpfillnothing )               ! no filling
309   CASE ( jpfillmpi   )                 ! fill with data received by MPI
310#if ! defined key_mpi_off
311      IF( ln_timing ) CALL tic_tac(.TRUE.)
312      !                                 ! waiting receive of the west/east halo in local temporary arrays
313      call MPI_WAIT(ireq(jn), MPI_STATUS_IGNORE, ierr)
314      IF( ln_timing ) CALL tic_tac(.FALSE.)
315#endif
316      DO jf = 1, ipf  ;  DO jl = 1, ipl  ;  DO jk = 1, ipk  ;  DO jj = 1,isizej(jn)  ;  DO ji = 1,isizei(jn)
317         ptab(jf)%pt4d(ishti+ji,ishtj+jj,jk,jl) = BUFFRCV(idxr)
318         idxr = idxr + 1
319      END DO   ;   END DO   ;   END DO   ;   END DO   ;   END DO
320   CASE ( jpfillperio )                 ! use periodicity
321      ishti2 = ishtPi(jn)
322      ishtj2 = ishtPj(jn)
323      DO jf = 1, ipf  ;  DO jl = 1, ipl  ;  DO jk = 1, ipk  ;  DO jj = 1,isizej(jn)  ;  DO ji = 1,isizei(jn)
324         ptab(jf)%pt4d(ishti+ji,ishtj+jj,jk,jl) = ptab(jf)%pt4d(ishti2+ji,ishtj2+jj,jk,jl)
325      END DO   ;   END DO   ;   END DO   ;   END DO   ;   END DO
326   CASE ( jpfillcopy  )                 ! filling with inner domain values
327      ishti2 = ishtSi(jn)
328      ishtj2 = ishtSj(jn)
329      DO jf = 1, ipf  ;  DO jl = 1, ipl  ;  DO jk = 1, ipk  ;  DO jj = 1,isizej(jn)  ;  DO ji = 1,isizei(jn)
330         ptab(jf)%pt4d(ishti+ji,ishtj+jj,jk,jl) = ptab(jf)%pt4d(ishti2+ji,ishtj2+jj,jk,jl)
331      END DO   ;   END DO   ;   END DO   ;   END DO   ;   END DO
332   CASE ( jpfillcst   )                 ! filling with constant value
333      DO jf = 1, ipf  ;  DO jl = 1, ipl  ;  DO jk = 1, ipk  ;  DO jj = 1,isizej(jn)  ;  DO ji = 1,isizei(jn)
334         ptab(jf)%pt4d(ishti+ji,ishtj+jj,jk,jl) = zland
335      END DO   ;   END DO   ;   END DO   ;   END DO   ;   END DO
336   END SELECT
337#endif
Note: See TracBrowser for help on using the repository browser.