1 | ! |
---|
2 | ! !DESCRIPTION: |
---|
3 | ! |
---|
4 | !> Load balancing methods |
---|
5 | ! |
---|
6 | !> Take wall clock time before and after selected OASIS actions |
---|
7 | !> and calculate quantities to help evaluating coupled component |
---|
8 | !> load imbalance and other performance related quantities |
---|
9 | ! |
---|
10 | ! Available routines: |
---|
11 | ! oasis_lb_init initialise global timers |
---|
12 | ! oasis_lb_allocate allocates timers |
---|
13 | ! oasis_lb_define fill timers header |
---|
14 | ! oasis_lb_measure fill timers |
---|
15 | ! oasis_lb_print calculate and output load imbalance evaluation |
---|
16 | ! |
---|
17 | ! |
---|
18 | ! !REVISION HISTORY: |
---|
19 | ! |
---|
20 | ! Date Programmer Description |
---|
21 | ! ---------- ---------- ----------- |
---|
22 | ! 22.07.2020 E. Maisonnave created (based on mod_oasis_timer.F90 from M.H.) |
---|
23 | ! |
---|
24 | !---------------------------------------------------------------------- |
---|
25 | |
---|
26 | module mod_oasis_load_balancing |
---|
27 | |
---|
28 | use mod_oasis_kinds |
---|
29 | use mod_oasis_data |
---|
30 | use mod_oasis_sys |
---|
31 | USE mod_oasis_mpi |
---|
32 | USE netcdf |
---|
33 | |
---|
34 | implicit none |
---|
35 | private |
---|
36 | |
---|
37 | public oasis_lb_init |
---|
38 | public oasis_lb_allocate |
---|
39 | public oasis_lb_define |
---|
40 | public oasis_lb_measure |
---|
41 | public oasis_lb_print |
---|
42 | public oasis_lb_stop |
---|
43 | |
---|
44 | !> Storage for timer data |
---|
45 | type timeline_lb |
---|
46 | ! |
---|
47 | ! timer |
---|
48 | real(ip_single_p) :: timer |
---|
49 | ! |
---|
50 | ! kind of the measured quantity |
---|
51 | integer(kind=ip_i4_p) :: kind |
---|
52 | ! |
---|
53 | ! id of the field |
---|
54 | integer(kind=ip_i4_p) :: field |
---|
55 | ! |
---|
56 | ! id of the counterpart coupled component |
---|
57 | integer(kind=ip_i4_p) :: cntp |
---|
58 | ! |
---|
59 | ! id of the timestep (from msec) |
---|
60 | integer(kind=ip_i4_p) :: tsid |
---|
61 | ! |
---|
62 | end type timeline_lb |
---|
63 | |
---|
64 | ! This array stores the timeline : the dates of every coupling events |
---|
65 | ! and three informations related to the event : |
---|
66 | ! - its kind, |
---|
67 | ! - the coupling field ID (if applicable) |
---|
68 | ! - the counterpart model in the coupling exchange (if applicable) |
---|
69 | |
---|
70 | type(timeline_lb) , pointer :: local_timeline(:) |
---|
71 | |
---|
72 | |
---|
73 | |
---|
74 | ! ID flags : kind of coupling events |
---|
75 | integer(kind=ip_i4_p), parameter, public :: LB_PUT = 1 ! send coupling field |
---|
76 | integer(kind=ip_i4_p), parameter, public :: LB_GET = 2 ! receive coupling field |
---|
77 | integer(kind=ip_i4_p), parameter, public :: LB_MAP = 3 ! map or interpolate |
---|
78 | integer(kind=ip_i4_p), parameter, public :: LB_OUT = 4 ! output to netCDF file |
---|
79 | integer(kind=ip_i4_p), parameter, public :: LB_READ = 5 ! input from netCDF file |
---|
80 | integer(kind=ip_i4_p), parameter, public :: LB_RST = 6 ! restart writing |
---|
81 | integer(kind=ip_i4_p), parameter, public :: LB_TRN = 7 ! partial restart writing |
---|
82 | ! Put here the maximum of field related events |
---|
83 | integer(kind=ip_i4_p), parameter, public :: LB_MAX = 7 |
---|
84 | |
---|
85 | ! Operation performed during initialisation phase ( = 2 = LB_NB_INIT_TIMING ) |
---|
86 | integer(kind=ip_i4_p), parameter, public :: LB_PART = 8 ! partition declaration |
---|
87 | integer(kind=ip_i4_p), parameter, public :: LB_ENDF = 9 ! end of definition phase |
---|
88 | ! Put here the maximum of initialisation related events |
---|
89 | integer(kind=ip_i4_p), parameter :: LB_NB_INIT_TIMING = 2 |
---|
90 | |
---|
91 | integer(kind=ip_i4_p), parameter, public :: LB_TERM = 10 ! termination phase |
---|
92 | |
---|
93 | integer(kind=ip_i4_p), parameter, public :: LB_UNDF = 0 ! undefined kind of event |
---|
94 | |
---|
95 | |
---|
96 | |
---|
97 | ! These 3 arrays store information related to the coupling events associated to |
---|
98 | ! a component : event description arrays |
---|
99 | |
---|
100 | integer(kind=ip_i4_p) , pointer :: local_table_lb(:) ! ID of the couplig field/event |
---|
101 | integer(kind=ip_i4_p) , pointer :: local_coupler_lb(:) ! ID of the coupler |
---|
102 | integer(kind=ip_i4_p) , pointer :: local_counterpart_lb(:) ! ID of the counterpart model |
---|
103 | integer(kind=ip_i4_p) , pointer :: local_tsid_lb(:) ! ID of the timestep for each coupling event |
---|
104 | |
---|
105 | |
---|
106 | real(ip_double_p) :: dl_simu_wtimer(2) |
---|
107 | |
---|
108 | integer(kind=ip_i4_p) :: ievent |
---|
109 | logical :: ldo_lb_analysis = .TRUE. |
---|
110 | |
---|
111 | contains |
---|
112 | |
---|
113 | ! -------------------------------------------------------------------------------- |
---|
114 | |
---|
115 | !> Initializes the timer methods, called once in an application |
---|
116 | |
---|
117 | subroutine oasis_lb_init |
---|
118 | |
---|
119 | implicit none |
---|
120 | |
---|
121 | character(len=*),parameter :: subname = '(oasis_timer_init)' |
---|
122 | ! |
---|
123 | ! First of all, take measurement of simulation start |
---|
124 | ! |
---|
125 | dl_simu_wtimer(1) = MPI_WTIME() |
---|
126 | |
---|
127 | end subroutine oasis_lb_init |
---|
128 | |
---|
129 | ! -------------------------------------------------------------------------------- |
---|
130 | |
---|
131 | !> Allocate event related arrays |
---|
132 | ! |
---|
133 | ! Called by oasis_coupler_setup if coupled component, by oasis_enddef if not |
---|
134 | |
---|
135 | subroutine oasis_lb_allocate (nb_event) |
---|
136 | |
---|
137 | implicit none |
---|
138 | |
---|
139 | integer(kind=ip_i4_p) ,intent(in) :: nb_event ! number of exchanged fields |
---|
140 | |
---|
141 | integer(kind=ip_i4_p) :: n, ierror |
---|
142 | |
---|
143 | character(len=*),parameter :: subname = '(oasis_lb_allocate)' |
---|
144 | |
---|
145 | |
---|
146 | |
---|
147 | ! Add 2 events for partionning and end of definition phases |
---|
148 | |
---|
149 | ALLOCATE(local_coupler_lb(nb_event+LB_NB_INIT_TIMING+1), stat=ierror) |
---|
150 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
151 | mpi_rank_local,' WARNING allocate ' |
---|
152 | ALLOCATE(local_counterpart_lb(nb_event+LB_NB_INIT_TIMING+1), stat=ierror) |
---|
153 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
154 | mpi_rank_local,' WARNING allocate ' |
---|
155 | ALLOCATE(local_table_lb(nb_event+LB_NB_INIT_TIMING+1), stat=ierror) |
---|
156 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
157 | mpi_rank_local,' WARNING allocate ' |
---|
158 | ALLOCATE(local_tsid_lb(nb_event+LB_NB_INIT_TIMING+1), stat=ierror) |
---|
159 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
160 | mpi_rank_local,' WARNING allocate ' |
---|
161 | |
---|
162 | ! Initialise arrays with undef value |
---|
163 | ! They keep this value for events not related to exchanges (init) |
---|
164 | DO n = 1, LB_NB_INIT_TIMING + 1 |
---|
165 | |
---|
166 | local_coupler_lb(1:LB_NB_INIT_TIMING+1) = LB_UNDF |
---|
167 | local_counterpart_lb(1:LB_NB_INIT_TIMING+1) = LB_UNDF |
---|
168 | local_table_lb(1:LB_NB_INIT_TIMING+1) = LB_UNDF |
---|
169 | local_tsid_lb(1:LB_NB_INIT_TIMING+1) = LB_UNDF |
---|
170 | |
---|
171 | ENDDO |
---|
172 | |
---|
173 | ! The event number is initialised to the number of initialisation related events |
---|
174 | ! and doubled to store before and after event measurement |
---|
175 | ievent = ( LB_NB_INIT_TIMING + 1 ) * 2 |
---|
176 | |
---|
177 | ! In case of uncoupled component, LB_PART event is filled with dummy |
---|
178 | ! measures |
---|
179 | IF ( nb_event == 0 ) THEN |
---|
180 | IF (OASIS_Debug >= 2) & |
---|
181 | write(nulprt,*) subname,' coupled component including uncoupled processes only' |
---|
182 | ! launch fake partitionning measurement' |
---|
183 | CALL oasis_lb_measure(-1, LB_PART) |
---|
184 | CALL oasis_lb_measure(-1, LB_PART) |
---|
185 | ENDIF |
---|
186 | |
---|
187 | |
---|
188 | end subroutine oasis_lb_allocate |
---|
189 | |
---|
190 | ! -------------------------------------------------------------------------------- |
---|
191 | |
---|
192 | !> Define the size of the timer arrays |
---|
193 | ! |
---|
194 | ! Called by oasis_coupler_setup if coupled component |
---|
195 | ! Called for each coupling field |
---|
196 | ! |
---|
197 | |
---|
198 | subroutine oasis_lb_define (icpl, & |
---|
199 | ikind_id, & |
---|
200 | icpl_id, & |
---|
201 | imodelctn_id, & |
---|
202 | itsid_id, & |
---|
203 | icpl_nb, & |
---|
204 | lmap, & |
---|
205 | lout, & |
---|
206 | lrst, & |
---|
207 | ltrn) |
---|
208 | |
---|
209 | implicit none |
---|
210 | |
---|
211 | character(len=*),parameter :: subname = '(oasis_lb_define)' |
---|
212 | |
---|
213 | integer(kind=ip_i4_p) ,intent(in) :: icpl ! coupler index |
---|
214 | integer(kind=ip_i4_p) ,intent(in) :: ikind_id ! kind of main operation |
---|
215 | integer(kind=ip_i4_p) ,intent(in) :: icpl_id ! coupler id |
---|
216 | integer(kind=ip_i4_p) ,intent(in) :: imodelctn_id ! model counterpart id |
---|
217 | integer(kind=ip_i4_p) ,intent(in) :: itsid_id ! timestep id |
---|
218 | integer(kind=ip_i4_p) ,intent(in) :: icpl_nb ! nb of cpl time steps |
---|
219 | logical ,intent(in), optional :: lmap ! additional mapping |
---|
220 | logical ,intent(in), optional :: lout ! additional output |
---|
221 | logical ,intent(in), optional :: lrst ! additional restart |
---|
222 | logical ,intent(in), optional :: ltrn ! additional partial restart |
---|
223 | |
---|
224 | |
---|
225 | ! fill event description arrays after initialisation index (4) |
---|
226 | integer(kind=ip_i4_p), SAVE :: ne = 4 |
---|
227 | |
---|
228 | ! By default, coupling events do not come with interpolation, restarts writing |
---|
229 | ! or netCDF output |
---|
230 | logical :: llmap, llout, llrst, lltrn |
---|
231 | llmap = .FALSE. ; llout = .FALSE. ; llrst = .FALSE. ; lltrn = .FALSE. |
---|
232 | |
---|
233 | if (present(lmap)) llmap = lmap |
---|
234 | if (present(lout)) llout = lout |
---|
235 | if (present(lrst)) llrst = lrst |
---|
236 | if (present(ltrn)) lltrn = ltrn |
---|
237 | |
---|
238 | |
---|
239 | ! Fill coupling event description arrays as defined in oasis_coupler_setup |
---|
240 | local_table_lb(ne) = icpl |
---|
241 | local_coupler_lb(ne) = icpl_id |
---|
242 | local_counterpart_lb(ne) = imodelctn_id |
---|
243 | local_tsid_lb(ne) = itsid_id |
---|
244 | |
---|
245 | ! The event number is incremented by the number of active coupling |
---|
246 | ! exchanges related to the coupling field |
---|
247 | ievent = ievent + icpl_nb*2 |
---|
248 | |
---|
249 | ! The event number is incremented by the number of active coupling |
---|
250 | ! exchanges another time if interpolation |
---|
251 | IF ( llmap ) & |
---|
252 | ievent = ievent + icpl_nb*2 |
---|
253 | |
---|
254 | ! The event number is incremented by the number of active coupling |
---|
255 | ! exchanges another time if netCDF output in link with EXPOUT namcouple option |
---|
256 | IF ( llout .AND. ikind_id /= LB_OUT ) & |
---|
257 | ievent = ievent + icpl_nb*2 |
---|
258 | |
---|
259 | ! The event number is incremented by two if restart writing |
---|
260 | IF ( llrst ) THEN |
---|
261 | ievent = ievent + 2 |
---|
262 | ! Two additional events if restart and EXPOUT |
---|
263 | IF ( llout ) & |
---|
264 | ievent = ievent + 2 |
---|
265 | ENDIF |
---|
266 | |
---|
267 | |
---|
268 | ! Since there is no way to count the number |
---|
269 | ! of partial restart calls before the end of the run, we prefer to |
---|
270 | ! limit our measurement to the first one |
---|
271 | IF ( lltrn ) & |
---|
272 | ievent = ievent + 2 |
---|
273 | |
---|
274 | ne = ne + 1 |
---|
275 | |
---|
276 | IF (OASIS_Debug >= 10) & |
---|
277 | write(nulprt,*) subname,' New total of timeline events : ', ievent |
---|
278 | |
---|
279 | end subroutine oasis_lb_define |
---|
280 | |
---|
281 | !> Do measurement |
---|
282 | ! |
---|
283 | ! This routine is called before and after each coupling event, |
---|
284 | ! mainly in oasis_advance_run (coupling exchange events), |
---|
285 | ! but also in oasis_enddef, oasis_def_partition and oasis_terminate (via oasis_lb_print ) |
---|
286 | |
---|
287 | subroutine oasis_lb_measure(icoupler, ikind, tsid) |
---|
288 | |
---|
289 | implicit none |
---|
290 | character(len=*),parameter :: subname = '(oasis_lb_measure)' |
---|
291 | |
---|
292 | integer(kind=ip_i4_p) ,intent(in) :: icoupler ! coupler ID |
---|
293 | integer(kind=ip_i4_p) ,intent(in) :: ikind ! kind of event |
---|
294 | |
---|
295 | integer(kind=ip_i4_p) ,intent(in) ,optional :: tsid ! timestep ID |
---|
296 | |
---|
297 | integer(kind=ip_i4_p) :: eventid, ierror |
---|
298 | integer(kind=ip_i4_p), SAVE :: event_index = 1 ! Timeline array index |
---|
299 | |
---|
300 | !Timestep id |
---|
301 | integer(kind=ip_i4_p) :: itsid |
---|
302 | |
---|
303 | ! Special array to temporarily store timings of initialisation events |
---|
304 | real(ip_single_p), SAVE :: r_init_timers(LB_NB_INIT_TIMING*2) |
---|
305 | |
---|
306 | real(ip_single_p) :: dmeas ! measurement |
---|
307 | |
---|
308 | ! kind of timing : TRUE = before event, FALSE = after event |
---|
309 | logical, SAVE :: tic_tac = .TRUE. |
---|
310 | |
---|
311 | ! Logical made necessary to identify and limit partial restart writing |
---|
312 | ! event measurement |
---|
313 | logical, SAVE :: over_partial_restart = .FALSE. |
---|
314 | |
---|
315 | ! If itsid is not provided, put the default value |
---|
316 | IF (present(tsid)) then |
---|
317 | itsid = tsid |
---|
318 | else |
---|
319 | itsid = ispval |
---|
320 | endif |
---|
321 | |
---|
322 | ! Termination of LB_ENDF can be called twice (restart case) |
---|
323 | ! Skip the second one |
---|
324 | IF ( ikind == LB_ENDF .AND. event_index >= ( LB_NB_INIT_TIMING*2 + 1 )) & |
---|
325 | return |
---|
326 | |
---|
327 | ! Special recording for init operations |
---|
328 | IF ( ikind == LB_ENDF .OR. ikind == LB_PART ) THEN |
---|
329 | r_init_timers(event_index) = MPI_WTIME() - dl_simu_wtimer(1) |
---|
330 | event_index = event_index + 1 |
---|
331 | tic_tac = .NOT. tic_tac |
---|
332 | return |
---|
333 | ENDIF |
---|
334 | |
---|
335 | ! Special treatment during the first event after intialisation events |
---|
336 | IF ( event_index == ( LB_NB_INIT_TIMING*2 + 1 ) ) THEN |
---|
337 | |
---|
338 | ! Allocate the array to store the timeline local to the MPI process |
---|
339 | ALLOCATE(local_timeline(ievent), stat=ierror) |
---|
340 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
341 | mpi_rank_local,' WARNING allocate ' |
---|
342 | |
---|
343 | ! ensure zero values if irregulat event number across processes |
---|
344 | local_timeline(:)%kind = LB_UNDF ; local_timeline(:)%field = LB_UNDF |
---|
345 | local_timeline(:)%cntp = LB_UNDF ; local_timeline(:)%timer = 0. |
---|
346 | local_timeline(:)%tsid = ispval |
---|
347 | |
---|
348 | ! Fill the timeline array with initialisation measurements |
---|
349 | local_timeline(1:4)%timer = r_init_timers(1:4) |
---|
350 | local_timeline(1:2)%kind = LB_PART |
---|
351 | local_timeline(3:4)%kind = LB_ENDF |
---|
352 | local_timeline(1:4)%field = LB_UNDF |
---|
353 | local_timeline(1:4)%cntp = LB_UNDF |
---|
354 | local_timeline(1:4)%tsid = ispval |
---|
355 | |
---|
356 | ENDIF |
---|
357 | |
---|
358 | IF ( event_index > ievent ) THEN |
---|
359 | write(nulprt,*) subname,'Warning: Event after limit ' |
---|
360 | call flush(nulprt) |
---|
361 | return |
---|
362 | ENDIF |
---|
363 | |
---|
364 | ! Detect additional partial restart operations |
---|
365 | IF ( ikind == LB_TRN .AND. over_partial_restart ) return |
---|
366 | |
---|
367 | ! Warn that partial restart writing measurement is over |
---|
368 | IF ( ikind == LB_TRN .AND. .NOT. tic_tac ) over_partial_restart = .TRUE. |
---|
369 | |
---|
370 | ! Take measurement |
---|
371 | dmeas = MPI_WTIME() - dl_simu_wtimer(1) |
---|
372 | |
---|
373 | ! Find the event ID in event description tables related to the coupler |
---|
374 | ! ID of the event |
---|
375 | IF ( ikind == LB_TERM ) THEN |
---|
376 | eventid = 3 |
---|
377 | ELSE |
---|
378 | eventid = 1 |
---|
379 | DO WHILE ( local_table_lb(eventid) /= icoupler ) |
---|
380 | eventid = eventid + 1 |
---|
381 | ENDDO |
---|
382 | ENDIF |
---|
383 | |
---|
384 | IF (OASIS_Debug >= 10) THEN |
---|
385 | write(nulprt,*) subname,' event index, coupler, kind, timestep_id ', & |
---|
386 | event_index, icoupler, ikind, itsid |
---|
387 | call flush(nulprt) |
---|
388 | ENDIF |
---|
389 | |
---|
390 | ! Fill the timeline array |
---|
391 | ! - with timings |
---|
392 | local_timeline(event_index)%timer = dmeas |
---|
393 | ! - with the kind of event as specified during the routine call |
---|
394 | local_timeline(event_index)%kind = ikind |
---|
395 | ! - with the field ID associated to the event ID |
---|
396 | local_timeline(event_index)%field = local_coupler_lb(eventid) |
---|
397 | ! - with the counterpart model ID associated to the event ID |
---|
398 | local_timeline(event_index)%cntp = local_counterpart_lb(eventid) |
---|
399 | ! - with the timestep ID associated to the event ID |
---|
400 | local_timeline(event_index)%tsid = itsid |
---|
401 | |
---|
402 | ! Increment timeline array index |
---|
403 | event_index = event_index + 1 |
---|
404 | |
---|
405 | ! Change kind of timing |
---|
406 | tic_tac = .NOT. tic_tac |
---|
407 | |
---|
408 | end subroutine oasis_lb_measure |
---|
409 | |
---|
410 | !> Print timers |
---|
411 | ! |
---|
412 | ! Called once at oasis_termination phase |
---|
413 | ! |
---|
414 | |
---|
415 | subroutine oasis_lb_print(component_name, simutime) |
---|
416 | |
---|
417 | implicit none |
---|
418 | |
---|
419 | character(len=*) ,intent(in) :: component_name |
---|
420 | integer(kind=ip_i4_p) ,intent(in) :: simutime ! simulated time |
---|
421 | |
---|
422 | character(len=*),parameter :: subname = '(oasis_lb_print)' |
---|
423 | character(ic_med) :: filename |
---|
424 | |
---|
425 | logical :: lopened, l_first_event, l_second_event |
---|
426 | |
---|
427 | integer(ip_i4_p),parameter :: diag_nb=100 |
---|
428 | integer(ip_i4_p) :: n, ierror, nf, nk, ne, np |
---|
429 | |
---|
430 | integer(ip_i4_p) :: ncid |
---|
431 | integer(ip_i4_p) :: ncvarid(6), ncdimid(2) |
---|
432 | |
---|
433 | integer(ip_i4_p) :: lb_tag = 4000 |
---|
434 | integer(ip_i4_p) :: max_field_nb |
---|
435 | integer(ip_i4_p) :: lastevent |
---|
436 | |
---|
437 | integer(ip_i4_p), allocatable :: pair_field_cntp(:) |
---|
438 | integer(ip_i4_p), allocatable :: ig_globSize(:) |
---|
439 | integer(ip_i4_p), allocatable :: write_timeline(:) |
---|
440 | |
---|
441 | real(kind=ip_double_p) :: null_r, dlb_time |
---|
442 | real(kind=ip_double_p) :: soonest_start, latest_stop, clock_spread |
---|
443 | |
---|
444 | real(kind=ip_single_p) :: timer_max |
---|
445 | real(kind=ip_single_p) :: bk_ev_min, bk_ev_max |
---|
446 | real(kind=ip_single_p) :: wtg_time, cmp_time |
---|
447 | real(kind=ip_single_p) :: lb_diag_buff(diag_nb) |
---|
448 | real(kind=ip_single_p) :: bk_ev_tmp(2) |
---|
449 | |
---|
450 | real(kind=ip_double_p), allocatable :: dg_simu_wtimer(:) |
---|
451 | real(kind=ip_double_p), allocatable :: g_lb_diag_buff(:,:) |
---|
452 | |
---|
453 | real(kind=ip_single_p), allocatable :: bk_avg(:) |
---|
454 | real(kind=ip_single_p), allocatable :: bk_minval(:), bk_maxval(:) |
---|
455 | real(kind=ip_single_p), pointer :: tl_global_timer(:,:) |
---|
456 | real(kind=ip_single_p), allocatable :: tl_sum_kind (:,:,:) |
---|
457 | |
---|
458 | logical :: lcontinue |
---|
459 | ! |
---|
460 | ! |
---|
461 | ! Make measurement of the time spend waiting the beginning of the |
---|
462 | ! LB analysis phase + the first step of this analysis. |
---|
463 | ! The analysis is synchronised with the oasis_mpi_barrier statement |
---|
464 | CALL oasis_lb_measure(-1,LB_TERM) |
---|
465 | |
---|
466 | call oasis_mpi_barrier(mpi_comm_global) |
---|
467 | |
---|
468 | ! Take performance measurement of the load balancing analysis itself |
---|
469 | ! and exclude this time from the total simulation time |
---|
470 | ! |
---|
471 | ! Done by all components, including uncoupled |
---|
472 | ! |
---|
473 | |
---|
474 | dlb_time = MPI_WTIME() |
---|
475 | |
---|
476 | dl_simu_wtimer(2) = dlb_time |
---|
477 | |
---|
478 | ! Check if the analysis is possible |
---|
479 | call MPI_AllReduce(ldo_lb_analysis, lcontinue, 1, MPI_LOGICAL, MPI_LAND, & |
---|
480 | mpi_comm_global, ierror) |
---|
481 | |
---|
482 | IF (.NOT. lcontinue ) THEN |
---|
483 | WRITE(nulprt,*) subname, ' WARNING: Load balancing analysis impossible ' |
---|
484 | return |
---|
485 | ENDIF |
---|
486 | |
---|
487 | ! ---------- |
---|
488 | ! FIRST STEP |
---|
489 | ! ---------- |
---|
490 | ! Evaluate total simulation time |
---|
491 | ! for the coupled system and all components separately |
---|
492 | |
---|
493 | ! |
---|
494 | IF (mpi_rank_global /= 0) THEN |
---|
495 | |
---|
496 | CALL MPI_Gather( dl_simu_wtimer, 2, MPI_DOUBLE, null_r, 0, MPI_DOUBLE, & |
---|
497 | 0, mpi_comm_global, ierror) |
---|
498 | |
---|
499 | ELSE |
---|
500 | |
---|
501 | ALLOCATE(dg_simu_wtimer(2*mpi_size_global), stat=ierror) |
---|
502 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
503 | mpi_rank_local,' WARNING allocating total timer' |
---|
504 | |
---|
505 | dg_simu_wtimer = 0._ip_double_p |
---|
506 | |
---|
507 | ! All processes send the total simulation time to the master |
---|
508 | CALL MPI_Gather( dl_simu_wtimer, 2, MPI_DOUBLE, dg_simu_wtimer, 2, MPI_DOUBLE, & |
---|
509 | 0, mpi_comm_global, ierror) |
---|
510 | |
---|
511 | ! The LB analysis is output on a text file |
---|
512 | INQUIRE(unit=nulet, opened=lopened) |
---|
513 | IF ( lopened ) CLOSE(nulet) |
---|
514 | OPEN(nulet, file='load_balancing_info.txt', status='REPLACE') |
---|
515 | |
---|
516 | ! |
---|
517 | !soonest_start = MINVAL(dg_simu_wtimer(1:mpi_size_global*2-1:2)) |
---|
518 | !latest_stop = MAXVAL(dg_simu_wtimer(2:mpi_size_global*2:2)) |
---|
519 | soonest_start = MINVAL(dg_simu_wtimer, mask = dg_simu_wtimer > 0._ip_double_p ) |
---|
520 | latest_stop = MAXVAL(dg_simu_wtimer, mask = dg_simu_wtimer > 0._ip_double_p ) |
---|
521 | |
---|
522 | ! |
---|
523 | ! Check clock synchronisation |
---|
524 | ! |
---|
525 | clock_spread = latest_stop - MINVAL(dg_simu_wtimer(2:mpi_size_global*2:2)) |
---|
526 | |
---|
527 | WRITE(nulet,*) ' ' |
---|
528 | WRITE(nulet,*) ' ------------------------------' |
---|
529 | WRITE(nulet,*) ' ' |
---|
530 | WRITE(nulet,'(a37,f8.3)') ' Coupled model simulation time (s): ', latest_stop-soonest_start |
---|
531 | WRITE(nulet,*) ' (coupled components only) ' |
---|
532 | WRITE(nulet,*) ' ' |
---|
533 | WRITE(nulet,*) ' ------------------------------' |
---|
534 | WRITE(nulet,*) ' ' |
---|
535 | WRITE(nulet,'(a16,f10.3)') ' Speed (SYPD) : ', DBLE(simutime) * 365. / ( latest_stop-soonest_start ) |
---|
536 | WRITE(nulet,'(a16,f8.3)') ' Cost (CHPSY) : ', DBLE(mpi_size_world) * 24 * ( latest_stop-soonest_start ) / ( DBLE(simutime) * 365. ) |
---|
537 | WRITE(nulet,*) ' ' |
---|
538 | |
---|
539 | |
---|
540 | ! The same anaysis is given per component |
---|
541 | n = 1 |
---|
542 | |
---|
543 | DO nf = 1, prism_amodels |
---|
544 | |
---|
545 | soonest_start = latest_stop |
---|
546 | latest_stop = 0. |
---|
547 | ne = 1 |
---|
548 | |
---|
549 | DO np = 1, mpi_size_world |
---|
550 | IF ( TRIM(compnmlist(np)) == TRIM(prism_modnam(nf)) ) THEN |
---|
551 | soonest_start = MIN(dg_simu_wtimer(n),soonest_start) |
---|
552 | latest_stop = MAX(dg_simu_wtimer(n+1),latest_stop) |
---|
553 | n = n + 2 |
---|
554 | ne = ne + 1 |
---|
555 | ENDIF |
---|
556 | ENDDO |
---|
557 | |
---|
558 | WRITE(nulet,'(A7,A10,A19,F8.3)') ' Model ', TRIM(prism_modnam(nf)),& |
---|
559 | ' simulation time : ', latest_stop-soonest_start |
---|
560 | WRITE(nulet,'(A32,F8.3)') ' cost (CHPSY): ', & |
---|
561 | DBLE(ne) * 24 * ( latest_stop-soonest_start ) / ( DBLE(simutime) * 365. ) |
---|
562 | |
---|
563 | ENDDO |
---|
564 | |
---|
565 | DEALLOCATE(dg_simu_wtimer) |
---|
566 | |
---|
567 | ENDIF |
---|
568 | |
---|
569 | |
---|
570 | |
---|
571 | |
---|
572 | |
---|
573 | ! ---------- |
---|
574 | ! SECOND STEP |
---|
575 | ! ---------- |
---|
576 | ! Gather timers information and produce one timeline per component |
---|
577 | |
---|
578 | |
---|
579 | |
---|
580 | |
---|
581 | ! "before" termination phase measurement |
---|
582 | if (ET_debug) CALL oasis_lb_measure(-1,LB_TERM) |
---|
583 | |
---|
584 | DEALLOCATE( local_table_lb, local_coupler_lb, local_counterpart_lb, local_tsid_lb ) |
---|
585 | |
---|
586 | IF (OASIS_Debug >= 10) THEN |
---|
587 | write(nulprt,*) subname,' gather size (events) ', ievent |
---|
588 | write(nulprt,*) subname,' communicator size ', mpi_size_local |
---|
589 | call flush(nulprt) |
---|
590 | ENDIF |
---|
591 | |
---|
592 | IF ( mpi_rank_local /= 0 ) THEN |
---|
593 | |
---|
594 | CALL MPI_Gather(ievent, 1, MPI_INT, n, 0, MPI_INT, & |
---|
595 | 0, mpi_comm_local, ierror) |
---|
596 | |
---|
597 | CALL MPI_Gather( local_timeline(:)%timer, ievent, MPI_REAL, & |
---|
598 | null_r, 0, MPI_REAL, 0, mpi_comm_local, ierror) |
---|
599 | |
---|
600 | ELSE |
---|
601 | |
---|
602 | ! to avoid unexplained error with PGI compiler |
---|
603 | ALLOCATE(write_timeline(ievent/2), stat=ierror) |
---|
604 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
605 | mpi_rank_local,' WARNING allocating tmp array ' |
---|
606 | |
---|
607 | ALLOCATE(ig_globSize(mpi_size_local), stat=ierror) |
---|
608 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
609 | mpi_rank_local,' WARNING allocating table size ' |
---|
610 | |
---|
611 | ! Every process of every component is sending the size of its |
---|
612 | ! timeline to the component master process |
---|
613 | CALL MPI_Gather(ievent, 1, MPI_INT, ig_globSize, 1, MPI_INT, & |
---|
614 | 0, mpi_comm_local, ierror) |
---|
615 | |
---|
616 | IF (OASIS_Debug >= 10) THEN |
---|
617 | write(nulprt,*) subname,' check local size of timeline ', ig_globSize |
---|
618 | call flush(nulprt) |
---|
619 | ENDIF |
---|
620 | |
---|
621 | ! Check that there is no block. |
---|
622 | ! The current version of our LB analysis does not accept incoherent |
---|
623 | ! partitionning in the same component |
---|
624 | DO n = 1, mpi_rank_local |
---|
625 | IF ( ig_globSize(n) /= ievent ) THEN |
---|
626 | write(nulprt,*) subname, ' Non identical number of events across processes of the same component. We stop ' |
---|
627 | call oasis_abort(file=__FILE__,line=__LINE__) |
---|
628 | ENDIF |
---|
629 | ENDDO |
---|
630 | |
---|
631 | DEALLOCATE(ig_globSize) |
---|
632 | |
---|
633 | ! Allocate the timeline array that gathers all process timelines of |
---|
634 | ! the same component. |
---|
635 | ! This array is the memory bound of the LB analysis : |
---|
636 | ! size = DOUBLE * nb_events * nb_process_per_component |
---|
637 | WRITE(nulprt,'(a16,a42,f13.3)') subname,' Main allocation for timeline. Size (KB): ', & |
---|
638 | DBLE(ievent*mpi_size_local*8)/1000. |
---|
639 | |
---|
640 | call flush(nulprt) |
---|
641 | |
---|
642 | |
---|
643 | ALLOCATE ( tl_global_timer(ievent,mpi_size_local), stat=ierror ) |
---|
644 | IF (ierror /= 0) then |
---|
645 | WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
646 | mpi_rank_local,' ERROR allocating component timeline ' |
---|
647 | call oasis_abort(file=__FILE__,line=__LINE__) |
---|
648 | ENDIF |
---|
649 | |
---|
650 | ! Every process of every component is sending its timeline to the |
---|
651 | ! component master process |
---|
652 | CALL MPI_Gather( local_timeline(:)%timer, ievent, MPI_REAL, & |
---|
653 | tl_global_timer, ievent, MPI_REAL, & |
---|
654 | 0, mpi_comm_local, ierror) |
---|
655 | |
---|
656 | |
---|
657 | ! Creation of the netCDF file to save timelines |
---|
658 | |
---|
659 | write(filename,'(a9,a,a3)') 'timeline_',TRIM(component_name), '.nc' |
---|
660 | ierror = nf90_create(trim(filename),NF90_CLOBBER,ncid) |
---|
661 | IF (OASIS_Debug >= 2) THEN |
---|
662 | write(nulprt,*) subname,' open netcdf file ', ierror; call flush(nulprt) |
---|
663 | ENDIF |
---|
664 | |
---|
665 | ierror = nf90_put_att(ncid, NF90_GLOBAL, 'source', & |
---|
666 | "OASIS coupler instrumented for load balancing analysis") |
---|
667 | ierror = nf90_put_att(ncid, NF90_GLOBAL, 'title', & |
---|
668 | "OASIS event (nx) timeline on every MPI process (ny) ") |
---|
669 | ierror = nf90_put_att(ncid, NF90_GLOBAL, 'component_id', compid ) |
---|
670 | |
---|
671 | IF (OASIS_Debug >= 10) THEN |
---|
672 | WRITE(nulprt,*) subname,' global timeline (max 100,100)', & |
---|
673 | tl_global_timer(1:MIN(100,ievent),1:MIN(100,mpi_size_local)) |
---|
674 | call flush(nulprt) |
---|
675 | ENDIF |
---|
676 | |
---|
677 | ! Remove from the netcdf file non attributed events |
---|
678 | IF ( ANY ( local_timeline(:)%kind == LB_TERM ) ) THEN |
---|
679 | lastevent = 1 |
---|
680 | DO WHILE ( local_timeline(lastevent)%kind /= LB_TERM ) |
---|
681 | lastevent = lastevent + 1 |
---|
682 | ENDDO |
---|
683 | ievent = MIN ( ievent, lastevent+1 ) |
---|
684 | ENDIF |
---|
685 | |
---|
686 | IF (OASIS_Debug >= 10) & |
---|
687 | WRITE(nulprt,*) subname,' Final timeline length ', ievent |
---|
688 | |
---|
689 | ierror = nf90_def_dim(ncid,'nx',ievent/2,ncdimid(1)) |
---|
690 | IF (OASIS_Debug >= 2) THEN |
---|
691 | write(nulprt,*) subname,' define netcdf dim ', ierror ; call flush(nulprt) |
---|
692 | ENDIF |
---|
693 | |
---|
694 | ierror = nf90_def_dim(ncid,'ny',mpi_size_local,ncdimid(2)) |
---|
695 | IF (OASIS_Debug >= 2) THEN |
---|
696 | write(nulprt,*) subname,' define netcdf dim ', ierror ; call flush(nulprt) |
---|
697 | ENDIF |
---|
698 | |
---|
699 | ierror = nf90_def_var(ncid,'timer_strt',NF90_FLOAT,ncdimid(:),ncvarid(1)) |
---|
700 | IF (OASIS_Debug >= 2) THEN |
---|
701 | write(nulprt,*) subname,' define netcdf var ', ierror ; call flush(nulprt) |
---|
702 | ENDIF |
---|
703 | ierror = nf90_put_att(ncid, ncvarid(1), 'long_name', "Start of OASIS event") |
---|
704 | ierror = nf90_put_att(ncid, ncvarid(1), 'units', "seconds since OASIS initialisation") |
---|
705 | |
---|
706 | ierror = nf90_def_var(ncid,'timer_stop',NF90_FLOAT,ncdimid(:),ncvarid(2)) |
---|
707 | IF (OASIS_Debug >= 2) THEN |
---|
708 | write(nulprt,*) subname,' define netcdf var ', ierror ; call flush(nulprt) |
---|
709 | ENDIF |
---|
710 | ierror = nf90_put_att(ncid, ncvarid(2), 'long_name', "End of OASIS event") |
---|
711 | ierror = nf90_put_att(ncid, ncvarid(2), 'units', "seconds since OASIS initialisation") |
---|
712 | |
---|
713 | ierror = nf90_def_var(ncid,'kind',NF90_INT,ncdimid(1),ncvarid(3)) |
---|
714 | IF (OASIS_Debug >= 2) THEN |
---|
715 | write(nulprt,*) subname,' define netcdf var ', ierror ; call flush(nulprt) |
---|
716 | ENDIF |
---|
717 | ierror = nf90_put_att(ncid, ncvarid(3), 'long_name', "Kind of OASIS event") |
---|
718 | ierror = nf90_put_att(ncid, ncvarid(3), 'standard_name', "Kind") |
---|
719 | ierror = nf90_put_att(ncid, ncvarid(3), 'flag_values', & |
---|
720 | "0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10") |
---|
721 | ierror = nf90_put_att(ncid, ncvarid(3), 'flag_meanings', & |
---|
722 | "UNDF PUT GET MAP OUT READ RST TRN PART ENDF TERM") |
---|
723 | ierror = nf90_put_att(ncid, ncvarid(3), 'comment', & |
---|
724 | "undefined send receive map file_output file_input restart_writing partial_restart_writing partition_def end_def terminate") |
---|
725 | |
---|
726 | |
---|
727 | ierror = nf90_def_var(ncid,'field',NF90_INT,ncdimid(1),ncvarid(4)) |
---|
728 | IF (OASIS_Debug >= 2) THEN |
---|
729 | write(nulprt,*) subname,' define netcdf var ', ierror ; call flush(nulprt) |
---|
730 | ENDIF |
---|
731 | ierror = nf90_put_att(ncid, ncvarid(4), 'long_name', "OASIS field name ID") |
---|
732 | ierror = nf90_put_att(ncid, ncvarid(4), 'standard_name', "Field") |
---|
733 | ierror = nf90_put_att(ncid, ncvarid(4), 'comment', "Sequence follows namcouple order ") |
---|
734 | |
---|
735 | ierror = nf90_def_var(ncid,'component',NF90_INT,ncdimid(1),ncvarid(5)) |
---|
736 | IF (OASIS_Debug >= 2) THEN |
---|
737 | write(nulprt,*) subname,' define netcdf var ', ierror ; call flush(nulprt) |
---|
738 | ENDIF |
---|
739 | ierror = nf90_put_att(ncid, ncvarid(5), 'long_name', "Counterpart coupled component ID") |
---|
740 | ierror = nf90_put_att(ncid, ncvarid(5), 'standard_name', "Component") |
---|
741 | ierror = nf90_put_att(ncid, ncvarid(5), 'comment', "Sequence follows component MPI rank in global communicator ") |
---|
742 | |
---|
743 | ierror = nf90_def_var(ncid,'tsid',NF90_INT,ncdimid(1),ncvarid(6)) |
---|
744 | ierror = nf90_def_var_fill(ncid, ncvarid(6), 0, ispval) |
---|
745 | IF (OASIS_Debug >= 2) THEN |
---|
746 | write(nulprt,*) subname,' define netcdf var ', ierror ; call flush(nulprt) |
---|
747 | ENDIF |
---|
748 | ierror = nf90_put_att(ncid, ncvarid(6), 'long_name', "Timestep ID (simulated time of compoenent calling Oasis) at this event ") |
---|
749 | ierror = nf90_put_att(ncid, ncvarid(6), 'standard_name', "tsid") |
---|
750 | ierror = nf90_put_att(ncid, ncvarid(6), 'units', "simulated time (seconds)") |
---|
751 | |
---|
752 | ierror = nf90_enddef(ncid) |
---|
753 | IF (OASIS_Debug >= 2) THEN |
---|
754 | write(nulprt,*) subname,' close netcdf definition ', ierror ; call flush(nulprt) |
---|
755 | ENDIF |
---|
756 | |
---|
757 | ! before/after event timings are saved in separate variables |
---|
758 | ierror = nf90_put_var(ncid,ncvarid(1),& |
---|
759 | tl_global_timer(1:ievent-1:2,1:mpi_size_local)) |
---|
760 | IF (OASIS_Debug >= 2) THEN |
---|
761 | write(nulprt,*) subname,' put netcdf var timeline', ierror ; call flush(nulprt) |
---|
762 | ENDIF |
---|
763 | |
---|
764 | ierror = nf90_put_var(ncid,ncvarid(2),& |
---|
765 | tl_global_timer(2:ievent:2,1:mpi_size_local)) |
---|
766 | IF (OASIS_Debug >= 2) THEN |
---|
767 | write(nulprt,*) subname,' put netcdf var timeline', ierror ; call flush(nulprt) |
---|
768 | ENDIF |
---|
769 | |
---|
770 | |
---|
771 | IF (OASIS_Debug >= 10) THEN |
---|
772 | write(nulprt,*) subname, 'check kind values', local_timeline(:)%kind |
---|
773 | write(nulprt,*) subname, 'check field values', local_timeline(:)%field |
---|
774 | write(nulprt,*) subname, 'check counterpart model values', local_timeline(:)%cntp |
---|
775 | write(nulprt,*) subname, 'check timestep ID values', local_timeline(:)%tsid |
---|
776 | call flush(nulprt) |
---|
777 | ENDIF |
---|
778 | |
---|
779 | ! only "before" event characteristics are saved ("after" have the same) |
---|
780 | write_timeline(1:ievent/2) = local_timeline(1:ievent-1:2)%kind |
---|
781 | ierror = nf90_put_var(ncid,ncvarid(3),write_timeline(1:ievent/2)) |
---|
782 | IF (OASIS_Debug >= 2) THEN |
---|
783 | write(nulprt,*) subname,' put netcdf var kind', ierror ; call flush(nulprt) |
---|
784 | ENDIF |
---|
785 | |
---|
786 | write_timeline(1:ievent/2) = local_timeline(1:ievent-1:2)%field |
---|
787 | ierror = nf90_put_var(ncid,ncvarid(4),write_timeline(1:ievent/2)) |
---|
788 | IF (OASIS_Debug >= 2) THEN |
---|
789 | write(nulprt,*) subname,' put netcdf var field', ierror ; call flush(nulprt) |
---|
790 | ENDIF |
---|
791 | |
---|
792 | write_timeline(1:ievent/2) = local_timeline(1:ievent-1:2)%cntp |
---|
793 | ierror = nf90_put_var(ncid,ncvarid(5),write_timeline(1:ievent/2)) |
---|
794 | IF (OASIS_Debug >= 2) THEN |
---|
795 | write(nulprt,*) subname,' put netcdf var counterpart model', ierror ; call flush(nulprt) |
---|
796 | ENDIF |
---|
797 | |
---|
798 | write_timeline(1:ievent/2) = local_timeline(1:ievent-1:2)%tsid |
---|
799 | ierror = nf90_put_var(ncid,ncvarid(6),write_timeline(1:ievent/2)) |
---|
800 | IF (OASIS_Debug >= 2) THEN |
---|
801 | write(nulprt,*) subname,' put netcdf var tsid', ierror ; call flush(nulprt) |
---|
802 | ENDIF |
---|
803 | |
---|
804 | DEALLOCATE(write_timeline) |
---|
805 | |
---|
806 | ierror = nf90_close(ncid) |
---|
807 | IF (OASIS_Debug >= 2) THEN |
---|
808 | write(nulprt,*) subname,' close netcdf file ', ierror ; call flush(nulprt) |
---|
809 | ENDIF |
---|
810 | |
---|
811 | |
---|
812 | |
---|
813 | |
---|
814 | |
---|
815 | ! ---------- |
---|
816 | ! THIRD STEP |
---|
817 | ! ---------- |
---|
818 | ! Produce summary numbers per component |
---|
819 | |
---|
820 | |
---|
821 | |
---|
822 | max_field_nb = MAXVAL(local_timeline(:)%field) |
---|
823 | |
---|
824 | ! if all component processes are coupled but dont exchange fields |
---|
825 | IF ( max_field_nb <= 0 ) THEN |
---|
826 | wtg_time = 0. ; cmp_time = 0. |
---|
827 | bk_ev_max = 1. ; bk_ev_min = 0. |
---|
828 | ALLOCATE( tl_sum_kind (1, LB_MAX, 3) , stat=ierror) |
---|
829 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
830 | mpi_rank_local,' WARNING allocating working buffer ' |
---|
831 | tl_sum_kind (1, LB_MAX, 3) = 0. |
---|
832 | |
---|
833 | ! if not, gather information |
---|
834 | ELSE |
---|
835 | |
---|
836 | ALLOCATE(bk_minval(ievent), stat=ierror) |
---|
837 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
838 | mpi_rank_local,' WARNING allocating working buffer ' |
---|
839 | ALLOCATE(bk_maxval(ievent), stat=ierror) |
---|
840 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
841 | mpi_rank_local,' WARNING allocating working buffer ' |
---|
842 | ALLOCATE(bk_avg(ievent), stat=ierror) |
---|
843 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
844 | mpi_rank_local,' WARNING allocating working buffer ' |
---|
845 | ! last measurement, useful to initialise minimum values |
---|
846 | timer_max = MAXVAL(tl_global_timer) |
---|
847 | |
---|
848 | ! minimum/maximum and average values across processes, for each |
---|
849 | ! event |
---|
850 | bk_minval(:) = MINVAL ( tl_global_timer(:,:), dim=2) |
---|
851 | bk_maxval(:) = MAXVAL ( tl_global_timer(:,:), dim=2) |
---|
852 | bk_avg(:) = SUM ( tl_global_timer(:,:), dim=2) / REAL(mpi_size_local,4) |
---|
853 | |
---|
854 | DEALLOCATE(tl_global_timer) |
---|
855 | |
---|
856 | ! Identify pair of field/counterpart model |
---|
857 | IF ( max_field_nb > 0 ) THEN |
---|
858 | ALLOCATE( pair_field_cntp (max_field_nb), stat=ierror ) |
---|
859 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
860 | mpi_rank_local,' WARNING allocating working buffer ' |
---|
861 | pair_field_cntp (:) = LB_UNDF |
---|
862 | do ne = 1, ievent |
---|
863 | IF ( local_timeline(ne)%kind == LB_GET ) & |
---|
864 | pair_field_cntp(local_timeline(ne)%field) = local_timeline(ne)%cntp |
---|
865 | enddo |
---|
866 | ENDIF |
---|
867 | |
---|
868 | ALLOCATE( tl_sum_kind (max_field_nb, LB_MAX, 3), stat=ierror ) |
---|
869 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
870 | mpi_rank_local,' WARNING allocating working buffer ' |
---|
871 | tl_sum_kind(:,:,:) = 0. |
---|
872 | |
---|
873 | bk_ev_min = timer_max |
---|
874 | bk_ev_max = 0. |
---|
875 | |
---|
876 | ! Loops on coupling fields and kind of events |
---|
877 | do nf = 1, max_field_nb; do nk = 1, LB_MAX |
---|
878 | |
---|
879 | ! We exclude first and last EVENT per field and per kind |
---|
880 | ! (not first and last coupling time step, as in |
---|
881 | ! LUCIA) |
---|
882 | |
---|
883 | l_first_event = .TRUE. |
---|
884 | l_second_event = .FALSE. |
---|
885 | |
---|
886 | ! Loop on "before" events |
---|
887 | do ne = 1, ievent-1, 2 |
---|
888 | |
---|
889 | ! For a given field and kind of event |
---|
890 | IF ( local_timeline(ne)%field == nf .AND. & |
---|
891 | local_timeline(ne)%kind == nk ) THEN |
---|
892 | |
---|
893 | ! max extension of event beginning (jitter) |
---|
894 | tl_sum_kind(nf,nk,1) = tl_sum_kind(nf,nk,1) + & |
---|
895 | ( bk_maxval(ne) - bk_minval(ne) ) |
---|
896 | ! max/max extension (for waiting time mainly) |
---|
897 | tl_sum_kind(nf,nk,2) = tl_sum_kind(nf,nk,2) + & |
---|
898 | ( bk_maxval(ne+1) - bk_maxval(ne) ) |
---|
899 | ! average |
---|
900 | tl_sum_kind(nf,nk,3) = tl_sum_kind(nf,nk,3) + & |
---|
901 | ( bk_avg(ne+1) - bk_avg(ne) ) |
---|
902 | |
---|
903 | ! evaluate overall bounds of the measurement for the |
---|
904 | ! component (bk_ev_min,bk_ev_max) |
---|
905 | IF ( nk /= LB_RST .AND. nk /= LB_TRN ) THEN |
---|
906 | IF ( l_first_event ) THEN |
---|
907 | l_first_event = .FALSE. |
---|
908 | l_second_event = .TRUE. |
---|
909 | bk_ev_tmp(2) = bk_maxval(ne+1) |
---|
910 | ELSEIF ( l_second_event ) THEN |
---|
911 | bk_ev_min = MIN( bk_ev_min, bk_minval(ne) ) |
---|
912 | l_second_event = .FALSE. |
---|
913 | bk_ev_tmp(2) = bk_ev_tmp(1) |
---|
914 | ELSE |
---|
915 | bk_ev_tmp(2) = bk_ev_tmp(1) |
---|
916 | ENDIF |
---|
917 | bk_ev_tmp(1) = bk_maxval(ne+1) |
---|
918 | ENDIF |
---|
919 | |
---|
920 | ENDIF |
---|
921 | |
---|
922 | enddo |
---|
923 | |
---|
924 | bk_ev_max = MAX( bk_ev_max, bk_ev_tmp(2) ) |
---|
925 | |
---|
926 | enddo; enddo |
---|
927 | |
---|
928 | ! computation of the waiting time |
---|
929 | wtg_time = SUM(tl_sum_kind(:,LB_PUT:LB_GET,2)) |
---|
930 | |
---|
931 | ! computation of the computing time |
---|
932 | cmp_time = bk_ev_max - bk_ev_min - wtg_time |
---|
933 | |
---|
934 | DEALLOCATE(bk_avg) |
---|
935 | DEALLOCATE(bk_minval) |
---|
936 | DEALLOCATE(bk_maxval) |
---|
937 | |
---|
938 | ENDIF |
---|
939 | |
---|
940 | ENDIF |
---|
941 | |
---|
942 | DEALLOCATE(local_timeline) |
---|
943 | |
---|
944 | |
---|
945 | |
---|
946 | |
---|
947 | ! ---------- |
---|
948 | ! FOURTH STEP |
---|
949 | ! ---------- |
---|
950 | ! Produce global summary numbers for the whole coupled system |
---|
951 | |
---|
952 | |
---|
953 | |
---|
954 | |
---|
955 | IF ( mpi_rank_local == 0 ) THEN |
---|
956 | |
---|
957 | |
---|
958 | lb_diag_buff(:) = 0. |
---|
959 | |
---|
960 | ! Store load balancing main information: computation vs waiting time |
---|
961 | |
---|
962 | lb_diag_buff(1) = cmp_time |
---|
963 | lb_diag_buff(2) = wtg_time |
---|
964 | |
---|
965 | ! Total jitter |
---|
966 | lb_diag_buff(3) = SUM(tl_sum_kind(:,LB_PUT:LB_GET,1)) |
---|
967 | |
---|
968 | |
---|
969 | ! Partial coupling cost (%) |
---|
970 | lb_diag_buff(4) = 100.*wtg_time / (bk_ev_max - bk_ev_min) |
---|
971 | |
---|
972 | ! Partial coupling cost including OASIS operations (%) |
---|
973 | lb_diag_buff(5) = 100.* ( wtg_time + SUM(tl_sum_kind(:,LB_MAP:LB_TRN,3)) ) / (bk_ev_max - bk_ev_min) |
---|
974 | |
---|
975 | ! Total mapping/interpolation |
---|
976 | lb_diag_buff(6) = SUM(tl_sum_kind(:,LB_MAP,3)) |
---|
977 | |
---|
978 | ! with spread |
---|
979 | lb_diag_buff(7) = SUM(tl_sum_kind(:,LB_MAP,1)) |
---|
980 | |
---|
981 | ! Total Netcdf output (OUTPUT+EXPOUT+restart) |
---|
982 | lb_diag_buff(8) = SUM(tl_sum_kind(:,LB_OUT:LB_TRN,3)) |
---|
983 | |
---|
984 | ! with spread : |
---|
985 | lb_diag_buff(9) = SUM(tl_sum_kind(:,LB_OUT:LB_TRN,1)) |
---|
986 | |
---|
987 | ! including restart |
---|
988 | lb_diag_buff(10) = SUM(tl_sum_kind(:,LB_RST:LB_TRN,3)) |
---|
989 | |
---|
990 | ! with spread : |
---|
991 | lb_diag_buff(11) = SUM(tl_sum_kind(:,LB_RST:LB_TRN,1)) |
---|
992 | |
---|
993 | ! specific waiting times (oasis_get only) |
---|
994 | do n = 1, prism_amodels |
---|
995 | do nf = 1, max_field_nb |
---|
996 | IF ( pair_field_cntp(nf) == n ) & |
---|
997 | lb_diag_buff(11+n) = lb_diag_buff(11+n) + tl_sum_kind(nf,LB_GET,2) |
---|
998 | enddo |
---|
999 | enddo |
---|
1000 | |
---|
1001 | IF (OASIS_Debug >= 10) THEN |
---|
1002 | DO nf = 1, max_field_nb; DO nk = 1, LB_MAX |
---|
1003 | IF ( SUM(tl_sum_kind(nf,nk,:)) /= 0. ) THEN |
---|
1004 | write(nulprt,*) subname,' valid field/kind results ', nf, nk |
---|
1005 | write(nulprt,*) subname,' : ', tl_sum_kind(nf,nk,:) |
---|
1006 | call flush(nulprt) |
---|
1007 | ENDIF |
---|
1008 | ENDDO; ENDDO |
---|
1009 | ENDIF |
---|
1010 | |
---|
1011 | IF ( mpi_rank_global == 0 ) THEN |
---|
1012 | |
---|
1013 | allocate ( g_lb_diag_buff ( diag_nb, prism_amodels), stat=ierror ) |
---|
1014 | IF (ierror /= 0) WRITE(nulprt,*) subname,' model :',compid,' proc :',& |
---|
1015 | mpi_rank_local,' WARNING allocating diagnostic buffer ' |
---|
1016 | |
---|
1017 | g_lb_diag_buff(:,1) = DBLE(lb_diag_buff(:)) |
---|
1018 | |
---|
1019 | ELSE |
---|
1020 | |
---|
1021 | ! Send information to master root |
---|
1022 | call oasis_mpi_send(DBLE(lb_diag_buff),0,lb_tag,mpi_comm_global,'oasis_lb_print') |
---|
1023 | ENDIF |
---|
1024 | |
---|
1025 | IF ( ALLOCATED(pair_field_cntp) ) DEALLOCATE(pair_field_cntp) |
---|
1026 | DEALLOCATE(tl_sum_kind) |
---|
1027 | |
---|
1028 | ENDIF |
---|
1029 | |
---|
1030 | |
---|
1031 | IF ( mpi_rank_global == 0 ) THEN |
---|
1032 | |
---|
1033 | |
---|
1034 | ! Receive information from local roots |
---|
1035 | DO n = 2, prism_amodels |
---|
1036 | call oasis_mpi_recv(g_lb_diag_buff(:,n), mpi_root_global(n), lb_tag,& |
---|
1037 | mpi_comm_global,'oasis_lb_print') |
---|
1038 | END DO |
---|
1039 | |
---|
1040 | WRITE(nulet,*) ' ------------------------------' |
---|
1041 | WRITE(nulet,*) ' ' |
---|
1042 | WRITE(nulet,*) ' Load balance analysis ' |
---|
1043 | WRITE(nulet,*) ' ' |
---|
1044 | WRITE(nulet,*) ' ------------------------------' |
---|
1045 | WRITE(nulet,*) ' ' |
---|
1046 | |
---|
1047 | ! Print computation time, "waiting" time |
---|
1048 | |
---|
1049 | write(nulet,*) ' Model / Computing time / Waiting time ' |
---|
1050 | WRITE(nulet,*) ' ' |
---|
1051 | |
---|
1052 | DO n = 1, prism_amodels |
---|
1053 | write(nulet,'(a10,a3,f8.3,a3,f8.3)') & |
---|
1054 | TRIM(prism_modnam(n)) , ' / ', & |
---|
1055 | g_lb_diag_buff(1,n), ' / ', & |
---|
1056 | g_lb_diag_buff(2,n) |
---|
1057 | ENDDO |
---|
1058 | |
---|
1059 | WRITE(nulet,*) ' ' |
---|
1060 | WRITE(nulet,*) ' ------------------------------' |
---|
1061 | WRITE(nulet,*) ' ' |
---|
1062 | WRITE(nulet,*) ' Additional information ' |
---|
1063 | WRITE(nulet,*) ' ' |
---|
1064 | WRITE(nulet,*) ' ------------------------------' |
---|
1065 | |
---|
1066 | DO n = 1, prism_amodels |
---|
1067 | WRITE(nulet,*) ' ' |
---|
1068 | WRITE(nulet,*) ' ------------------------------' |
---|
1069 | write(nulet,'(a4,a10)') ' ', TRIM(prism_modnam(n)) |
---|
1070 | WRITE(nulet,*) ' ------------------------------' |
---|
1071 | WRITE(nulet,*) ' ' |
---|
1072 | write(nulet,*) ' Specific oasis_get time ' |
---|
1073 | write(nulet,*) ' (n/a if no oasis_get) ' |
---|
1074 | do ne = 1, prism_amodels |
---|
1075 | if ( g_lb_diag_buff(11+ne,n) > 0. ) THEN |
---|
1076 | write(nulet,'(a20,a10)') ' from model ', TRIM(prism_modnam(ne)) |
---|
1077 | write(nulet,'(a11,f8.3)') ' : ', g_lb_diag_buff(11+ne,n) |
---|
1078 | endif |
---|
1079 | enddo |
---|
1080 | WRITE(nulet,*) ' ' |
---|
1081 | WRITE(nulet,*) ' -------------------' |
---|
1082 | WRITE(nulet,*) ' ' |
---|
1083 | write(nulet,'(a16,f8.3)') ' Total jitter : ', g_lb_diag_buff(3,n) |
---|
1084 | WRITE(nulet,*) ' ' |
---|
1085 | write(nulet,'(a29,f6.2)') ' Partial coupling cost (%) : ', g_lb_diag_buff(4,n) |
---|
1086 | write(nulet,'(a56,f6.2)') ' Partial coupling cost including OASIS operations (%) : ', g_lb_diag_buff(5,n) |
---|
1087 | WRITE(nulet,*) ' ' |
---|
1088 | WRITE(nulet,*) ' ' |
---|
1089 | WRITE(nulet,*) ' OASIS Operations : ' |
---|
1090 | WRITE(nulet,*) ' -------------------' |
---|
1091 | write(nulet,'(a31,f8.3)') ' Total mapping/interpolation : ', g_lb_diag_buff(6,n) |
---|
1092 | write(nulet,'(a19,f8.3)') ' with spread : ', g_lb_diag_buff(7,n) |
---|
1093 | WRITE(nulet,*) ' ' |
---|
1094 | write(nulet,'(a46,f8.3)') ' Total Netcdf output (OUTPUT+EXPOUT+restart): ', g_lb_diag_buff(8,n) |
---|
1095 | write(nulet,'(a19,f8.3)') ' with spread : ', g_lb_diag_buff(9,n) |
---|
1096 | WRITE(nulet,*) ' ' |
---|
1097 | WRITE(nulet,'(a40,f8.3)') ' Total Netcdf output for restart only: ', g_lb_diag_buff(10,n) |
---|
1098 | write(nulet,'(a19,f8.3)') ' with spread : ', g_lb_diag_buff(11,n) |
---|
1099 | WRITE(nulet,*) ' ' |
---|
1100 | |
---|
1101 | call flush(nulet) |
---|
1102 | |
---|
1103 | ENDDO |
---|
1104 | |
---|
1105 | WRITE(nulet,*) ' ------------------------------' |
---|
1106 | WRITE(nulet,*) ' ' |
---|
1107 | WRITE(nulet,*) ' Total time of this load balancing analysis: ' |
---|
1108 | WRITE(nulet,'(a9,f8.3)') ' : ', MPI_WTIME() - dlb_time |
---|
1109 | WRITE(nulet,*) ' ' |
---|
1110 | WRITE(nulet,*) ' ------------------------------' |
---|
1111 | WRITE(nulet,*) ' ' |
---|
1112 | WRITE(nulet,*) ' Clock spread after synchronise ' |
---|
1113 | WRITE(nulet,*) ' .i.e. node clocks synchronisation (s) : ' |
---|
1114 | WRITE(nulet,'(a9,f12.8)') ' : ', clock_spread |
---|
1115 | WRITE(nulet,*) ' ' |
---|
1116 | WRITE(nulet,*) ' ------------------------------' |
---|
1117 | |
---|
1118 | CLOSE(nulet) |
---|
1119 | |
---|
1120 | IF ( mpi_rank_global == 0 ) DEALLOCATE(g_lb_diag_buff) |
---|
1121 | |
---|
1122 | ENDIF |
---|
1123 | |
---|
1124 | |
---|
1125 | |
---|
1126 | end subroutine oasis_lb_print |
---|
1127 | |
---|
1128 | ! -------------------------------------------------------------------------------- |
---|
1129 | |
---|
1130 | !> stop measurement |
---|
1131 | |
---|
1132 | subroutine oasis_lb_stop |
---|
1133 | |
---|
1134 | implicit none |
---|
1135 | |
---|
1136 | integer(ip_i4_p) :: ierror |
---|
1137 | character(len=*),parameter :: subname = '(oasis_timer_stop)' |
---|
1138 | ! |
---|
1139 | ldo_lb_analysis = .FALSE. |
---|
1140 | |
---|
1141 | end subroutine oasis_lb_stop |
---|
1142 | |
---|
1143 | ! -------------------------------------------------------------------------------- |
---|
1144 | end module mod_oasis_load_balancing |
---|