1 | MODULE timing |
---|
2 | !!======================================================================== |
---|
3 | !! *** MODULE timing *** |
---|
4 | !!======================================================================== |
---|
5 | !! History : 4.0 ! 2001-05 (R. Benshila) |
---|
6 | !!------------------------------------------------------------------------ |
---|
7 | |
---|
8 | !!------------------------------------------------------------------------ |
---|
9 | !! timming_init : initialize timing process |
---|
10 | !! timing_start : start Timer |
---|
11 | !! timing_stop : stop Timer |
---|
12 | !! timing_reset : end timing variable creation |
---|
13 | !! timing_finalize : compute stats and write output in calling w*_info |
---|
14 | !! timing_ini_var : create timing variables |
---|
15 | !! timing_listing : print instumented subroutines in ocean.output |
---|
16 | !! wcurrent_info : compute and print detailed stats on the current CPU |
---|
17 | !! wave_info : compute and print averaged statson all processors |
---|
18 | !! wmpi_info : compute and write global stats |
---|
19 | !! supress : suppress an element of the timing linked list |
---|
20 | !! insert : insert an element of the timing linked list |
---|
21 | !!------------------------------------------------------------------------ |
---|
22 | USE in_out_manager ! I/O manager |
---|
23 | USE dom_oce ! ocean domain |
---|
24 | USE lib_mpp |
---|
25 | |
---|
26 | IMPLICIT NONE |
---|
27 | PRIVATE |
---|
28 | |
---|
29 | PUBLIC timing_init, timing_finalize ! called in nemogcm module |
---|
30 | PUBLIC timing_reset ! called in step module |
---|
31 | PUBLIC timing_start, timing_stop ! called in each routine to time |
---|
32 | |
---|
33 | #if defined key_mpp_mpi |
---|
34 | INCLUDE 'mpif.h' |
---|
35 | #endif |
---|
36 | |
---|
37 | ! Variables for fine grain timing |
---|
38 | TYPE timer |
---|
39 | CHARACTER(LEN=20) :: cname |
---|
40 | REAL(wp) :: t_cpu, t_clock, tsum_cpu, tsum_clock, tmax_cpu, tmax_clock, tmin_cpu, tmin_clock, tsub_cpu, tsub_clock |
---|
41 | INTEGER :: ncount, ncount_max, ncount_rate |
---|
42 | INTEGER :: niter |
---|
43 | LOGICAL :: l_tdone |
---|
44 | TYPE(timer), POINTER :: next => NULL() |
---|
45 | TYPE(timer), POINTER :: prev => NULL() |
---|
46 | TYPE(timer), POINTER :: parent_section => NULL() |
---|
47 | END TYPE timer |
---|
48 | |
---|
49 | TYPE alltimer |
---|
50 | CHARACTER(LEN=20), DIMENSION(:), POINTER :: cname => NULL() |
---|
51 | REAL(wp), DIMENSION(:), POINTER :: tsum_cpu => NULL() |
---|
52 | REAL(wp), DIMENSION(:), POINTER :: tsum_clock => NULL() |
---|
53 | INTEGER, DIMENSION(:), POINTER :: niter => NULL() |
---|
54 | TYPE(alltimer), POINTER :: next => NULL() |
---|
55 | TYPE(alltimer), POINTER :: prev => NULL() |
---|
56 | END TYPE alltimer |
---|
57 | |
---|
58 | TYPE(timer), POINTER :: s_timer_root => NULL() |
---|
59 | TYPE(timer), POINTER :: s_timer => NULL() |
---|
60 | TYPE(timer), POINTER :: s_wrk => NULL() |
---|
61 | REAL(wp) :: t_overclock, t_overcpu |
---|
62 | LOGICAL :: l_initdone = .FALSE. |
---|
63 | INTEGER :: nsize |
---|
64 | |
---|
65 | ! Variables for coarse grain timing |
---|
66 | REAL(wp) :: tot_etime, tot_ctime |
---|
67 | REAL(kind=wp), DIMENSION(2) :: t_elaps, t_cpu |
---|
68 | REAL(wp), ALLOCATABLE, DIMENSION(:) :: all_etime, all_ctime |
---|
69 | INTEGER :: nfinal_count, ncount, ncount_rate, ncount_max |
---|
70 | INTEGER, DIMENSION(8) :: nvalues |
---|
71 | CHARACTER(LEN=8), DIMENSION(2) :: cdate |
---|
72 | CHARACTER(LEN=10), DIMENSION(2) :: ctime |
---|
73 | CHARACTER(LEN=5) :: czone |
---|
74 | |
---|
75 | ! From of ouput file (1/proc or one global) !RB to put in nammpp or namctl |
---|
76 | LOGICAL :: ln_onefile = .TRUE. |
---|
77 | LOGICAL :: lwriter |
---|
78 | !!---------------------------------------------------------------------- |
---|
79 | !! NEMO/OPA 4.0 , NEMO Consortium (2011) |
---|
80 | !! $Id$ |
---|
81 | !! Software governed by the CeCILL licence (NEMOGCM/NEMO_CeCILL.txt) |
---|
82 | !!---------------------------------------------------------------------- |
---|
83 | CONTAINS |
---|
84 | |
---|
85 | SUBROUTINE timing_start(cdinfo) |
---|
86 | !!---------------------------------------------------------------------- |
---|
87 | !! *** ROUTINE timing_start *** |
---|
88 | !! ** Purpose : collect execution time |
---|
89 | !!---------------------------------------------------------------------- |
---|
90 | CHARACTER(len=*), INTENT(in) :: cdinfo |
---|
91 | ! |
---|
92 | |
---|
93 | ! Create timing structure at first call |
---|
94 | IF( .NOT. l_initdone ) THEN |
---|
95 | CALL timing_ini_var(cdinfo) |
---|
96 | ELSE |
---|
97 | s_timer => s_timer_root |
---|
98 | DO WHILE( TRIM(s_timer%cname) /= TRIM(cdinfo) ) |
---|
99 | IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next |
---|
100 | END DO |
---|
101 | ENDIF |
---|
102 | s_timer%l_tdone = .FALSE. |
---|
103 | s_timer%niter = s_timer%niter + 1 |
---|
104 | s_timer%t_cpu = 0. |
---|
105 | s_timer%t_clock = 0. |
---|
106 | |
---|
107 | ! CPU time collection |
---|
108 | CALL CPU_TIME( s_timer%t_cpu ) |
---|
109 | ! clock time collection |
---|
110 | #if defined key_mpp_mpi |
---|
111 | s_timer%t_clock= MPI_Wtime() |
---|
112 | #else |
---|
113 | CALL SYSTEM_CLOCK(COUNT_RATE=s_timer%ncount_rate, COUNT_MAX=s_timer%ncount_max) |
---|
114 | CALL SYSTEM_CLOCK(COUNT = s_timer%ncount) |
---|
115 | #endif |
---|
116 | ! |
---|
117 | END SUBROUTINE timing_start |
---|
118 | |
---|
119 | |
---|
120 | SUBROUTINE timing_stop(cdinfo, csection) |
---|
121 | !!---------------------------------------------------------------------- |
---|
122 | !! *** ROUTINE timing_stop *** |
---|
123 | !! ** Purpose : finalize timing and output |
---|
124 | !!---------------------------------------------------------------------- |
---|
125 | CHARACTER(len=*), INTENT(in) :: cdinfo |
---|
126 | CHARACTER(len=*), INTENT(in), OPTIONAL :: csection |
---|
127 | ! |
---|
128 | INTEGER :: ifinal_count, iperiods |
---|
129 | REAL(wp) :: zcpu_end, zmpitime |
---|
130 | ! |
---|
131 | s_wrk => NULL() |
---|
132 | |
---|
133 | ! clock time collection |
---|
134 | #if defined key_mpp_mpi |
---|
135 | zmpitime = MPI_Wtime() |
---|
136 | #else |
---|
137 | CALL SYSTEM_CLOCK(COUNT = ifinal_count) |
---|
138 | #endif |
---|
139 | ! CPU time collection |
---|
140 | CALL CPU_TIME( zcpu_end ) |
---|
141 | |
---|
142 | s_timer => s_timer_root |
---|
143 | DO WHILE( TRIM(s_timer%cname) /= TRIM(cdinfo) ) |
---|
144 | IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next |
---|
145 | END DO |
---|
146 | |
---|
147 | ! CPU time correction |
---|
148 | s_timer%t_cpu = zcpu_end - s_timer%t_cpu - t_overcpu - s_timer%tsub_cpu |
---|
149 | |
---|
150 | ! clock time correction |
---|
151 | #if defined key_mpp_mpi |
---|
152 | s_timer%t_clock = zmpitime - s_timer%t_clock - t_overclock - s_timer%tsub_clock |
---|
153 | #else |
---|
154 | iperiods = ifinal_count - s_timer%ncount |
---|
155 | IF( ifinal_count < s_timer%ncount ) & |
---|
156 | iperiods = iperiods + s_timer%ncount_max |
---|
157 | s_timer%t_clock = REAL(iperiods) / s_timer%ncount_rate - t_overclock - s_timer%tsub_clock |
---|
158 | #endif |
---|
159 | |
---|
160 | ! Correction of parent section |
---|
161 | IF( .NOT. PRESENT(csection) ) THEN |
---|
162 | s_wrk => s_timer |
---|
163 | DO WHILE ( ASSOCIATED(s_wrk%parent_section ) ) |
---|
164 | s_wrk => s_wrk%parent_section |
---|
165 | s_wrk%tsub_cpu = s_wrk%tsub_cpu + s_timer%t_cpu |
---|
166 | s_wrk%tsub_clock = s_wrk%tsub_clock + s_timer%t_clock |
---|
167 | END DO |
---|
168 | ENDIF |
---|
169 | |
---|
170 | ! time diagnostics |
---|
171 | s_timer%tsum_clock = s_timer%tsum_clock + s_timer%t_clock |
---|
172 | s_timer%tsum_cpu = s_timer%tsum_cpu + s_timer%t_cpu |
---|
173 | !RB to use to get min/max during a time integration |
---|
174 | ! IF( .NOT. l_initdone ) THEN |
---|
175 | ! s_timer%tmin_clock = s_timer%t_clock |
---|
176 | ! s_timer%tmin_cpu = s_timer%t_cpu |
---|
177 | ! ELSE |
---|
178 | ! s_timer%tmin_clock = MIN( s_timer%tmin_clock, s_timer%t_clock ) |
---|
179 | ! s_timer%tmin_cpu = MIN( s_timer%tmin_cpu , s_timer%t_cpu ) |
---|
180 | ! ENDIF |
---|
181 | ! s_timer%tmax_clock = MAX( s_timer%tmax_clock, s_timer%t_clock ) |
---|
182 | ! s_timer%tmax_cpu = MAX( s_timer%tmax_cpu , s_timer%t_cpu ) |
---|
183 | ! |
---|
184 | s_timer%tsub_clock = 0. |
---|
185 | s_timer%tsub_cpu = 0. |
---|
186 | s_timer%l_tdone = .TRUE. |
---|
187 | ! |
---|
188 | END SUBROUTINE timing_stop |
---|
189 | |
---|
190 | |
---|
191 | SUBROUTINE timing_init |
---|
192 | !!---------------------------------------------------------------------- |
---|
193 | !! *** ROUTINE timing_init *** |
---|
194 | !! ** Purpose : open timing output file |
---|
195 | !!---------------------------------------------------------------------- |
---|
196 | INTEGER :: iperiods, istart_count, ifinal_count |
---|
197 | REAL(wp) :: zdum |
---|
198 | LOGICAL :: ll_f |
---|
199 | |
---|
200 | IF( ln_onefile ) THEN |
---|
201 | IF( lwp) CALL ctl_opn( numtime, 'timing.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, numout,.TRUE., narea ) |
---|
202 | lwriter = lwp |
---|
203 | ELSE |
---|
204 | CALL ctl_opn( numtime, 'timing.output', 'REPLACE', 'FORMATTED', 'SEQUENTIAL', -1, numout,.FALSE., narea ) |
---|
205 | lwriter = .TRUE. |
---|
206 | ENDIF |
---|
207 | |
---|
208 | IF( lwriter) THEN |
---|
209 | WRITE(numtime,*) |
---|
210 | WRITE(numtime,*) ' CNRS - NERC - Met OFFICE - MERCATOR-ocean - CMCC - INGV' |
---|
211 | WRITE(numtime,*) ' NEMO team' |
---|
212 | WRITE(numtime,*) ' Ocean General Circulation Model' |
---|
213 | WRITE(numtime,*) ' version 3.3 (2010) ' |
---|
214 | WRITE(numtime,*) |
---|
215 | WRITE(numtime,*) ' Timing Informations ' |
---|
216 | WRITE(numtime,*) |
---|
217 | WRITE(numtime,*) |
---|
218 | ENDIF |
---|
219 | |
---|
220 | ! Compute clock function overhead |
---|
221 | #if defined key_mpp_mpi |
---|
222 | t_overclock = MPI_WTIME() |
---|
223 | t_overclock = MPI_WTIME() - t_overclock |
---|
224 | #else |
---|
225 | CALL SYSTEM_CLOCK(COUNT_RATE=ncount_rate, COUNT_MAX=ncount_max) |
---|
226 | CALL SYSTEM_CLOCK(COUNT = istart_count) |
---|
227 | CALL SYSTEM_CLOCK(COUNT = ifinal_count) |
---|
228 | iperiods = ifinal_count - istart_count |
---|
229 | IF( ifinal_count < istart_count ) & |
---|
230 | iperiods = iperiods + ncount_max |
---|
231 | t_overclock = REAL(iperiods) / ncount_rate |
---|
232 | #endif |
---|
233 | |
---|
234 | ! Compute cpu_time function overhead |
---|
235 | CALL CPU_TIME(zdum) |
---|
236 | CALL CPU_TIME(t_overcpu) |
---|
237 | |
---|
238 | ! End overhead omputation |
---|
239 | t_overcpu = t_overcpu - zdum |
---|
240 | t_overclock = t_overcpu + t_overclock |
---|
241 | |
---|
242 | ! Timing on date and time |
---|
243 | CALL DATE_AND_TIME(cdate(1),ctime(1),czone,nvalues) |
---|
244 | |
---|
245 | CALL CPU_TIME(t_cpu(1)) |
---|
246 | #if defined key_mpp_mpi |
---|
247 | ! Start elapsed and CPU time counters |
---|
248 | t_elaps(1) = MPI_WTIME() |
---|
249 | #else |
---|
250 | CALL SYSTEM_CLOCK(COUNT_RATE=ncount_rate, COUNT_MAX=ncount_max) |
---|
251 | CALL SYSTEM_CLOCK(COUNT = ncount) |
---|
252 | #endif |
---|
253 | ! |
---|
254 | END SUBROUTINE timing_init |
---|
255 | |
---|
256 | |
---|
257 | SUBROUTINE timing_finalize |
---|
258 | !!---------------------------------------------------------------------- |
---|
259 | !! *** ROUTINE timing_finalize *** |
---|
260 | !! ** Purpose : compute average time |
---|
261 | !! write timing output file |
---|
262 | !!---------------------------------------------------------------------- |
---|
263 | TYPE(timer), POINTER :: s_temp |
---|
264 | INTEGER :: idum, iperiods, icode |
---|
265 | LOGICAL :: ll_ord, ll_averep |
---|
266 | CHARACTER(len=120) :: clfmt |
---|
267 | |
---|
268 | ll_averep = .TRUE. |
---|
269 | |
---|
270 | ! total CPU and elapse |
---|
271 | CALL CPU_TIME(t_cpu(2)) |
---|
272 | t_cpu(2) = t_cpu(2) - t_cpu(1) - t_overcpu |
---|
273 | #if defined key_mpp_mpi |
---|
274 | t_elaps(2) = MPI_WTIME() - t_elaps(1) - t_overclock |
---|
275 | #else |
---|
276 | CALL SYSTEM_CLOCK(COUNT = nfinal_count) |
---|
277 | iperiods = nfinal_count - ncount |
---|
278 | IF( nfinal_count < ncount ) & |
---|
279 | iperiods = iperiods + ncount_max |
---|
280 | t_elaps(2) = REAL(iperiods) / ncount_rate - t_overclock |
---|
281 | #endif |
---|
282 | |
---|
283 | ! End of timings on date & time |
---|
284 | CALL DATE_AND_TIME(cdate(2),ctime(2),czone,nvalues) |
---|
285 | |
---|
286 | ! Compute the numer of routines |
---|
287 | nsize = 0 |
---|
288 | s_timer => s_timer_root |
---|
289 | DO WHILE( ASSOCIATED(s_timer) ) |
---|
290 | nsize = nsize + 1 |
---|
291 | s_timer => s_timer%next |
---|
292 | END DO |
---|
293 | idum = nsize |
---|
294 | IF(lk_mpp) CALL mpp_sum(idum) |
---|
295 | IF( idum/jpnij /= nsize ) THEN |
---|
296 | IF( lwriter ) WRITE(numtime,*) ' ===> W A R N I N G: ' |
---|
297 | IF( lwriter ) WRITE(numtime,*) ' Some CPU have different number of routines instrumented for timing' |
---|
298 | IF( lwriter ) WRITE(numtime,*) ' No detailed report on averaged timing can be provided' |
---|
299 | IF( lwriter ) WRITE(numtime,*) ' The following detailed report only deals with the current processor' |
---|
300 | IF( lwriter ) WRITE(numtime,*) |
---|
301 | ll_averep = .FALSE. |
---|
302 | ENDIF |
---|
303 | |
---|
304 | #if defined key_mpp_mpi |
---|
305 | ! in MPI gather some info |
---|
306 | ALLOCATE( all_etime(jpnij), all_ctime(jpnij) ) |
---|
307 | CALL MPI_ALLGATHER(t_elaps(2), 1, MPI_DOUBLE_PRECISION, & |
---|
308 | all_etime , 1, MPI_DOUBLE_PRECISION, & |
---|
309 | MPI_COMM_OPA, icode) |
---|
310 | CALL MPI_ALLGATHER(t_cpu(2) , 1, MPI_DOUBLE_PRECISION, & |
---|
311 | all_ctime, 1, MPI_DOUBLE_PRECISION, & |
---|
312 | MPI_COMM_OPA, icode) |
---|
313 | tot_etime = SUM(all_etime(:)) |
---|
314 | tot_ctime = SUM(all_ctime(:)) |
---|
315 | #else |
---|
316 | tot_etime = t_elaps(2) |
---|
317 | tot_ctime = t_cpu (2) |
---|
318 | #endif |
---|
319 | |
---|
320 | ! write output file |
---|
321 | IF( lwriter ) WRITE(numtime,*) 'Total timing (sum) :' |
---|
322 | IF( lwriter ) WRITE(numtime,*) '--------------------' |
---|
323 | IF( lwriter ) WRITE(numtime,"('Elapsed Time (s) CPU Time (s)')") |
---|
324 | IF( lwriter ) WRITE(numtime,'(5x,f12.3,1x,f12.3)') tot_etime, tot_ctime |
---|
325 | IF( lwriter ) WRITE(numtime,*) |
---|
326 | #if defined key_mpp_mpi |
---|
327 | IF( ll_averep ) CALL waver_info |
---|
328 | CALL wmpi_info |
---|
329 | #endif |
---|
330 | IF( lwriter ) CALL wcurrent_info |
---|
331 | |
---|
332 | clfmt='(1X,"Timing started on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT")' |
---|
333 | IF( lwriter ) WRITE(numtime, TRIM(clfmt)) & |
---|
334 | & cdate(1)(7:8), cdate(1)(5:6), cdate(1)(1:4), & |
---|
335 | & ctime(1)(1:2), ctime(1)(3:4), ctime(1)(5:6), & |
---|
336 | & czone(1:3), czone(4:5) |
---|
337 | clfmt='(1X, "Timing ended on ",2(A2,"/"),A4," at ",2(A2,":"),A2," MET ",A3,":",A2," from GMT")' |
---|
338 | IF( lwriter ) WRITE(numtime, TRIM(clfmt)) & |
---|
339 | & cdate(2)(7:8), cdate(2)(5:6), cdate(2)(1:4), & |
---|
340 | & ctime(2)(1:2), ctime(2)(3:4), ctime(2)(5:6), & |
---|
341 | & czone(1:3), czone(4:5) |
---|
342 | |
---|
343 | IF( lwriter ) CLOSE(numtime) |
---|
344 | ! |
---|
345 | END SUBROUTINE timing_finalize |
---|
346 | |
---|
347 | |
---|
348 | SUBROUTINE wcurrent_info |
---|
349 | !!---------------------------------------------------------------------- |
---|
350 | !! *** ROUTINE wcurrent_info *** |
---|
351 | !! ** Purpose : compute and write timing output file |
---|
352 | !!---------------------------------------------------------------------- |
---|
353 | LOGICAL :: ll_ord |
---|
354 | CHARACTER(len=2048) :: clfmt |
---|
355 | |
---|
356 | ! reorder the current list by elapse time |
---|
357 | s_wrk => NULL() |
---|
358 | s_timer => s_timer_root |
---|
359 | DO |
---|
360 | ll_ord = .TRUE. |
---|
361 | s_timer => s_timer_root |
---|
362 | DO WHILE ( ASSOCIATED( s_timer%next ) ) |
---|
363 | IF (.NOT. ASSOCIATED(s_timer%next)) EXIT |
---|
364 | IF ( s_timer%tsum_clock < s_timer%next%tsum_clock ) THEN |
---|
365 | ALLOCATE(s_wrk) |
---|
366 | s_wrk = s_timer%next |
---|
367 | CALL insert (s_timer, s_timer_root, s_wrk) |
---|
368 | CALL suppress(s_timer%next) |
---|
369 | ll_ord = .FALSE. |
---|
370 | CYCLE |
---|
371 | ENDIF |
---|
372 | IF( ASSOCIATED(s_timer%next) ) s_timer => s_timer%next |
---|
373 | END DO |
---|
374 | IF( ll_ord ) EXIT |
---|
375 | END DO |
---|
376 | |
---|
377 | ! write current info |
---|
378 | WRITE(numtime,*) 'Detailed timing for proc :', narea-1 |
---|
379 | WRITE(numtime,*) '--------------------------' |
---|
380 | WRITE(numtime,*) 'Section ', & |
---|
381 | & 'Elapsed Time (s) ','Elapsed Time (%) ', & |
---|
382 | & 'CPU Time(s) ','CPU Time (%) ','CPU/Elapsed ','Frequency' |
---|
383 | s_timer => s_timer_root |
---|
384 | clfmt = '(1x,a,4x,f12.3,6x,f12.3,x,f12.3,2x,f12.3,6x,f7.3,2x,i9)' |
---|
385 | DO WHILE ( ASSOCIATED(s_timer) ) |
---|
386 | WRITE(numtime,TRIM(clfmt)) s_timer%cname, & |
---|
387 | & s_timer%tsum_clock,s_timer%tsum_clock*100./t_elaps(2), & |
---|
388 | & s_timer%tsum_cpu ,s_timer%tsum_cpu*100./t_cpu(2) , & |
---|
389 | & s_timer%tsum_cpu/s_timer%tsum_clock, s_timer%niter |
---|
390 | s_timer => s_timer%next |
---|
391 | END DO |
---|
392 | WRITE(numtime,*) |
---|
393 | ! |
---|
394 | END SUBROUTINE wcurrent_info |
---|
395 | |
---|
396 | #if defined key_mpp_mpi |
---|
397 | SUBROUTINE waver_info |
---|
398 | !!---------------------------------------------------------------------- |
---|
399 | !! *** ROUTINE wcurrent_info *** |
---|
400 | !! ** Purpose : compute and write averaged timing informations |
---|
401 | !!---------------------------------------------------------------------- |
---|
402 | TYPE(alltimer), POINTER :: sl_timer_glob_root => NULL() |
---|
403 | TYPE(alltimer), POINTER :: sl_timer_glob => NULL() |
---|
404 | TYPE(timer), POINTER :: sl_timer_ave_root => NULL() |
---|
405 | TYPE(timer), POINTER :: sl_timer_ave => NULL() |
---|
406 | INTEGER :: icode |
---|
407 | INTEGER :: ierr |
---|
408 | LOGICAL :: ll_ord |
---|
409 | CHARACTER(len=200) :: clfmt |
---|
410 | |
---|
411 | ! Initialised the global strucutre |
---|
412 | ALLOCATE(sl_timer_glob_root, Stat=ierr) |
---|
413 | IF(ierr /= 0)THEN |
---|
414 | WRITE(numtime,*) 'Failed to allocate global timing structure in waver_info' |
---|
415 | RETURN |
---|
416 | END IF |
---|
417 | |
---|
418 | ALLOCATE(sl_timer_glob_root%cname (jpnij), & |
---|
419 | sl_timer_glob_root%tsum_cpu (jpnij), & |
---|
420 | sl_timer_glob_root%tsum_clock(jpnij), & |
---|
421 | sl_timer_glob_root%niter (jpnij), Stat=ierr) |
---|
422 | IF(ierr /= 0)THEN |
---|
423 | WRITE(numtime,*) 'Failed to allocate global timing structure in waver_info' |
---|
424 | RETURN |
---|
425 | END IF |
---|
426 | sl_timer_glob_root%cname(:) = '' |
---|
427 | sl_timer_glob_root%tsum_cpu(:) = 0._wp |
---|
428 | sl_timer_glob_root%tsum_clock(:) = 0._wp |
---|
429 | sl_timer_glob_root%niter(:) = 0 |
---|
430 | sl_timer_glob_root%next => NULL() |
---|
431 | sl_timer_glob_root%prev => NULL() |
---|
432 | !ARPDBG - don't need to allocate a pointer that's immediately then |
---|
433 | ! set to point to some other object. |
---|
434 | !ALLOCATE(sl_timer_glob) |
---|
435 | !ALLOCATE(sl_timer_glob%cname (jpnij)) |
---|
436 | !ALLOCATE(sl_timer_glob%tsum_cpu (jpnij)) |
---|
437 | !ALLOCATE(sl_timer_glob%tsum_clock(jpnij)) |
---|
438 | !ALLOCATE(sl_timer_glob%niter (jpnij)) |
---|
439 | sl_timer_glob => sl_timer_glob_root |
---|
440 | ! |
---|
441 | IF( narea .EQ. 1 ) THEN |
---|
442 | ALLOCATE(sl_timer_ave_root) |
---|
443 | sl_timer_ave_root%cname = '' |
---|
444 | sl_timer_ave_root%t_cpu = 0._wp |
---|
445 | sl_timer_ave_root%t_clock = 0._wp |
---|
446 | sl_timer_ave_root%tsum_cpu = 0._wp |
---|
447 | sl_timer_ave_root%tsum_clock = 0._wp |
---|
448 | sl_timer_ave_root%tmax_cpu = 0._wp |
---|
449 | sl_timer_ave_root%tmax_clock = 0._wp |
---|
450 | sl_timer_ave_root%tmin_cpu = 0._wp |
---|
451 | sl_timer_ave_root%tmin_clock = 0._wp |
---|
452 | sl_timer_ave_root%tsub_cpu = 0._wp |
---|
453 | sl_timer_ave_root%tsub_clock = 0._wp |
---|
454 | sl_timer_ave_root%ncount = 0 |
---|
455 | sl_timer_ave_root%ncount_rate = 0 |
---|
456 | sl_timer_ave_root%ncount_max = 0 |
---|
457 | sl_timer_ave_root%niter = 0 |
---|
458 | sl_timer_ave_root%l_tdone = .FALSE. |
---|
459 | sl_timer_ave_root%next => NULL() |
---|
460 | sl_timer_ave_root%prev => NULL() |
---|
461 | ALLOCATE(sl_timer_ave) |
---|
462 | sl_timer_ave => sl_timer_ave_root |
---|
463 | ENDIF |
---|
464 | |
---|
465 | ! Gather info from all processors |
---|
466 | s_timer => s_timer_root |
---|
467 | DO WHILE ( ASSOCIATED(s_timer) ) |
---|
468 | CALL MPI_GATHER(s_timer%cname , 20, MPI_CHARACTER, & |
---|
469 | sl_timer_glob%cname, 20, MPI_CHARACTER, & |
---|
470 | 0, MPI_COMM_OPA, icode) |
---|
471 | CALL MPI_GATHER(s_timer%tsum_clock , 1, MPI_DOUBLE_PRECISION, & |
---|
472 | sl_timer_glob%tsum_clock, 1, MPI_DOUBLE_PRECISION, & |
---|
473 | 0, MPI_COMM_OPA, icode) |
---|
474 | CALL MPI_GATHER(s_timer%tsum_cpu , 1, MPI_DOUBLE_PRECISION, & |
---|
475 | sl_timer_glob%tsum_cpu, 1, MPI_DOUBLE_PRECISION, & |
---|
476 | 0, MPI_COMM_OPA, icode) |
---|
477 | CALL MPI_GATHER(s_timer%niter , 1, MPI_INTEGER, & |
---|
478 | sl_timer_glob%niter, 1, MPI_INTEGER, & |
---|
479 | 0, MPI_COMM_OPA, icode) |
---|
480 | |
---|
481 | IF( narea == 1 .AND. ASSOCIATED(s_timer%next) ) THEN |
---|
482 | ALLOCATE(sl_timer_glob%next) |
---|
483 | ALLOCATE(sl_timer_glob%next%cname (jpnij)) |
---|
484 | ALLOCATE(sl_timer_glob%next%tsum_cpu (jpnij)) |
---|
485 | ALLOCATE(sl_timer_glob%next%tsum_clock(jpnij)) |
---|
486 | ALLOCATE(sl_timer_glob%next%niter (jpnij)) |
---|
487 | sl_timer_glob%next%prev => sl_timer_glob |
---|
488 | sl_timer_glob%next%next => NULL() |
---|
489 | sl_timer_glob => sl_timer_glob%next |
---|
490 | ENDIF |
---|
491 | s_timer => s_timer%next |
---|
492 | END DO |
---|
493 | |
---|
494 | WRITE(*,*) 'ARPDBG: timing: done gathers' |
---|
495 | |
---|
496 | IF( narea == 1 ) THEN |
---|
497 | ! Compute some stats |
---|
498 | sl_timer_glob => sl_timer_glob_root |
---|
499 | DO WHILE( ASSOCIATED(sl_timer_glob) ) |
---|
500 | sl_timer_ave%cname = sl_timer_glob%cname(1) |
---|
501 | sl_timer_ave%tsum_cpu = SUM (sl_timer_glob%tsum_cpu (:)) / jpnij |
---|
502 | sl_timer_ave%tsum_clock = SUM (sl_timer_glob%tsum_clock(:)) / jpnij |
---|
503 | sl_timer_ave%tmax_cpu = MAXVAL(sl_timer_glob%tsum_cpu (:)) |
---|
504 | sl_timer_ave%tmax_clock = MAXVAL(sl_timer_glob%tsum_clock(:)) |
---|
505 | sl_timer_ave%tmin_cpu = MINVAL(sl_timer_glob%tsum_cpu (:)) |
---|
506 | sl_timer_ave%tmin_clock = MINVAL(sl_timer_glob%tsum_clock(:)) |
---|
507 | sl_timer_ave%niter = SUM (sl_timer_glob%niter (:)) |
---|
508 | ! |
---|
509 | IF( ASSOCIATED(sl_timer_glob%next) ) THEN |
---|
510 | ALLOCATE(sl_timer_ave%next) |
---|
511 | sl_timer_ave%next%prev => sl_timer_ave |
---|
512 | sl_timer_ave%next%next => NULL() |
---|
513 | sl_timer_ave => sl_timer_ave%next |
---|
514 | ENDIF |
---|
515 | sl_timer_glob => sl_timer_glob%next |
---|
516 | END DO |
---|
517 | |
---|
518 | WRITE(*,*) 'ARPDBG: timing: done computing stats' |
---|
519 | |
---|
520 | ! reorder the averaged list by CPU time |
---|
521 | s_wrk => NULL() |
---|
522 | sl_timer_ave => sl_timer_ave_root |
---|
523 | DO |
---|
524 | ll_ord = .TRUE. |
---|
525 | sl_timer_ave => sl_timer_ave_root |
---|
526 | DO WHILE( ASSOCIATED( sl_timer_ave%next ) ) |
---|
527 | |
---|
528 | IF( .NOT. ASSOCIATED(sl_timer_ave%next) ) EXIT |
---|
529 | |
---|
530 | IF ( sl_timer_ave%tsum_clock < sl_timer_ave%next%tsum_clock ) THEN |
---|
531 | ALLOCATE(s_wrk) |
---|
532 | ! Copy data into the new object pointed to by s_wrk |
---|
533 | s_wrk = sl_timer_ave%next |
---|
534 | ! Insert this new timer object before our current position |
---|
535 | CALL insert (sl_timer_ave, sl_timer_ave_root, s_wrk) |
---|
536 | ! Remove the old object from the list |
---|
537 | CALL suppress(sl_timer_ave%next) |
---|
538 | ll_ord = .FALSE. |
---|
539 | CYCLE |
---|
540 | ENDIF |
---|
541 | IF( ASSOCIATED(sl_timer_ave%next) ) sl_timer_ave => sl_timer_ave%next |
---|
542 | END DO |
---|
543 | IF( ll_ord ) EXIT |
---|
544 | END DO |
---|
545 | |
---|
546 | ! write averaged info |
---|
547 | WRITE(numtime,"('Averaged timing on all processors :')") |
---|
548 | WRITE(numtime,"('-----------------------------------')") |
---|
549 | WRITE(numtime,"('Section',13x,'Elap. Time(s)',2x,'Elap. Time(%)',2x, & |
---|
550 | & 'CPU Time(s)',2x,'CPU Time(%)',2x,'CPU/Elap',1x, & |
---|
551 | & 'Max elap(%)',2x,'Min elap(%)',2x, & |
---|
552 | & 'Freq')") |
---|
553 | sl_timer_ave => sl_timer_ave_root |
---|
554 | clfmt = '((A),E15.7,2x,f6.2,5x,f12.2,5x,f6.2,5x,f7.2,2x,f12.2,4x,f6.2,2x,f9.2)' |
---|
555 | DO WHILE ( ASSOCIATED(sl_timer_ave) ) |
---|
556 | WRITE(numtime,TRIM(clfmt)) sl_timer_ave%cname(1:18), & |
---|
557 | & sl_timer_ave%tsum_clock,sl_timer_ave%tsum_clock*100.*jpnij/tot_etime, & |
---|
558 | & sl_timer_ave%tsum_cpu ,sl_timer_ave%tsum_cpu*100.*jpnij/tot_ctime , & |
---|
559 | & sl_timer_ave%tsum_cpu/sl_timer_ave%tsum_clock, & |
---|
560 | & sl_timer_ave%tmax_clock*100.*jpnij/tot_etime, & |
---|
561 | & sl_timer_ave%tmin_clock*100.*jpnij/tot_etime, & |
---|
562 | & sl_timer_ave%niter/REAL(jpnij) |
---|
563 | sl_timer_ave => sl_timer_ave%next |
---|
564 | END DO |
---|
565 | WRITE(numtime,*) |
---|
566 | ! |
---|
567 | DEALLOCATE(sl_timer_ave_root) |
---|
568 | ENDIF |
---|
569 | ! |
---|
570 | DEALLOCATE(sl_timer_glob_root) |
---|
571 | ! |
---|
572 | END SUBROUTINE waver_info |
---|
573 | |
---|
574 | |
---|
575 | SUBROUTINE wmpi_info |
---|
576 | !!---------------------------------------------------------------------- |
---|
577 | !! *** ROUTINE wmpi_time *** |
---|
578 | !! ** Purpose : compute and write a summary of MPI infos |
---|
579 | !!---------------------------------------------------------------------- |
---|
580 | ! |
---|
581 | INTEGER :: idum, icode |
---|
582 | INTEGER, ALLOCATABLE, DIMENSION(:) :: iall_rank |
---|
583 | REAL(wp) :: ztot_ratio |
---|
584 | REAL(wp) :: zmax_etime, zmax_ctime, zmax_ratio, zmin_etime, zmin_ctime, zmin_ratio |
---|
585 | REAL(wp) :: zavg_etime, zavg_ctime, zavg_ratio |
---|
586 | REAL(wp), ALLOCATABLE, DIMENSION(:) :: zall_ratio |
---|
587 | CHARACTER(LEN=128), dimension(8) :: cllignes |
---|
588 | CHARACTER(LEN=128) :: clhline, clstart_date, clfinal_date |
---|
589 | CHARACTER(LEN=2048) :: clfmt |
---|
590 | |
---|
591 | ! Gather all times |
---|
592 | ALLOCATE( zall_ratio(jpnij), iall_rank(jpnij) ) |
---|
593 | IF( narea == 1 ) THEN |
---|
594 | iall_rank(:) = (/ (idum,idum=0,jpnij-1) /) |
---|
595 | |
---|
596 | ! Compute elapse user time |
---|
597 | zavg_etime = tot_etime/REAL(jpnij,wp) |
---|
598 | zmax_etime = MAXVAL(all_etime(:)) |
---|
599 | zmin_etime = MINVAL(all_etime(:)) |
---|
600 | |
---|
601 | ! Compute CPU user time |
---|
602 | zavg_ctime = tot_ctime/REAL(jpnij,wp) |
---|
603 | zmax_ctime = MAXVAL(all_ctime(:)) |
---|
604 | zmin_ctime = MINVAL(all_ctime(:)) |
---|
605 | |
---|
606 | ! Compute cpu/elapsed ratio |
---|
607 | zall_ratio(:) = all_ctime(:) / all_etime(:) |
---|
608 | ztot_ratio = SUM(zall_ratio(:)) |
---|
609 | zavg_ratio = ztot_ratio/REAL(jpnij,wp) |
---|
610 | zmax_ratio = MAXVAL(zall_ratio(:)) |
---|
611 | zmin_ratio = MINVAL(zall_ratio(:)) |
---|
612 | |
---|
613 | ! Output Format |
---|
614 | clhline ='1x,13("-"),"|",18("-"),"|",14("-"),"|",18("-"),/,' |
---|
615 | cllignes(1)='(1x,"MPI summary report :",/,' |
---|
616 | cllignes(2)='1x,"--------------------",//,' |
---|
617 | cllignes(3)='1x,"Process Rank |"," Elapsed Time (s) |"," CPU Time (s) |"," Ratio CPU/Elapsed",/,' |
---|
618 | cllignes(4)=' (1x,i4,9x,"|",f12.3,6x,"|",f12.3,2x,"|",4x,f7.3,/),' |
---|
619 | WRITE(cllignes(4)(1:4),'(I4)') jpnij |
---|
620 | cllignes(5)='1x,"Total |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,' |
---|
621 | cllignes(6)='1x,"Minimum |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,' |
---|
622 | cllignes(7)='1x,"Maximum |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3,/,' |
---|
623 | cllignes(8)='1x,"Average |",f12.3,6x,"|",F12.3,2x,"|",4x,f7.3)' |
---|
624 | clfmt=TRIM(cllignes(1))// TRIM(cllignes(2))//TRIM(cllignes(3))// & |
---|
625 | & TRIM(clhline)//TRIM(cllignes(4))//TRIM(clhline)//TRIM(cllignes(5))// & |
---|
626 | & TRIM(clhline)//TRIM(cllignes(6))//TRIM(clhline)//TRIM(cllignes(7))// & |
---|
627 | & TRIM(clhline)//TRIM(cllignes(8)) |
---|
628 | WRITE(numtime, TRIM(clfmt)) & |
---|
629 | (iall_rank(idum),all_etime(idum),all_ctime(idum),zall_ratio(idum),idum=1, jpnij), & |
---|
630 | tot_etime, tot_ctime, ztot_ratio, & |
---|
631 | zmin_etime, zmin_ctime, zmin_ratio, & |
---|
632 | zmax_etime, zmax_ctime, zmax_ratio, & |
---|
633 | zavg_etime, zavg_ctime, zavg_ratio |
---|
634 | WRITE(numtime,*) |
---|
635 | END IF |
---|
636 | ! |
---|
637 | DEALLOCATE(zall_ratio, iall_rank) |
---|
638 | ! |
---|
639 | END SUBROUTINE wmpi_info |
---|
640 | #endif |
---|
641 | |
---|
642 | |
---|
643 | SUBROUTINE timing_ini_var(cdinfo) |
---|
644 | !!---------------------------------------------------------------------- |
---|
645 | !! *** ROUTINE timing_ini_var *** |
---|
646 | !! ** Purpose : create timing structure |
---|
647 | !!---------------------------------------------------------------------- |
---|
648 | CHARACTER(len=*), INTENT(in) :: cdinfo |
---|
649 | LOGICAL :: ll_section |
---|
650 | |
---|
651 | ! |
---|
652 | IF( .NOT. ASSOCIATED(s_timer_root) ) THEN |
---|
653 | ALLOCATE(s_timer_root) |
---|
654 | s_timer_root%cname = cdinfo |
---|
655 | s_timer_root%t_cpu = 0._wp |
---|
656 | s_timer_root%t_clock = 0._wp |
---|
657 | s_timer_root%tsum_cpu = 0._wp |
---|
658 | s_timer_root%tsum_clock = 0._wp |
---|
659 | s_timer_root%tmax_cpu = 0._wp |
---|
660 | s_timer_root%tmax_clock = 0._wp |
---|
661 | s_timer_root%tmin_cpu = 0._wp |
---|
662 | s_timer_root%tmin_clock = 0._wp |
---|
663 | s_timer_root%tsub_cpu = 0._wp |
---|
664 | s_timer_root%tsub_clock = 0._wp |
---|
665 | s_timer_root%ncount = 0 |
---|
666 | s_timer_root%ncount_rate = 0 |
---|
667 | s_timer_root%ncount_max = 0 |
---|
668 | s_timer_root%niter = 0 |
---|
669 | s_timer_root%l_tdone = .FALSE. |
---|
670 | s_timer_root%next => NULL() |
---|
671 | s_timer_root%prev => NULL() |
---|
672 | s_timer => s_timer_root |
---|
673 | ! |
---|
674 | ALLOCATE(s_wrk) |
---|
675 | s_wrk => NULL() |
---|
676 | |
---|
677 | ELSE |
---|
678 | s_timer => s_timer_root |
---|
679 | ! case of already existing area (typically inside a loop) |
---|
680 | DO WHILE( ASSOCIATED(s_timer) ) |
---|
681 | IF( TRIM(s_timer%cname) .EQ. TRIM(cdinfo) ) RETURN |
---|
682 | s_timer => s_timer%next |
---|
683 | END DO |
---|
684 | |
---|
685 | ! end of the chain |
---|
686 | s_timer => s_timer_root |
---|
687 | DO WHILE( ASSOCIATED(s_timer%next) ) |
---|
688 | s_timer => s_timer%next |
---|
689 | END DO |
---|
690 | |
---|
691 | ALLOCATE(s_timer%next) |
---|
692 | s_timer%next%cname = cdinfo |
---|
693 | s_timer%next%t_cpu = 0._wp |
---|
694 | s_timer%next%t_clock = 0._wp |
---|
695 | s_timer%next%tsum_cpu = 0._wp |
---|
696 | s_timer%next%tsum_clock = 0._wp |
---|
697 | s_timer%next%tmax_cpu = 0._wp |
---|
698 | s_timer%next%tmax_clock = 0._wp |
---|
699 | s_timer%next%tmin_cpu = 0._wp |
---|
700 | s_timer%next%tmin_clock = 0._wp |
---|
701 | s_timer%next%tsub_cpu = 0._wp |
---|
702 | s_timer%next%tsub_clock = 0._wp |
---|
703 | s_timer%next%ncount = 0 |
---|
704 | s_timer%next%ncount_rate = 0 |
---|
705 | s_timer%next%ncount_max = 0 |
---|
706 | s_timer%next%niter = 0 |
---|
707 | s_timer%next%l_tdone = .FALSE. |
---|
708 | s_timer%next%parent_section => NULL() |
---|
709 | s_timer%next%prev => s_timer |
---|
710 | s_timer%next%next => NULL() |
---|
711 | s_timer => s_timer%next |
---|
712 | |
---|
713 | ! are we inside a section |
---|
714 | s_wrk => s_timer%prev |
---|
715 | ll_section = .FALSE. |
---|
716 | DO WHILE( ASSOCIATED(s_wrk) .AND. .NOT. ll_section ) |
---|
717 | IF( .NOT. s_wrk%l_tdone ) THEN |
---|
718 | ll_section = .TRUE. |
---|
719 | s_timer%parent_section => s_wrk |
---|
720 | ENDIF |
---|
721 | s_wrk => s_wrk%prev |
---|
722 | END DO |
---|
723 | ENDIF |
---|
724 | ! |
---|
725 | END SUBROUTINE timing_ini_var |
---|
726 | |
---|
727 | |
---|
728 | SUBROUTINE timing_reset |
---|
729 | !!---------------------------------------------------------------------- |
---|
730 | !! *** ROUTINE timing_reset *** |
---|
731 | !! ** Purpose : go to root of timing tree |
---|
732 | !!---------------------------------------------------------------------- |
---|
733 | l_initdone = .TRUE. |
---|
734 | ! IF(lwp) WRITE(numout,*) |
---|
735 | ! IF(lwp) WRITE(numout,*) 'timing_reset : instrumented routines for timing' |
---|
736 | ! IF(lwp) WRITE(numout,*) '~~~~~~~~~~~~' |
---|
737 | ! CALL timing_list(s_timer_root) |
---|
738 | ! WRITE(numout,*) |
---|
739 | ! |
---|
740 | END SUBROUTINE timing_reset |
---|
741 | |
---|
742 | |
---|
743 | RECURSIVE SUBROUTINE timing_list(ptr) |
---|
744 | |
---|
745 | TYPE(timer), POINTER, INTENT(inout) :: ptr |
---|
746 | ! |
---|
747 | IF( ASSOCIATED(ptr%next) ) CALL timing_list(ptr%next) |
---|
748 | IF(lwp) WRITE(numout,*)' ', ptr%cname |
---|
749 | ! |
---|
750 | END SUBROUTINE timing_list |
---|
751 | |
---|
752 | |
---|
753 | SUBROUTINE insert(sd_current, sd_root ,sd_ptr) |
---|
754 | !!---------------------------------------------------------------------- |
---|
755 | !! *** ROUTINE insert *** |
---|
756 | !! ** Purpose : insert an element in timer structure |
---|
757 | !!---------------------------------------------------------------------- |
---|
758 | TYPE(timer), POINTER, INTENT(inout) :: sd_current, sd_root, sd_ptr |
---|
759 | ! |
---|
760 | |
---|
761 | IF( ASSOCIATED( sd_current, sd_root ) ) THEN |
---|
762 | ! If our current element is the root element then |
---|
763 | ! replace it with the one being inserted |
---|
764 | sd_root => sd_ptr |
---|
765 | ELSE |
---|
766 | sd_current%prev%next => sd_ptr |
---|
767 | END IF |
---|
768 | sd_ptr%next => sd_current |
---|
769 | sd_ptr%prev => sd_current%prev |
---|
770 | sd_current%prev => sd_ptr |
---|
771 | ! Nullify the pointer to the new element now that it is held |
---|
772 | ! within the list. If we don't do this then a subsequent call |
---|
773 | ! to ALLOCATE memory to this pointer will fail. |
---|
774 | sd_ptr => NULL() |
---|
775 | ! |
---|
776 | END SUBROUTINE insert |
---|
777 | |
---|
778 | |
---|
779 | SUBROUTINE suppress(sd_ptr) |
---|
780 | !!---------------------------------------------------------------------- |
---|
781 | !! *** ROUTINE suppress *** |
---|
782 | !! ** Purpose : supress an element in timer structure |
---|
783 | !!---------------------------------------------------------------------- |
---|
784 | TYPE(timer), POINTER, INTENT(inout) :: sd_ptr |
---|
785 | ! |
---|
786 | TYPE(timer), POINTER :: sl_temp |
---|
787 | |
---|
788 | sl_temp => sd_ptr |
---|
789 | sd_ptr => sd_ptr%next |
---|
790 | IF ( ASSOCIATED(sl_temp%next) ) sl_temp%next%prev => sl_temp%prev |
---|
791 | DEALLOCATE(sl_temp) |
---|
792 | sl_temp => NULL() |
---|
793 | ! |
---|
794 | END SUBROUTINE suppress |
---|
795 | |
---|
796 | !!===================================================================== |
---|
797 | END MODULE timing |
---|