MODULE data_unstructured_mod USE ISO_C_BINDING USE OMP_LIB IMPLICIT NONE SAVE #include "unstructured.h90" INTEGER, PARAMETER :: eta_mass=1, eta_lag=2, & thermo_theta=1, thermo_entropy=2, thermo_moist=3, thermo_boussinesq=4, & caldyn_vert_cons=1, max_nb_stage=5 INDEX, BIND(C) :: caldyn_thermo=thermo_theta, caldyn_eta=eta_lag, & caldyn_vert_variant=caldyn_vert_cons, nb_threads=0, nb_stage=0 LOGICAL(C_BOOL), BIND(C) :: hydrostatic=.TRUE. LOGICAL(C_BOOL), BIND(C, NAME='debug_hevi_solver') :: debug_hevi_solver_=.TRUE. #ifdef CPP_MIXED_PREC LOGICAL(C_BOOL), BIND(C) :: mixed_precision=.TRUE. #else LOGICAL(C_BOOL), BIND(C) :: mixed_precision=.FALSE. #endif INDEX, BIND(C) :: llm, nqdyn, edge_num, primal_num, dual_num, & max_primal_deg, max_dual_deg, max_trisk_deg INDEX, ALLOCATABLE :: & ! deg(ij) = nb of vertices = nb of edges of primal/dual cell ij primal_deg(:), primal_edge(:,:), primal_vertex(:,:), primal_ne(:,:), & dual_deg(:), dual_edge(:,:), dual_vertex(:,:), dual_ne(:,:), & trisk_deg(:), trisk(:,:), & left(:), right(:), up(:), down(:) ! left and right are adjacent primal cells ! flux is positive when going from left to right ! up and down are adjacent dual cells ! circulation is positive when going from down to up TIME, PARAMETER :: print_trace_interval = 1. TIME, BIND(C) :: elapsed NUM, BIND(C) :: g, ptop, cpp, cppv, Rd, Rv, preff, Treff, pbot, Phi_bot, rho_bot NUM :: kappa NUM1(max_nb_stage), BIND(C) :: tauj ! diagonal of fast Butcher tableau NUM2(max_nb_stage,max_nb_stage), BIND(C) :: cslj, cflj ! slow and fast modified Butcher tableaus NUM1(:), ALLOCATABLE :: le, le_de, fv, Av, Ai NUM2(:,:), ALLOCATABLE :: centroid, xyz_v, Riv2, wee, ap,bp, mass_bl, mass_dak, mass_dbk INTEGER(C_INT), BIND(C) :: comm_icosa, dynamico_mpi_rank=0 INTEGER, PARAMETER :: id_dev1=1, id_dev2=2, & id_pvort_only=3, id_slow_hydro=4, id_fast=5, id_coriolis=6, id_theta=7, id_geopot=8, id_vert=9, & id_solver=10, id_slow_NH=11, id_NH_geopot=12, id_vert_NH=13, id_update=14, id_halo=15, & id_scalar_laplacian=16, nb_routines=16 TIME, PRIVATE :: start_time, time_spent(nb_routines) ! time spent in each kernel INTEGER, PRIVATE :: current_id, nb_calls(nb_routines) INTEGER(KIND=8), PRIVATE :: bytes(nb_routines) ! bytes read or written by each kernel CHARACTER(len = 10) :: id_name(nb_routines) = & (/'dev1 ', 'dev2 ', & 'pvort_only', 'slow_hydro', 'fast ', 'coriolis ', 'theta ', 'geopot ', 'vert ', & 'solver ', 'slow_NH ', 'NH_geopot ', 'vert_NH ', 'update ', 'halo_xchg ', 'scalar_lap' /) INTEGER, PARAMETER ::transfer_primal=1, transfer_edge=2, transfer_dual=3, transfer_max=3 TYPE Halo_transfer INTEGER :: ranks ! size of arrays rank, len INTEGER, ALLOCATABLE :: rank(:), & ! MPI ranks to communicate with num(:), & ! number of cells to send to / receive from other MPI ranks cells(:) ! local indices of cells to send/receive NUM, ALLOCATABLE :: buf2(:,:) END TYPE Halo_transfer TYPE(Halo_transfer), TARGET :: send_info(transfer_max), recv_info(transfer_max) CONTAINS !---------------------------- PROFILING -------------------------- SUBROUTINE init_trace() !$OMP MASTER time_spent(:)=0. bytes(:)=0 nb_calls(:)=0 !$OMP END MASTER END SUBROUTINE init_trace SUBROUTINE print_trace_() BIND(C, name='dynamico_print_trace') INTEGER :: id TIME :: total_spent total_spent=SUM(time_spent) IF(dynamico_mpi_rank==0) THEN PRINT *, '========================= Performance metrics =========================' PRINT *, 'Total time spent in instrumented code (seconds) :', total_spent PRINT *, 'Name, #calls, %time, microsec/call, MB/sec' DO id=1,nb_routines IF(nb_calls(id)>0) PRINT *, id_name(id), nb_calls(id), INT(100.*time_spent(id)/total_spent), & INT(1e6*time_spent(id)/nb_calls(id)), INT(1e-6*bytes(id)/time_spent(id)) END DO END IF END SUBROUTINE print_trace_ SUBROUTINE print_trace() !$OMP MASTER IF(SUM(time_spent)>print_trace_interval) THEN CALL print_trace_ CALL init_trace() END IF !$OMP END MASTER END SUBROUTINE print_trace SUBROUTINE enter_trace(id, nbytes) INTEGER :: id, nbytes !$OMP MASTER current_id = id bytes(id) = bytes(id) + nbytes nb_calls(id)=nb_calls(id)+1 start_time = OMP_GET_WTIME() !$OMP END MASTER END SUBROUTINE enter_trace SUBROUTINE exit_trace() TIME :: elapsed !$OMP MASTER elapsed = OMP_GET_WTIME()-start_time IF(elapsed<0.) elapsed=0. time_spent(current_id) = time_spent(current_id) + elapsed !$OMP END MASTER END SUBROUTINE exit_trace !---------------------------- CONTEXT INITIALIZATION -------------------------- #define ALLOC1(v,n1) IF(ALLOCATED(v)) DEALLOCATE(v) ; ALLOCATE(v(n1)) #define ALLOC2(v,n1,n2) IF(ALLOCATED(v)) DEALLOCATE(v) ; ALLOCATE(v(n1,n2)) SUBROUTINE init_mesh( & primal_deg_, primal_edge_, primal_ne_, & dual_deg_, dual_edge_, dual_ne_, dual_vertex_, & left_, right_, up_, down_ ,& trisk_deg_, trisk_) BINDC(init_mesh) INDEX :: primal_deg_(primal_num), primal_edge_(max_primal_deg,primal_num), & primal_ne_(max_primal_deg,primal_num), & dual_deg_(dual_num), dual_edge_(max_dual_deg,dual_num), & dual_ne_(max_dual_deg,dual_num), & dual_vertex_(max_dual_deg,dual_num), & trisk_deg_(edge_num), trisk_(max_trisk_deg, edge_num) INDEX, DIMENSION(edge_num) :: left_, right_, down_, up_ PRINT *, 'init_mesh ...' PRINT *, 'Primal mesh : ', primal_num, max_primal_deg PRINT *, 'Dual mesh : ', dual_num, max_dual_deg PRINT *, 'Edge mesh : ', edge_num, max_trisk_deg PRINT *, 'Vertical levels :', llm ALLOC1(primal_deg, primal_num) ALLOC2(primal_edge, max_primal_deg,primal_num) ALLOC2(primal_ne, max_primal_deg,primal_num) ALLOC1(dual_deg,dual_num) ALLOC2(dual_edge, max_dual_deg,dual_num) ALLOC2(dual_ne, max_dual_deg,dual_num) ALLOC2(dual_vertex, max_dual_deg,dual_num) ALLOC1(trisk_deg, edge_num) ALLOC2(trisk, max_trisk_deg, edge_num) PRINT *, SHAPE(trisk), edge_num ALLOC1(left, edge_num) ALLOC1(right, edge_num) ALLOC1(up, edge_num) ALLOC1(down, edge_num) primal_deg(:) = primal_deg_(:) primal_edge(:,:) = primal_edge_(:,:) primal_ne(:,:) = primal_ne_(:,:) dual_deg(:) = dual_deg_(:) dual_edge(:,:) = dual_edge_(:,:) dual_ne(:,:) = dual_ne_(:,:) dual_vertex(:,:) = dual_vertex_(:,:) IF(MINVAL(dual_deg)<2) THEN STOP 'At least one dual cell has less than 2 vertices' END IF IF(MINVAL(primal_deg)<2) THEN STOP 'At least one primal cell has less than 2 vertices' END IF left(:)=left_(:) right(:)=right_(:) down(:)=down_(:) up=up_(:) trisk_deg(:)=trisk_deg_(:) trisk(:,:)=trisk_(:,:) PRINT *, MAXVAL(primal_edge), edge_num PRINT *, MAXVAL(dual_edge), edge_num PRINT *, MAXVAL(dual_vertex), dual_num PRINT *, MAXVAL(trisk), edge_num PRINT *, MAX(MAXVAL(left),MAXVAL(right)), primal_num PRINT *, MAX(MAXVAL(up),MAXVAL(down)), dual_num PRINT *, SHAPE(trisk), edge_num PRINT *,' ... Done.' END SUBROUTINE init_mesh ! Input arrays to init_metric and init_hybrid are declared DBL ! => always float64 on the Python side ! They are copied to Fortran arrays of type NUM (float or double) SUBROUTINE init_metric(Ai_, Av_, fv_, le_de_, Riv2_, wee_) BINDC(init_metric) DBL :: Ai_(primal_num), Av_(dual_num), fv_(dual_num), le_de_(edge_num), & Riv2_(max_dual_deg,dual_num), wee_(max_trisk_deg,edge_num) PRINT *, 'init_metric ...' ALLOC1(Ai,primal_num) ALLOC1(Av,dual_num) ALLOC1(fv,dual_num) ALLOC1(le_de,edge_num) ALLOC2(Riv2, max_dual_deg, dual_num) ALLOC2(wee, max_trisk_deg, edge_num) Ai(:) = Ai_(:) Av(:) = Av_(:) fv(:) = fv_(:) le_de(:) = le_de_(:) Riv2(:,:)=Riv2_(:,:) wee(:,:) = wee_(:,:) PRINT *, 'Max Ai : ', MAXVAL(ABS(Ai)) PRINT *, 'Max Av : ', MAXVAL(ABS(Av)) PRINT *, 'Max fv : ', MAXVAL(ABS(fv)) PRINT *, 'Max le_de : ', MAXVAL(ABS(le_de)) PRINT *, 'Max Riv2 : ', MAXVAL(ABS(Riv2)) PRINT *, 'Max wee : ', MAXVAL(ABS(wee)) PRINT *, MINVAL(right), MAXVAL(right) PRINT *, MINVAL(right), MAXVAL(left) PRINT *,' ... Done.' IF(nb_threads==0) nb_threads=OMP_GET_MAX_THREADS() PRINT *,'OpenMP : max_threads, num_procs, nb_threads', OMP_GET_MAX_THREADS(), OMP_GET_NUM_PROCS(), nb_threads END SUBROUTINE init_metric ! SUBROUTINE show_openmp() BINDC(show_openmp) PRINT *,'OpenMP : max_threads, num_procs', OMP_GET_MAX_THREADS(), OMP_GET_NUM_PROCS() END SUBROUTINE show_openmp ! SUBROUTINE init_params() BINDC(init_params) PRINT *, 'Setting physical parameters ...' IF(hydrostatic) THEN PRINT *, 'Hydrostatic dynamics (HPE)' ELSE PRINT *, 'Non-hydrostatic dynamics (Euler)' END IF kappa = Rd/cpp PRINT *, 'g = ',g PRINT *, 'preff = ',preff PRINT *, 'Treff = ',Treff PRINT *, 'Rd = ',Rd PRINT *, 'cpp = ',cpp PRINT *, 'kappa = ',kappa PRINT *, '... Done' CALL init_trace END SUBROUTINE init_params ! SUBROUTINE init_hybrid(bl,dak,dbk) BINDC(init_hybrid) DBL :: bl(llm+1, primal_num), & dak(llm, primal_num), dbk(llm, primal_num) PRINT *, 'Setting hybrid coefficients ...' ALLOC2(mass_bl, llm+1, primal_num) ALLOC2(mass_dak, llm, primal_num) ALLOC2(mass_dbk, llm, primal_num) mass_bl(:,:) = bl(:,:) mass_dak(:,:) = dak(:,:) mass_dbk(:,:) = dbk(:,:) PRINT *, '... Done, llm = ', llm END SUBROUTINE Init_hybrid END MODULE data_unstructured_mod