1 | import time |
---|
2 | import math |
---|
3 | import wrap |
---|
4 | from libs import libicosa |
---|
5 | from util import list_stencil |
---|
6 | |
---|
7 | from ctypes import c_void_p, c_int, c_double, c_float, c_bool |
---|
8 | from numpy cimport ndarray |
---|
9 | cimport numpy as np |
---|
10 | import numpy as np |
---|
11 | |
---|
12 | #-------------- choose precision of kernel computations ------------# |
---|
13 | |
---|
# Compile-time switch for the precision of kernel computations:
# True selects double, False selects float.
DEF prec_double=False

# For the selected precision, define consistently:
#   num    : C floating-point type used in kernel prototypes
#   c_num  : matching ctypes type
#   np_num : matching NumPy dtype
IF prec_double:
    ctypedef double num
    c_num = c_double
    np_num = np.float64
ELSE:
    ctypedef float num
    c_num = c_float
    np_num = np.float32

# pointer to data of type num
ctypedef num *num_ptr
---|
26 | |
---|
27 | #------------- direct Cython interface to DYNAMICO routines -------------# |
---|
28 | |
---|
29 | |
---|
# capacity of the time-scheme coefficient arrays declared below
cdef enum:
    max_nb_stage = 5

# Externally defined variables; Caldyn_step.__init__ fills them with the
# coefficients and number of stages of the time scheme.
cdef extern :
    cdef num tauj[max_nb_stage]
    cdef num cslj[max_nb_stage][max_nb_stage]
    cdef num cflj[max_nb_stage][max_nb_stage]
    cdef int nb_stage[1]

# Routines declared in functions.h
cdef extern from "functions.h":
    cdef void dynamico_ARK_step(int nstep,
                                num *mass_col, num *rhodz, num *theta_rhodz,
                                num *u, num *geopot, num *w,
                                num *theta, num *ps, num *pk, num *hflux, num *qv,
                                num *dmass_col, num *drhodz, num *dtheta_rhodz,
                                num *du_fast, num *du_slow,
                                num *dPhi_fast, num *dPhi_slow,
                                num *dW_fast, num *dW_slow)
    cdef void dynamico_remap(num *rhodz, num *theta_rhodz, num *u)
    cdef void dynamico_init_params()
    cpdef void dynamico_setup_xios()
    cpdef void dynamico_xios_set_timestep(double)
    cpdef void dynamico_xios_update_calendar(int)
---|
51 | |
---|
52 | #------------- import and wrap DYNAMICO routines -------------# |
---|
53 | |
---|
# imported functions are stored as attributes of ker : function X becomes ker.X
ker = wrap.Struct()

# use True instead of False for debugging, probably with some overhead
check_args = False

try:
    kernels = wrap.SharedLib(vars(ker), libicosa, check_args=check_args)
    # module-level shortcuts to the accessors of global variables of the library
    setvar, setvars, getvar, getvars = kernels.setvar, kernels.setvars, kernels.getvar, kernels.getvars
except OSError:
    # print as a parenthesised call : same output as the former print
    # statement, and consistent with the print(...) calls used in init_mesh
    print("""
Unable to load shared library 'libicosa.so' !
""")
    # report, then let the caller see the original error
    raise
---|
66 | |
---|
67 | # providing a full prototype enables type-checking when calling |
---|
68 | # if a number n is present in the prototype, the previous type is repeated n times |
---|
# Each entry is the name of a routine followed by the types of its arguments;
# a number n following a type repeats that type n times.
# An entry [name, None] is used for the routines listed without argument types.
kernels.import_funs([
    ['dynamico_setup_xios', None],
    ['dynamico_print_trace', None],
    ['dynamico_xios_set_timestep', c_double],
    ['dynamico_xios_update_calendar', c_int],
    ['dynamico_init_mesh', c_void_p, 13],
    ['dynamico_init_metric', c_void_p, 6],
    ['dynamico_init_hybrid', c_void_p, 3],
    ['dynamico_caldyn_unstructured', c_num, c_void_p, 20],
    ['dynamico_partition_graph', c_int, 2, c_void_p, 3, c_int, c_void_p],
    ['dynamico_init_transfer', c_int, c_int, 2, c_void_p, 3, c_int, 2, c_void_p, 3],
    ['dynamico_update_halo', c_int, 3, c_void_p],
    ['dynamico_morton_encode', c_int, c_void_p, 4],
])
---|
83 | |
---|
84 | # set/get global variables |
---|
# integer codes; eta_lag is passed as 'caldyn_eta' by the setup code of this module
eta_mass = 1
eta_lag = 2
# integer codes; thermo_boussinesq is passed as 'caldyn_thermo' by the setup code of this module
thermo_theta = 1
thermo_entropy = 2
thermo_moist = 3
thermo_boussinesq = 4

# Register the variables accessed through setvar(s)/getvar(s).
# NOTE(review): presumably each ctypes type applies to all names following it,
# until the next type -- confirm against wrap.SharedLib.addvars
kernels.addvars(
    c_bool, 'hydrostatic', 'debug_hevi_solver',
    c_int, 'llm', 'nqdyn', 'primal_num', 'max_primal_deg',
    'dual_num', 'max_dual_deg', 'edge_num', 'max_trisk_deg',
    'caldyn_thermo', 'caldyn_eta', 'nb_threads', 'dynamico_mpi_rank',
    c_double, 'elapsed',
    c_num, 'g', 'ptop', 'cpp', 'cppv',
    'Rd', 'Rv', 'preff', 'Treff', 'pbot', 'rho_bot', 'Phi_bot')

# wall-clock time accumulated by Caldyn.bwd_fast_slow
elapsed = 0.
---|
98 | |
---|
99 | #------------------------ Extension type performing a full ARK time step ---------------------- |
---|
100 | |
---|
# Address of the first element of a typed memoryview, one helper per rank.
cdef num_ptr ptr1(num[:] data) except *:
    return &data[0]

cdef num_ptr ptr2(num[:,:] data) except *:
    return &data[0,0]

cdef num_ptr ptr3(num[:,:,:] data) except *:
    return &data[0,0,0]

cdef num_ptr ptr4(num[:,:,:,:] data) except *:
    return &data[0,0,0,0]
---|
cdef num_ptr ptr(data) except * :
    # Return the address of the first element of data, dispatching on
    # data.ndim to the rank-specific helpers ptr1 ... ptr4.
    # Raises IndexError when data.ndim is not 1, 2, 3 or 4.
    n = data.ndim
    if n == 1 : return ptr1(data)
    if n == 2 : return ptr2(data)
    if n == 3 : return ptr3(data)
    if n == 4 : return ptr4(data)
    # Reject every unsupported rank, not only n>4 : falling off the end of
    # this function would silently hand a null pointer to the caller.
    raise IndexError('ptr() supports arrays of rank 1 to 4, got rank %s' % (n,))
---|
112 | |
---|
cdef alloc(num_ptr *p, allocator, n=1):
    # Create an array by calling allocator(n), store the address of its first
    # element in p[0] and return the array itself.
    # The caller must keep a reference to the returned array for as long as
    # the stored pointer is used.
    array = allocator(n)
    p[0] = ptr(array)
    return array
---|
117 | |
---|
cdef check_ptr(name, num_ptr p, ndarray data):
    # Debugging aid : print a warning tagged with name when p is not the
    # address of the first element of data.
    if p != ptr(data) :
        # parenthesised call producing the same text as the former
        # statement 'print name, ...', consistent with print(...) in init_mesh
        print('%s p <> ptr(data) !!' % (name,))
---|
120 | |
---|
cdef class Caldyn_step:
    """Extension type performing full ARK time steps through dynamico_ARK_step.

    The arrays are allocated once in __init__ and pointers to their data are
    stored, to avoid overhead when calling dynamico. The arrays are exposed
    as read-only attributes.
    """
    # number of time steps to do at each invocation of next()
    cdef int nstep
    # pointer to allocated arrays
    cdef num_ptr p_mass, p_theta_rhodz, p_u, p_geopot, p_W # prognostic
    cdef num_ptr p_mass_col, p_dmass_col, p_ps, p_theta, p_pk, p_hflux, p_qv # diagnostic
    cdef num_ptr p_drhodz, p_dtheta_rhodz, p_du_fast, p_du_slow # tendencies
    cdef num_ptr p_dPhi_fast, p_dPhi_slow, p_dW_fast, p_dW_slow # tendencies
    # allocated arrays, must remain referenced or segfault
    cdef readonly ndarray mass, theta_rhodz, u, geopot, W
    cdef readonly ndarray mass_col, dmass_col, ps, theta, pk, hflux, qv
    cdef readonly ndarray drhodz, dtheta_rhodz, du_fast, du_slow
    cdef readonly ndarray dPhi_fast, dPhi_slow, dW_fast, dW_slow

    def __init__(self, mesh, time_scheme, nstep):
        """Copy the time scheme to the kernel side and allocate all arrays.

        mesh        : provides the allocators field_ps, field_mass, field_w,
                      field_u and field_z
        time_scheme : provides nstage and the coefficients tauj, csjl, cfjl
        nstep       : number of time steps done by each call to next()

        Raises ValueError if time_scheme.nstage exceeds max_nb_stage.
        """
        self.nstep = nstep
        fps, fmass = mesh.field_ps, mesh.field_mass
        fw, fu, fz = mesh.field_w, mesh.field_u, mesh.field_z
        # collect coefficients of time scheme
        cdef double[:] tauj_ = time_scheme.tauj
        cdef double[:,:] cslj_ = time_scheme.csjl
        cdef double[:,:] cflj_ = time_scheme.cfjl
        ns = time_scheme.nstage
        # tauj, cslj and cflj have room for max_nb_stage stages only :
        # refuse larger schemes instead of writing beyond these arrays
        if ns > max_nb_stage:
            raise ValueError('time scheme has %d stages, at most %d are supported'
                             % (ns, max_nb_stage))
        nb_stage[0] = ns

        cdef int i, j
        for i in range(ns):
            tauj[i] = tauj_[i]
            for j in range(ns):
                cslj[i][j] = cslj_[i,j]
                cflj[i][j] = cflj_[i,j]
        # allocate arrays, store pointers to avoid overhead when calling dynamico
        # prognostic/diagnostic
        self.ps = alloc(&self.p_ps, fps)
        self.mass_col, self.dmass_col = alloc(&self.p_mass_col, fps), alloc(&self.p_dmass_col, fps, ns)
        self.mass, self.theta_rhodz = alloc(&self.p_mass, fmass), alloc(&self.p_theta_rhodz, fmass)
        self.theta, self.pk = alloc(&self.p_theta, fmass), alloc(&self.p_pk, fmass)
        self.geopot, self.W = alloc(&self.p_geopot, fw), alloc(&self.p_W, fw)
        self.hflux, self.u = alloc(&self.p_hflux, fu), alloc(&self.p_u, fu)
        self.qv = alloc(&self.p_qv, fz)
        # tendencies : allocated with n=ns
        self.drhodz, self.dtheta_rhodz = alloc(&self.p_drhodz, fmass, ns), alloc(&self.p_dtheta_rhodz, fmass, ns)
        self.du_fast, self.du_slow = alloc(&self.p_du_fast, fu, ns), alloc(&self.p_du_slow, fu, ns)
        self.dPhi_fast, self.dPhi_slow = alloc(&self.p_dPhi_fast, fw, ns), alloc(&self.p_dPhi_slow, fw, ns)
        self.dW_fast, self.dW_slow = alloc(&self.p_dW_fast, fw, ns), alloc(&self.p_dW_slow, fw, ns)

    def next(self):
        """Call dynamico_ARK_step with nstep and the stored pointers."""
        # global elapsed
        # time1=time.time()
        dynamico_ARK_step(self.nstep,
                          self.p_mass_col, self.p_mass, self.p_theta_rhodz,
                          self.p_u, self.p_geopot, self.p_W,
                          self.p_theta, self.p_ps, self.p_pk, self.p_hflux, self.p_qv,
                          self.p_dmass_col, self.p_drhodz, self.p_dtheta_rhodz,
                          self.p_du_fast, self.p_du_slow,
                          self.p_dPhi_fast, self.p_dPhi_slow,
                          self.p_dW_fast, self.p_dW_slow)
        #time2=time.time()
        #if time2>time1: elapsed=elapsed+time2-time1

    def remap(self):
        """Call dynamico_remap on the arrays mass, theta_rhodz and u."""
        dynamico_remap(self.p_mass, self.p_theta_rhodz, self.p_u)
---|
182 | |
---|
def caldyn_step_TRSW(mesh,time_scheme,nstep):
    """Set hydrostatic=True, caldyn_thermo=thermo_boussinesq and
    caldyn_eta=eta_lag, call dynamico_init_params(), then return a new
    Caldyn_step(mesh, time_scheme, nstep)."""
    names = ('hydrostatic', 'caldyn_thermo', 'caldyn_eta')
    values = (True, thermo_boussinesq, eta_lag)
    setvars(names, values)
    dynamico_init_params()
    stepper = Caldyn_step(mesh, time_scheme, nstep)
    return stepper
---|
def caldyn_step_HPE(mesh,time_scheme,nstep, caldyn_thermo,caldyn_eta, thermo,BC,g):
    """Set hydrostatic=True together with caldyn_thermo, caldyn_eta, g,
    ptop (from BC) and Rd, cpp, preff, Treff (from thermo), call
    dynamico_init_params(), then return a new
    Caldyn_step(mesh, time_scheme, nstep)."""
    names = ('hydrostatic', 'caldyn_thermo', 'caldyn_eta',
             'g', 'ptop', 'Rd', 'cpp', 'preff', 'Treff')
    values = (True, caldyn_thermo, caldyn_eta,
              g, BC.ptop, thermo.Rd, thermo.Cpd, thermo.p0, thermo.T0)
    setvars(names, values)
    dynamico_init_params()
    stepper = Caldyn_step(mesh, time_scheme, nstep)
    return stepper
---|
def caldyn_step_NH(mesh,time_scheme,nstep, caldyn_thermo, caldyn_eta, thermo,BC,g):
    """Set hydrostatic=False together with caldyn_thermo, caldyn_eta, g,
    ptop, pbot, rho_bot (from BC) and Rd, cpp, preff, Treff (from thermo),
    call dynamico_init_params(), then return a new
    Caldyn_step(mesh, time_scheme, nstep)."""
    names = ('hydrostatic', 'caldyn_thermo', 'caldyn_eta',
             'g', 'ptop', 'Rd', 'cpp', 'preff', 'Treff', 'pbot', 'rho_bot')
    # pbot and rho_bot are set to the maximum of the corresponding BC arrays
    values = (False, caldyn_thermo, caldyn_eta,
              g, BC.ptop, thermo.Rd, thermo.Cpd, thermo.p0, thermo.T0,
              BC.pbot.max(), BC.rho_bot.max())
    setvars(names, values)
    dynamico_init_params()
    stepper = Caldyn_step(mesh, time_scheme, nstep)
    return stepper
---|
203 | |
---|
204 | #----------------------------- Base class for dynamics ------------------------ |
---|
205 | |
---|
class Caldyn:
    """Base class for dynamics.

    Holds the mesh, work arrays created by the allocators of the mesh, and
    the allocators themselves. Derived classes implement
    _bwd_fast_slow_(flow, tau), which bwd_fast_slow() calls and times.
    """
    def __init__(self, mesh):
        """Create the work arrays using the field_* allocators of mesh."""
        self.mesh = mesh
        fps = mesh.field_ps
        ftheta = mesh.field_theta
        fmass = mesh.field_mass
        fw = mesh.field_w
        fu = mesh.field_u
        fz = mesh.field_z
        # arrays created by field_ps
        self.ps = fps()
        self.ms = fps()
        self.dms = fps()
        # arrays created by field_theta
        self.s = ftheta()
        self.hs = ftheta()
        self.dhs = ftheta()
        # arrays created by field_mass, field_w, field_u
        self.pk = fmass()
        self.berni = fmass()
        self.geopot = fw()
        self.hflux = fu()
        # arrays created by field_u, field_z
        self.qu = fu()
        self.qv = fz()
        # allocators kept to create tendency arrays at each call
        self.fmass = fmass
        self.ftheta = ftheta
        self.fu = fu
        self.fw = fw

    def bwd_fast_slow(self, flow, tau):
        """Return the result of self._bwd_fast_slow_(flow, tau), adding the
        wall-clock time spent in that call to the module-level 'elapsed'."""
        global elapsed
        t_start = time.time()
        result = self._bwd_fast_slow_(flow, tau)
        flow, fast, slow = result
        t_end = time.time()
        elapsed = elapsed + t_end - t_start
        return flow, fast, slow
---|
223 | |
---|
224 | # when calling caldyn_unstructured, arrays for tendencies must be re-created each time |
---|
225 | # to avoid overwriting in the same memory space when time scheme is multi-stage |
---|
226 | |
---|
227 | #-------------------------- Shallow-water dynamics --------------------- |
---|
228 | |
---|
class Caldyn_RSW(Caldyn):
    """Shallow-water dynamics computed by dynamico_caldyn_unstructured."""
    def __init__(self, mesh):
        """Create the work arrays, then set hydrostatic=True,
        caldyn_thermo=thermo_boussinesq, caldyn_eta=eta_lag and call
        dynamico_init_params()."""
        Caldyn.__init__(self, mesh)
        names = ('hydrostatic', 'caldyn_thermo', 'caldyn_eta')
        values = (True, thermo_boussinesq, eta_lag)
        setvars(names, values)
        # replaces the dhs array created by Caldyn.__init__ with field_theta
        self.dhs = self.fmass()
        dynamico_init_params()

    def _bwd_fast_slow_(self, flow, tau):
        """Call the kernel for flow=(h,u) and return
        (h,u), (0.,du_fast), (dh,du_slow).

        The tendency arrays dh, du_slow, du_fast are created at each call.
        """
        h, u = flow
        dh = self.fmass()
        du_slow = self.fu()
        du_fast = self.fu()
        # h*s = h => uniform buoyancy s=1 => shallow-water
        hs = h.copy()
        # self.geopot is passed for every argument supplied as 'scratch' below
        scratch = self.geopot
        ker.dynamico_caldyn_unstructured(
            tau, self.ms, h, hs, u, self.geopot, scratch,
            self.s, self.ps, self.pk, self.hflux, self.qv,
            self.dms, dh, self.dhs, du_fast, du_slow,
            scratch, scratch, scratch, scratch)
        return (h, u), (0., du_fast), (dh, du_slow)
---|
245 | |
---|
246 | #----------------------------------- HPE ------------------------------------ |
---|
247 | |
---|
class Caldyn_HPE(Caldyn):
    """Dynamics computed by dynamico_caldyn_unstructured with hydrostatic=True."""
    def __init__(self,caldyn_thermo,caldyn_eta, mesh,thermo,BC,g):
        """Create the work arrays, then set hydrostatic=True together with
        caldyn_thermo, caldyn_eta, g, ptop (from BC) and Rd, cpp, preff,
        Treff (from thermo), and call dynamico_init_params()."""
        Caldyn.__init__(self, mesh)
        names = ('hydrostatic', 'caldyn_thermo', 'caldyn_eta',
                 'g', 'ptop', 'Rd', 'cpp', 'preff', 'Treff')
        values = (True, caldyn_thermo, caldyn_eta,
                  g, BC.ptop, thermo.Rd, thermo.Cpd, thermo.p0, thermo.T0)
        setvars(names, values)
        dynamico_init_params()

    def _bwd_fast_slow_(self, flow, tau):
        """Call the kernel for flow=(m,S,u) and return
        (m,S,u), (0.,0.,du_fast), (dm,dS,du_slow).

        The tendency arrays dm, dS, du_slow, du_fast are created at each call.
        """
        dm = self.fmass()
        dS = self.ftheta()
        du_slow = self.fu()
        du_fast = self.fu()
        # self.geopot is passed for every argument supplied as 'scratch' below
        scratch = self.geopot
        m, S, u = flow
        ker.dynamico_caldyn_unstructured(
            tau, self.ms, m, S, u, self.geopot, scratch,
            self.s, self.ps, self.pk, self.hflux, self.qv,
            self.dms, dm, dS, du_fast, du_slow,
            scratch, scratch, scratch, scratch)
        return (m, S, u), (0., 0., du_fast), (dm, dS, du_slow)
---|
264 | |
---|
265 | #----------------------------------- NH ------------------------------------ |
---|
266 | |
---|
class Caldyn_NH(Caldyn):
    """Dynamics computed by dynamico_caldyn_unstructured with hydrostatic=False."""
    def __init__(self,caldyn_thermo,caldyn_eta, mesh,thermo,BC,g):
        """Create the work arrays, then set hydrostatic=False together with
        caldyn_thermo, caldyn_eta, g, ptop, pbot, rho_bot (from BC) and
        Rd, cpp, preff, Treff (from thermo), and call dynamico_init_params()."""
        Caldyn.__init__(self,mesh)
        # pbot and rho_bot are set to the maximum of the corresponding BC arrays
        setvars(('hydrostatic','caldyn_thermo','caldyn_eta',
                 'g','ptop','Rd','cpp','preff','Treff',
                 'pbot','rho_bot'),
                (False,caldyn_thermo,caldyn_eta,
                 g,BC.ptop,thermo.Rd,thermo.Cpd,thermo.p0,thermo.T0,
                 BC.pbot.max(), BC.rho_bot.max()))
        dynamico_init_params()

    # Implemented as the _bwd_fast_slow_ hook, like Caldyn_RSW and Caldyn_HPE,
    # so that the inherited Caldyn.bwd_fast_slow() also accumulates the time
    # spent here into 'elapsed'. Callers keep using bwd_fast_slow(flow, tau).
    def _bwd_fast_slow_(self, flow, tau):
        """Call the kernel for flow=(m,S,u,Phi,W) and return
        (m,S,u,Phi,W), (0.,0.,du_fast,dPhi_fast,dW_fast),
        (dm,dS,du_slow,dPhi_slow,dW_slow).

        All tendency arrays are created at each call.
        """
        ftheta, fmass, fu, fw = self.ftheta, self.fmass, self.fu, self.fw
        dm, dS, du_slow, du_fast = fmass(), ftheta(), fu(), fu()
        dPhi_slow, dPhi_fast, dW_slow, dW_fast = fw(), fw(), fw(), fw()
        m,S,u,Phi,W = flow
        ker.dynamico_caldyn_unstructured(tau, self.ms, m, S, u, Phi, W,
                                         self.s, self.ps, self.pk, self.hflux, self.qv,
                                         self.dms, dm, dS, du_fast, du_slow,
                                         dPhi_fast, dPhi_slow, dW_fast, dW_slow)
        return ((m,S,u,Phi,W), (0.,0.,du_fast,dPhi_fast,dW_fast),
                (dm,dS,du_slow,dPhi_slow,dW_slow))
---|
288 | |
---|
289 | #------------------------ Copy mesh info to Fortran side ------------------- |
---|
290 | |
---|
def init_mesh(llm, nqdyn, edge_num, primal_num, dual_num,
              max_trisk_deg, max_primal_deg, max_dual_deg,
              primal_nb, primal_edge, primal_ne,
              dual_nb,dual_edge,dual_ne,dual_vertex,
              left,right,down,up,trisk_deg,trisk,
              Ai, Av, fv, le_de, Riv2, wee):
    """Copy mesh info to the kernel side.

    Sets the eight integer sizes with setvars, then passes the connectivity
    arrays to dynamico_init_mesh and the metric arrays to
    dynamico_init_metric, printing a progress message around each call.
    """
    size_names = ('llm', 'nqdyn', 'edge_num', 'primal_num', 'dual_num',
                  'max_trisk_deg', 'max_primal_deg', 'max_dual_deg')
    size_values = (llm, nqdyn, edge_num, primal_num, dual_num,
                   max_trisk_deg, max_primal_deg, max_dual_deg)
    setvars(size_names, size_values)

    print('init_mesh ...')
    ker.dynamico_init_mesh(
        primal_nb, primal_edge, primal_ne,
        dual_nb, dual_edge, dual_ne, dual_vertex,
        left, right, down, up, trisk_deg, trisk)
    print('...done')

    print('init_metric ...')
    ker.dynamico_init_metric(Ai, Av, fv, le_de, Riv2, wee)
    print('...done')
---|
309 | |
---|
310 | #------------------------ Mesh partitioning ------------------------ |
---|
311 | |
---|
# Helper functions and interface to ParMETIS
# loc_stencil returns the start/end indices (xadj) expected by ParMETIS
# i.e. index[start:end] with start=xadj[cell], end=xadj[cell+1] lists the edges of cell 'cell'
---|
315 | |
---|
def loc_stencil(degree, stencil):
    """Generate the running sums of degree, starting from 0.

    Yields degree.size+1 values : 0, degree[0], degree[0]+degree[1], ...
    The argument stencil is not used.
    """
    count = degree.size
    offset = 0
    yield offset
    for cell in range(count):
        offset = offset + degree[cell]
        yield offset
---|
322 | |
---|
def partition_mesh(degree, stencil, nparts):
    """Call dynamico_partition_graph and return the int32 array 'owner'.

    arguments : PArray1D and PArray2D describing mesh, number of desired partitions
    The returned array has idx_end-idx_start entries, initialized to zero
    before the call.
    NOTE(review): presumably filled by dynamico_partition_graph with the
    partition owning each local cell -- confirm against the kernel.
    """
    dim_cell = degree.dim
    degree_data = degree.data
    stencil_data = stencil.data
    comm = dim_cell.comm
    vtxdist = dim_cell.vtxdist
    idx_start = dim_cell.start
    idx_end = dim_cell.end
    mpi_rank = comm.Get_rank()
    mpi_size = comm.Get_size()
    # local adjacency list and its offsets, converted to int32 arrays
    adjncy_iter = list_stencil(degree_data, stencil_data)
    xadj_iter = loc_stencil(degree_data, stencil_data)
    adjncy_loc = np.asarray(list(adjncy_iter), dtype=np.int32)
    xadj_loc = np.asarray(list(xadj_iter), dtype=np.int32)
    owner = np.zeros(idx_end - idx_start, dtype=np.int32)
    ker.dynamico_partition_graph(mpi_rank, mpi_size, vtxdist, xadj_loc, adjncy_loc, nparts, owner)
    return owner
---|