New URL for NEMO forge!   http://forge.nemo-ocean.eu

In March 2022, along with the NEMO 4.2 release, code development moved to a self-hosted GitLab.
This forge is now archived and remains online for historical reference.
Developers/RebuildZooms (diff) – NEMO

Changes between Initial Version and Version 1 of Developers/RebuildZooms


Ignore:
Timestamp:
2020-08-12T18:01:27+02:00 (4 years ago)
Author:
acc
Comment:

--

Legend:

Unmodified
Added
Removed
Modified
  • Developers/RebuildZooms

    v1 v1  
     1= Does XIOS add sufficient and accurate attribute metadata to rebuild zoom datasets correctly? = 
     2 
     3There appears to be insufficient or incorrect information in zoom domain files to rebuild whole datasets when the zoom region spans more than one XIOS server and is written to multiple files. 
     4 
     5 
     6For example, consider this zoom in an 8x4 decomposition of ORCA2_ICE_PISCES defined by the following additions to the XML: 
     7 
     8{{{ 
     9domain_def_nemo.xml: 
     10     <!--   My zoom: example of hand defined zoom   --> 
     11     <domain id="myzoomT" domain_ref="grid_T" > 
     12       <zoom_domain ibegin="25" jbegin="20" ni="90" nj="45"/> 
     13     </domain> 
     14 
     15grid_def_nemo.xml: 
     16       <grid id="zoom_T_3D" > 
     17         <domain domain_ref="myzoomT" /> 
     18         <axis axis_ref="deptht" /> 
     19       </grid> 
     20 
     21file_def_nemo-oce.xml: 
     22    <file_definition type="multiple_file" name="@expname@_@freq@_@startdate@_@enddate@" sync_freq="1mo" min_digits="4"> 
     23 
     24      <file_group id="5d" output_freq="5d"  output_level="10" enabled=".TRUE.">  <!-- 5d files --> 
     25        <file id="file66" name_suffix="_zoom_T" description="ocean T grid variables" > 
     26          <field field_ref="e3t"  grid_ref="zoom_T_3D"    /> 
     27          <field field_ref="toce" grid_ref="zoom_T_3D" name="thetao"   operation="instant" freq_op="5d" > @toce_e3t / @e3t </field> 
     28        </file> 
     29        <file id="file11" .... 
     30}}} 
     31 
     32In an 8x4 decomposition using 4 external XIOS servers the following output files are produced for the 90x45 zoom region: 
     33 
     34{{{ 
     35O2L3P_LONG_5d_00010101_00010303_zoom_T_0000.nc 
     36O2L3P_LONG_5d_00010101_00010303_zoom_T_0001.nc 
     37}}} 
     38with the following attribute data in each respectively: 
     39 
     40{{{ 
     41ncdump -h O2L3P_LONG_5d_00010101_00010303_zoom_T_0000.nc 
     42// global attributes: 
     43                . 
     44                . 
     45                :ibegin = 25 ; 
     46                :ni = 90 ; 
     47                :jbegin = 20 ; 
     48                :nj = 17 ; 
     49                :DOMAIN_number_total = 4 ; 
     50                :DOMAIN_number = 0 ; 
     51                :DOMAIN_dimensions_ids = 2, 3 ; 
     52                :DOMAIN_size_global = 180, 148 ; 
     53                :DOMAIN_size_local = 90, 17 ; 
     54                :DOMAIN_position_first = 26, 21 ; 
     55                :DOMAIN_position_last = 115, 37 ; 
     56                :DOMAIN_halo_size_start = 0, 0 ; 
     57                :DOMAIN_halo_size_end = 0, 0 ; 
     58                :DOMAIN_type = "box" ; 
     59 
     60ncdump -h O2L3P_LONG_5d_00010101_00010303_zoom_T_0001.nc 
     61                . 
     62                . 
     63                :ibegin = 25 ; 
     64                :ni = 90 ; 
     65                :jbegin = 37 ; 
     66                :nj = 28 ; 
     67                :DOMAIN_number_total = 4 ; 
     68                :DOMAIN_number = 1 ; 
     69                :DOMAIN_dimensions_ids = 2, 3 ; 
     70                :DOMAIN_size_global = 180, 148 ; 
     71                :DOMAIN_size_local = 90, 28 ; 
     72                :DOMAIN_position_first = 26, 38 ; 
     73                :DOMAIN_position_last = 115, 65 ; 
     74                :DOMAIN_halo_size_start = 0, 0 ; 
     75                :DOMAIN_halo_size_end = 0, 0 ; 
     76                :DOMAIN_type = "box" ; 
     77}}} 
     78The production of two files is correct because only two of the 4 XIOS servers are dealing with the zoom region. The data within each file is also correct but two issues with the attribute metadata prevent REBUILD_NEMO (and similar tools) from rebuilding the files correctly: 
     79 
     80* DOMAIN_number_total needs to be 2, not 4; otherwise REBUILD_NEMO will fail 
     81* DOMAIN_size_global will be used to determine the size of the collated dataset. What is actually wanted is to collate these data into a dataset of the whole zoom region (90x45). This information is not contained in the metadata. 
     82 
     83The first issue could be dealt with using an ncatted command on the first dataset; for example: 
     84 
     85{{{ 
     86rebuild_nemo -n nl.reb O2L3P_LONG_5d_00010101_00010303_zoom_T 2 
     87file O2L3P_LONG_5d_00010101_00010303_zoom_T,  num_domains 2, num_threads 1 
     88 Rebuilding the following files: 
     89 O2L3P_LONG_5d_00010101_00010303_zoom_T_0000.nc 
     90 O2L3P_LONG_5d_00010101_00010303_zoom_T_0001.nc 
     91 ERROR! : number of files to rebuild in file does not agree with namelist 
     92 Attribute DOMAIN_number_total is :            4 
     93 Number of files specified in namelist is:            2 
     942 
     95}}} 
     96can be fixed with: 
     97{{{ 
     98ncatted -a DOMAIN_number_total,global,m,d,2 O2L3P_LONG_5d_00010101_00010303_zoom_T_0000.nc 
     99 
     100rebuild_nemo -n nl.reb O2L3P_LONG_5d_00010101_00010303_zoom_T 2 
     101file O2L3P_LONG_5d_00010101_00010303_zoom_T,  num_domains 2, num_threads 1 
     102 Rebuilding the following files: 
     103 O2L3P_LONG_5d_00010101_00010303_zoom_T_0000.nc 
     104 O2L3P_LONG_5d_00010101_00010303_zoom_T_0001.nc 
     105 Size of global arrays:          180         148 
     106. 
     107. 
     108 Closing input files... 
     109 Closing output file... 
     110 NEMO rebuild completed successfully 
     111}}} 
     112This successfully rebuilds the zoom but places it in an otherwise empty global domain. 
     113 
     114Fixing the second issue is trickier. Simply editing the DOMAIN_size_global settings will not suffice because REBUILD_NEMO also uses the DOMAIN_position_first information to place data within the global arrays. Changing the size but not the offset results in bus errors. 
     115 
     116== Proposed action == 
     117 
     118Fixing the metadata at source (XIOS) may be possible. It appears to only involve one module file (see details below), but it isn't clear how XIOS distinguishes between global domains and zooms (if it does at all). A pragmatic solution will be to add the missing zoom domain information via the XML files and to adapt REBUILD_NEMO to use this information if present. For example, adding to the file_def_nemo-oce.xml: 
     119 
     120{{{ 
     121file_def_nemo-oce.xml: 
     122    <file_definition type="multiple_file" name="@expname@_@freq@_@startdate@_@enddate@" sync_freq="1mo" min_digits="4"> 
     123 
     124      <file_group id="5d" output_freq="5d"  output_level="10" enabled=".TRUE.">  <!-- 5d files --> 
     125        <file id="file66" name_suffix="_zoom_T" description="ocean T grid variables" > 
     126          <field field_ref="e3t"  grid_ref="zoom_T_3D"    /> 
     127          <field field_ref="toce" grid_ref="zoom_T_3D" name="thetao"   operation="instant" freq_op="5d" > @toce_e3t / @e3t </field> 
     128          <variable name="DOMAIN_size_zoom_i" type="int"> 90 </variable> 
     129          <variable name="DOMAIN_size_zoom_j" type="int"> 45 </variable> 
     130        </file> 
     131        <file id="file11" .... 
     132}}} 
     133results in: 
     134{{{ 
     135ncdump -h O2L3P_LONG_5d_00010101_00010303_zoom_T_0000.nc 
     136// global attributes: 
     137                . 
     138                . 
     139                :ibegin = 25 ; 
     140                :ni = 90 ; 
     141                :jbegin = 20 ; 
     142                :nj = 17 ; 
     143                :DOMAIN_number_total = 2 ; 
     144                :DOMAIN_number = 0 ; 
     145                :DOMAIN_dimensions_ids = 2, 3 ; 
     146                :DOMAIN_size_global = 180, 148 ; 
     147                :DOMAIN_size_local = 90, 17 ; 
     148                :DOMAIN_position_first = 26, 21 ; 
     149                :DOMAIN_position_last = 115, 37 ; 
     150                :DOMAIN_halo_size_start = 0, 0 ; 
     151                :DOMAIN_halo_size_end = 0, 0 ; 
     152                :DOMAIN_type = "box" ; 
     153                :DOMAIN_size_zoom_i = 90 ; 
     154                :DOMAIN_size_zoom_j = 45 ; 
     155}}} 
     156The remaining task is then to adapt REBUILD_NEMO so that if these new attributes are present: 
     157 
     158* DOMAIN_size_zoom_i and DOMAIN_size_zoom_j are used in place of DOMAIN_size_global 
     159* The ibegin and jbegin offsets are subtracted from the DOMAIN_position_first values when deciding where to place values into the output array. 
     160 
     161== Notes for possibly tackling the problem at source == 
     162 
     163The attributes are written by XIOS in: 
     164 
     165{{{ 
     166XIOS_2.5/src/io/nc4_data_output.cpp 
     167}}} 
     168 
     169by: 
     170 
     171{{{#!c 
     172    if (server->intraCommSize > 1) 
     173    { 
     174       this->writeLocalAttributes(domain->zoom_ibegin, 
     175                                  domain->zoom_ni, 
     176                                  domain->zoom_jbegin, 
     177                                  domain->zoom_nj, 
     178                                  appendDomid); 
     179 
     180       if (singleDomain) 
     181       this->writeLocalAttributes_IOIPSL(dimXid, dimYid, 
     182                                         domain->zoom_ibegin, 
     183                                         domain->zoom_ni, 
     184                                         domain->zoom_jbegin, 
     185                                         domain->zoom_nj, 
     186                                         domain->ni_glo,domain->nj_glo, 
     187                                         server->intraCommRank,server->intraCommSize); 
     188 
     189 
     190    } 
     191}}} 
     192 
     193and these functions are: 
     194 
     195{{{#!c 
     196      void CNc4DataOutput::writeLocalAttributes 
     197         (int ibegin, int ni, int jbegin, int nj, StdString domid) 
     198      { 
     199        try 
     200        { 
     201         SuperClassWriter::addAttribute(StdString("ibegin").append(domid), ibegin); 
     202         SuperClassWriter::addAttribute(StdString("ni"    ).append(domid), ni); 
     203         SuperClassWriter::addAttribute(StdString("jbegin").append(domid), jbegin); 
     204         SuperClassWriter::addAttribute(StdString("nj"    ).append(domid), nj); 
     205        } 
     206        catch (CNetCdfException& e) 
     207        { 
     208           StdString msg("On writing Local Attributes: "); 
     209           msg.append("In the context : "); 
     210           CContext* context = CContext::getCurrent() ; 
     211           msg.append(context->getId()); msg.append("\n"); 
     212           msg.append(e.what()); 
     213           ERROR("CNc4DataOutput::writeLocalAttributes \ 
     214                  (int ibegin, int ni, int jbegin, int nj, StdString domid)", << msg); 
     215        } 
     216 
     217      } 
     218}}} 
     219and 
     220{{{#!c 
     221      void CNc4DataOutput::writeLocalAttributes_IOIPSL(const StdString& dimXid, const StdString& dimYid, 
     222                                                       int ibegin, int ni, int jbegin, int nj, int ni_glo, int nj_glo, int rank, int size) 
     223      { 
     224         CArray<int,1> array(2) ; 
     225 
     226         try 
     227         { 
     228           SuperClassWriter::addAttribute("DOMAIN_number_total",size ) ; 
     229           SuperClassWriter::addAttribute("DOMAIN_number", rank) ; 
     230           array = SuperClassWriter::getDimension(dimXid) + 1, SuperClassWriter::getDimension(dimYid) + 1; 
     231           SuperClassWriter::addAttribute("DOMAIN_dimensions_ids",array) ; 
     232           array=ni_glo,nj_glo ; 
     233           SuperClassWriter::addAttribute("DOMAIN_size_global", array) ; 
     234           array=ni,nj ; 
     235           SuperClassWriter::addAttribute("DOMAIN_size_local", array) ; 
     236           array=ibegin+1,jbegin+1 ; 
     237           SuperClassWriter::addAttribute("DOMAIN_position_first", array) ; 
     238           array=ibegin+ni-1+1,jbegin+nj-1+1 ; 
     239           SuperClassWriter::addAttribute("DOMAIN_position_last",array) ; 
     240           array=0,0 ; 
     241           SuperClassWriter::addAttribute("DOMAIN_halo_size_start", array) ; 
     242           SuperClassWriter::addAttribute("DOMAIN_halo_size_end", array); 
     243           SuperClassWriter::addAttribute("DOMAIN_type",string("box")) ; 
     244         } 
     245         catch (CNetCdfException& e) 
     246         { 
     247           StdString msg("On writing Local Attributes IOIPSL \n"); 
     248           msg.append("In the context : "); 
     249           CContext* context = CContext::getCurrent() ; 
     250           msg.append(context->getId()); msg.append("\n"); 
     251           msg.append(e.what()); 
     252           ERROR("CNc4DataOutput::writeLocalAttributes_IOIPSL \ 
     253                  (int ibegin, int ni, int jbegin, int nj, int ni_glo, int nj_glo, int rank, int size)", << msg); 
     254         } 
     255      } 
     256}}}