source: XIOS3/dev/XIOS_ATTACHED/src/distribution/grid_remote_connector.cpp @ 2482

Last change on this file since 2482 was 2482, checked in by ymipsl, 15 months ago

First guess at suppression of attached mode, replaced by online reader and writer filters

YM

1#include "grid_remote_connector.hpp"
2#include "client_client_dht_template.hpp"
3#include "leader_process.hpp"
4#include "mpi.hpp"
5#include "element.hpp"
6
7
8
9namespace xios
10{
11  /**
12   * \brief class constructor.
13   * \param srcView List of source views.
14   * \param dstView List of remote views.
15   * \param localComm Local MPI communicator
16   * \param remoteSize Size of the remote communicator
17   */ 
18  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, MPI_Comm localComm, int remoteSize) 
19                       : srcView_(srcView), dstView_(dstView), localComm_(localComm), remoteSize_(remoteSize) 
20  {}
21
22  /**
23   * \brief class constructor.
24   * \param srcView List of source views.
25   * \param dstView List of remote views.
26   * \param localComm Local MPI communicator
27   * \param remoteSize Size of the remote communicator
28   */ 
29  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector< shared_ptr<CLocalView> >& dstView, MPI_Comm localComm, int remoteSize) 
30                       : srcView_(srcView), localComm_(localComm), remoteSize_(remoteSize) 
31  {
32    for(auto& it : dstView) dstView_.push_back((shared_ptr<CDistributedView>) it) ; 
33  }
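  // Hypothetical usage sketch (the view lists, communicator and remote size are supplied by the caller,
  // not defined in this file) : build the connector from the source and destination views, then compute it :
  //   CGridRemoteConnector connector(srcViews, dstViews, localComm, remoteCommSize) ;
  //   connector.computeConnector(true) ; // true => eliminate redundant ranks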
34
35
36  /**
37   * \brief Compute whether each view composing the source grid and the remote grid is distributed or not.
38   *         The result is stored in the internal attributes \b isSrcViewDistributed_ and \b isDstViewDistributed_.
39   * \detail To compute this, a hash is computed for each array of indices. The hash must be permutable, i.e.
40   *         the order of the list of global indices must not influence the value of the hash. So a simple sum of the
41   *         hashes of the individual indices is used for the whole array. The computed hashes are then compared across
42   *         the ranks of the \b localComm_ MPI communicator using an MPI_Allreduce. If the hash is the same for every rank,
43   *         then the view is not distributed.
44   */
45  void CGridRemoteConnector::computeViewDistribution(void)
46  {
47    HashXIOS<size_t> hashGlobalIndex; // hash function-object
48
49    int nDst = dstView_.size() ;
50    vector<size_t> hashRank(remoteSize_) ;
51    vector<size_t> sizeRank(remoteSize_) ;
52    isDstViewDistributed_.resize(nDst) ;
53
54    for(int i=0; i<nDst; i++)
55    {
56      map<int,CArray<size_t,1>> globalIndexView ;
57      dstView_[i]->getGlobalIndexView(globalIndexView) ;
58      hashRank.assign(remoteSize_,0) ; // zero for every remote rank, except the ranks of the remote view I hold,
59                                       // which will be assigned my local hash
60      sizeRank.assign(remoteSize_,0) ;
61
62      for(auto& it : globalIndexView)
63      {
64        int rank=it.first ;
65        CArray<size_t,1>& globalIndex = it.second ;
66        size_t globalIndexSize = globalIndex.numElements();
67        size_t hashValue=0 ;
68        for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
69        hashRank[rank] += hashValue ;
70        sizeRank[rank] += globalIndexSize ;
71      }
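      // summing the per-index hashes makes the per-rank value invariant under any permutation of the
      // global index list, which is the permutability property required to compare ranks safely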
72      // sum the hashes over all processes of the local comm. The reduction is over the size of the remote view (remoteSize_) ;
73      // after that we have, for each rank of the remote view, the aggregated hash
74      MPI_Allreduce(MPI_IN_PLACE, hashRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
75      MPI_Allreduce(MPI_IN_PLACE, sizeRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
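      // hashRank[j] and sizeRank[j] now hold the aggregated hash and number of indices targeting remote rank j ;
      // the loop below flags the view as distributed as soon as one remote rank differs from rank 0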
76      size_t value = hashRank[0] ;
77      size_t size = sizeRank[0] ;
78      isDstViewDistributed_[i]=false ;
79      for(int j=0 ; j<remoteSize_ ; j++) 
80        if (size!=sizeRank[j] || value != hashRank[j]) 
81        { 
82          isDstViewDistributed_[i]=true ;
83          break ;
84        }
85    }
86
87    int nSrc = srcView_.size() ;
88    int commSize,commRank ;
89    MPI_Comm_size(localComm_,&commSize) ;
90    MPI_Comm_rank(localComm_,&commRank) ;
91    hashRank.resize(commSize,0) ;
92    isSrcViewDistributed_.resize(nSrc) ;
93
94    for(int i=0; i<nSrc; i++)
95    {
96      CArray<size_t,1> globalIndex ;
97      srcView_[i]->getGlobalIndexView(globalIndex) ;
98      hashRank.assign(commSize,0) ; // 0 for everybody except my rank
99      size_t globalIndexSize = globalIndex.numElements() ;
100     
101      size_t minVal,maxVal ;
102      MPI_Allreduce(&globalIndexSize, &minVal, 1, MPI_SIZE_T, MPI_MIN, localComm_) ;
103      MPI_Allreduce(&globalIndexSize, &maxVal, 1, MPI_SIZE_T, MPI_MAX, localComm_) ;
104      if (minVal!=maxVal) 
105      {
106        isSrcViewDistributed_[i]=true ;
107        continue ; // this element is already known to be distributed, check the next one
108      }
109
110      // warning : the Jenkins hash maps 0 --> 0, so the number of elements on each rank must also be compared
111      size_t hashValue=0 ;
112      for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
113      MPI_Allreduce(&hashValue, &minVal, 1, MPI_SIZE_T, MPI_MIN, localComm_) ;
114      MPI_Allreduce(&hashValue, &maxVal, 1, MPI_SIZE_T, MPI_MAX, localComm_) ;
115      if (minVal!=maxVal) isSrcViewDistributed_[i]=true ;
116      else isSrcViewDistributed_[i]=false ;
117    }
118
119  }
120
121/**
122  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
123  * \detail Depending on the distribution of the views computed in the computeViewDistribution() call, the connector is computed in computeConnectorMethods(), and to achieve better optimisation
124  *         some redundant ranks can be removed from the elements_ map.
125  */
126  void CGridRemoteConnector::computeConnector(bool eliminateRedundant)
127  {
128    if (eliminateRedundant)
129    {
130      computeViewDistribution() ;
131      computeConnectorMethods() ;
132      computeRedondantRanks() ; 
133      for(auto& rank : rankToRemove_)
134        for(auto& element : elements_) element.erase(rank) ;
135    }
136    else
137    {
138       computeViewDistribution() ;
139       computeConnectorRedundant() ;
140    }
141  }
142
143/**
144  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
145  * \detail In this routine we do not eliminate redundant cells as is done in
146  *         computeConnectorMethods. This can be useful to perform reduce operations over processes.
147  *         In the future, some optimisation could be done by considering full redundancy of the
148  *         source view or the destination view.
149  */
150  void CGridRemoteConnector::computeConnectorRedundant(void)
151  {
152    vector<shared_ptr<CLocalView>> srcView ;
153    vector<shared_ptr<CDistributedView>> dstView ;
154    vector<int> indElements ;
155    elements_.resize(srcView_.size()) ;
156   
157    bool srcViewsNonDistributed=true ; // not useful now, but kept for later optimization
158    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;
159   
160    bool dstViewsNonDistributed=true ;  // not useful now, but kept for later optimization
161    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;
162   
163    for(int i=0;i<srcView_.size();i++) 
164    {
165      srcView.push_back(srcView_[i]) ;
166      dstView.push_back(dstView_[i]) ;
167      indElements.push_back(i) ;
168    }
169
170    computeGenericMethod(srcView, dstView, indElements) ;
171   
172    map<int,bool> ranks ; 
173    for(auto& it : elements_[indElements[0]]) 
174    {
175      if (it.second.numElements()==0) ranks[it.first] = false ;
176      else  ranks[it.first] = true ;
177    }
178   
179  }
180
181
182/**
183  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
184  * \detail In order to achieve better optimisation,
185  *         we distinguish the case where the grid is not distributed on the source grid (\b computeSrcNonDistributed),
186  *         on the remote grid (\b computeDstNonDistributed), or on both (\b computeSrcDstNonDistributed).
187  *         Otherwise the generic method computeGenericMethod is called. Note that if one element view
188  *         is not distributed on both the source and the remote grid, then we can use the tensorial product
189  *         property to compute it independently using the \b computeSrcDstNonDistributed method.
190  *         After that, the \b removeRedondantRanks method is called to suppress blocks of data that would otherwise be sent
191  *         redundantly to the remote servers.
192  */
193  void CGridRemoteConnector::computeConnectorMethods(bool reverse)
194  {
195    vector<shared_ptr<CLocalView>> srcView ;
196    vector<shared_ptr<CDistributedView>> dstView ;
197    vector<int> indElements ;
198    elements_.resize(srcView_.size()) ;
199   
200    bool srcViewsNonDistributed=true ;
201    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;
202   
203    bool dstViewsNonDistributed=true ;
204    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;
205   
206  //*****************************************************
207    if (srcViewsNonDistributed && dstViewsNonDistributed)
208    {
209      int commRank, commSize ;
210      MPI_Comm_rank(localComm_,&commRank) ;
211      MPI_Comm_size(localComm_,&commSize) ;
212 
213      map<int,bool> ranks ;
214      if (reverse)
215      {
216        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
217        ranks[leaderRank] = true ; 
218      }
219      else
220      {
221        list<int> remoteRanks;
222        list<int> notUsed ;
223        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
224        for(int rank : remoteRanks) ranks[rank]=true ;
225      }
226      for(int i=0; i<srcView_.size(); i++) computeSrcDstNonDistributed(i,ranks) ; 
227    }
228 
229  //*****************************************************
230    else if (srcViewsNonDistributed) 
231    {
232      int commRank, commSize ;
233      MPI_Comm_rank(localComm_,&commRank) ;
234      MPI_Comm_size(localComm_,&commSize) ;
235      list<int> remoteRanks;
236      list<int> notUsed ;
237      map<int,bool> ranks ;
238     
239      if (reverse)
240      {
241        shared_ptr<CLocalElement> voidElement = make_shared<CLocalElement>(commRank, 0, CArray<size_t,1>()) ;
242        shared_ptr<CLocalView> voidView = make_shared<CLocalView>(voidElement, CElementView::FULL, CArray<int,1>()) ;
243
244        for(int i=0;i<srcView_.size();i++) 
245          if (isDstViewDistributed_[i])
246          {
247            if (commRank==0) srcView.push_back(srcView_[i]) ;
248            else srcView.push_back(make_shared<CLocalView>(make_shared<CLocalElement>(commRank, srcView_[i]->getGlobalSize(), CArray<size_t,1>()),
249                                                           CElementView::FULL, CArray<int,1>())) ; // void view
250            dstView.push_back(dstView_[i]) ;
251            indElements.push_back(i) ;
252          }
253       
254        computeGenericMethod(srcView, dstView, indElements) ;
255
256        for(int i=0;i<srcView_.size();i++) 
257          if (isDstViewDistributed_[i])
258          {
259            size_t sizeElement ;
260            int nRank ;
261            if (commRank==0) nRank = elements_[i].size() ;
262            MPI_Bcast(&nRank, 1, MPI_INT, 0, localComm_) ;
263             
264            auto it=elements_[i].begin() ;
265            for(int j=0;j<nRank;j++)
266            {
267              int rank ;
268              size_t sizeElement ;
269              if (commRank==0) { rank = it->first ; sizeElement=it->second.numElements(); }
270              MPI_Bcast(&rank, 1, MPI_INT, 0, localComm_) ;
271              MPI_Bcast(&sizeElement, 1, MPI_SIZE_T, 0, localComm_) ;
272              if (commRank!=0) elements_[i][rank].resize(sizeElement) ;
273              MPI_Bcast(elements_[i][rank].dataFirst(), sizeElement, MPI_SIZE_T, 0, localComm_) ;
274              if (commRank==0) ++it ;
275            }
276          }
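        // after the broadcasts above, every local process holds the same elements_[i] maps for the
        // distributed elements computed on rank 0, so the rank bookkeeping below is done identically everywhere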
277
278        for(auto& it : elements_[indElements[0]]) 
279        {
280          if (it.second.numElements()==0) ranks[it.first] = false ;
281          else  ranks[it.first] = true ;
282        }
283   
284        for(int i=0;i<srcView_.size();i++) 
285          if (!isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;
286
287      }
288      else 
289      {
290        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
291        for(int rank : remoteRanks) ranks[rank]=true ;
292
293        for(int i=0; i<srcView_.size(); i++) 
294        {
295          if (isDstViewDistributed_[i]) computeSrcNonDistributed(i) ;
296          else computeSrcDstNonDistributed(i, ranks) ;
297        }
298      }
299
300    } 
301  //*****************************************************
302    else if (dstViewsNonDistributed)
303    {
304      int commRank, commSize ;
305      MPI_Comm_rank(localComm_,&commRank) ;
306      MPI_Comm_size(localComm_,&commSize) ;
307 
308      map<int,bool> ranks ;
309      if (reverse)
310      {
311        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
312        ranks[leaderRank] = true ; 
313      }
314      else for(int i=0;i<remoteSize_;i++) ranks[i]=true ;
315     
316      for(int i=0; i<srcView_.size(); i++) 
317      {
318        if (isSrcViewDistributed_[i]) computeDstNonDistributed(i,ranks) ;
319        else computeSrcDstNonDistributed(i,ranks) ;
320      }
321    } 
322  //*****************************************************
323    else
324    {
325      for(int i=0;i<srcView_.size();i++) 
326        if (isSrcViewDistributed_[i] || isDstViewDistributed_[i])
327        {
328          srcView.push_back(srcView_[i]) ;
329          dstView.push_back(dstView_[i]) ;
330          indElements.push_back(i) ;
331        }
332
333      computeGenericMethod(srcView, dstView, indElements) ;
334   
335      map<int,bool> ranks ; 
336      for(auto& it : elements_[indElements[0]]) 
337      {
338        if (it.second.numElements()==0) ranks[it.first] = false ;
339        else  ranks[it.first] = true ;
340      }
341   
342      for(int i=0;i<srcView_.size();i++) 
343        if (!isSrcViewDistributed_[i] && !isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;
344    }
345
346  }
347
348 
349/**
350  * \brief Compute the connector for the element \b i when the source view is not distributed.
351  *        After the call, element_[i] is defined.
352  *  \param i Index of the element composing the source grid.
353  */
354
355  void CGridRemoteConnector::computeSrcNonDistributed(int i)
356  {
357    auto& element = elements_[i] ;
358    map<int,CArray<size_t,1>> globalIndexView ;
359    dstView_[i]->getGlobalIndexView(globalIndexView) ;
360   
361    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
362   
363    for(auto& it : globalIndexView)
364    {
365      auto& globalIndex=it.second ;
366      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
367    }
368   
369    // First we feed the distributed hash map  with key (remote global index)
370    // associated with the value of the remote rank
371    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
372    // after we feed the DHT with the local global indices of the source view
373
374    int commRank, commSize ;
375    MPI_Comm_rank(localComm_,&commRank) ;
376    MPI_Comm_size(localComm_,&commSize) ;
377    CArray<size_t,1> srcIndex ;
378    // since the source view is not distributed, only rank 0 needs to feed the DHT
379    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
380   
381    // compute the mapping
382    DHT.computeIndexInfoMapping(srcIndex) ;
383    auto& returnInfo = DHT.getInfoIndexMap() ;
384   
385    // returnInfo now contains, for each global index to send, the list of remote ranks it must be sent to,
386    // but only on rank 0 because it is the one that fed the DHT ;
387    // so rank 0 needs to send the list to each server leader, i.e. the local process that specifically handles
388    // one or more servers
389   
390    // rankIndGlo : rankIndGlo[rank][indGlo] : list of indices to send to the remote server of rank "rank"
391    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
392    if (commRank==0) 
393      for(auto& it1 : returnInfo)
394        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;
395   
396   
397    vector<MPI_Request> requests ;
398   
399    if (commRank==0)
400    {
401      requests.resize(remoteSize_) ;
402      for(int i=0 ; i<remoteSize_;i++) 
403      {
404        // send the global indices destined to each server only to its "server leader"
405        int rank = getLeaderRank(commSize, remoteSize_, i) ;
406        MPI_Isend(rankIndGlo[i].data(), rankIndGlo[i].size(), MPI_SIZE_T, rank, i ,localComm_, &requests[i]) ;
407      }
408    } 
409   
410    list<int> remoteRanks;
411    list<int> notUsed ;
412    // which remote ranks am I the server leader of ?
413    computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
414
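    // for each remote rank this process leads, the matching block of global indices is received below
    // from local rank 0, using the remote rank as the message tag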
415    for(auto remoteRank : remoteRanks)
416    {
417      MPI_Status status ;
418      int size ;
419      MPI_Probe(0,remoteRank,localComm_, &status);
420      MPI_Get_count(&status, MPI_SIZE_T, &size) ;
421      elements_[i][remoteRank].resize(size) ;
422      // for each remote rank, receive the global indices from proc 0
423      MPI_Recv(elements_[i][remoteRank].dataFirst(),size, MPI_SIZE_T,0,remoteRank, localComm_,&status) ;
424    }
425     
426    if (commRank==0)
427    {
428      vector<MPI_Status> status(remoteSize_) ;
429      // asynchronous for sender, wait for completion
430      MPI_Waitall(remoteSize_, requests.data(), status.data()) ;
431    }
432  }
433
434/**
435  * \brief Compute the connector for the element \b i when the source view is not distributed (reverse variant).
436  *        After the call, element_[i] is defined.
437  *  \param i Index of the element composing the source grid.
438  */
439
440  void CGridRemoteConnector::computeSrcNonDistributedReverse(int i)
441  {
442    auto& element = elements_[i] ;
443    map<int,CArray<size_t,1>> globalIndexView ;
444    dstView_[i]->getGlobalIndexView(globalIndexView) ;
445   
446    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
447   
448    for(auto& it : globalIndexView)
449    {
450      auto& globalIndex=it.second ;
451      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
452    }
453   
454    // First we feed the distributed hash map  with key (remote global index)
455    // associated with the value of the remote rank
456    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
457    // after we feed the DHT with the local global indices of the source view
458
459    int commRank, commSize ;
460    MPI_Comm_rank(localComm_,&commRank) ;
461    MPI_Comm_size(localComm_,&commSize) ;
462    CArray<size_t,1> srcIndex ;
463    // since the source view is not distributed, only rank 0 needs to feed the DHT
464    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
465   
466    // compute the mapping
467    DHT.computeIndexInfoMapping(srcIndex) ;
468    auto& returnInfo = DHT.getInfoIndexMap() ;
469   
470    // returnInfo now contains, for each global index to send, the list of remote ranks it must be sent to,
471    // but only on rank 0 because it is the one that fed the DHT ;
472    // so rank 0 must make these lists available to the other local processes, which is done here
473    // with broadcasts (see below)
474   
475    // rankIndGlo : rankIndGlo[rank][indGlo] : list of indices to send to the remote server of rank "rank"
476    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
477    if (commRank==0) 
478      for(auto& it1 : returnInfo)
479        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;
480   
481   // broadcast the same information to every client
482   for(int remoteRank=0 ; remoteRank<remoteSize_ ; remoteRank++)
483   {
484      int remoteDataSize ;
485      if (commRank==0) remoteDataSize = rankIndGlo[remoteRank].size() ;
486      MPI_Bcast(&remoteDataSize, 1, MPI_INT, 0, localComm_) ;
487
488      auto& element = elements_[i][remoteRank] ;
489      element.resize(remoteDataSize) ;
490      if (commRank==0) for(int j=0 ; j<remoteDataSize; j++) element(j)=rankIndGlo[remoteRank][j] ;
491      MPI_Bcast(element.dataFirst(), remoteDataSize, MPI_SIZE_T, 0, localComm_) ;
492   }
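   // at this point every local process holds the full elements_[i] map for all remote ranks, unlike
   // computeSrcNonDistributed where each leader only holds the ranks it leads ; this replication is
   // presumably what the reverse transfer direction requires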
493  }
494
495
496
497  /**
498   * \brief Compute the remote connector for the element \b i when the remote view is not distributed.
499   *        After the call, element_[i] is defined.
500   * \param i Index of the element composing the remote grid.
501   * \param ranks The list of ranks for which the local process is in charge of computing the connector
502   *              (for example if it is a server leader). If ranks[rank] == false, the corresponding elements_
503   *              entry is set to an empty array (no data to send), just in order to notify the corresponding remote server,
504   *              since the call is collective across all of them
505   */
506  void CGridRemoteConnector::computeDstNonDistributed(int i, map<int,bool>& ranks)
507  {
508    auto& element = elements_[i] ;
509    map<int,CArray<size_t,1>> globalIndexView ;
510    dstView_[i]->getGlobalIndexView(globalIndexView) ;
511   
512   
513    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
514 
515    // First we feed the distributed hash map  with key (remote global index)
516    // associated with the value of the remote rank
517    for(auto& it : globalIndexView)
518      if (it.first==0) // since the remote view is not distributed, insert only the remote rank 0
519      {
520        auto& globalIndex=it.second ;
521        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associated to rank 0
522      }
523   
524    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
525    // after we feed the DHT with the local global indices of the source view
526
527    CArray<size_t,1> srcIndex ;
528    srcView_[i]->getGlobalIndexView(srcIndex) ;
529    DHT.computeIndexInfoMapping(srcIndex) ;
530    auto& returnInfo = DHT.getInfoIndexMap() ;
531   
532    // returnInfo now contains, for each global index to send, the list of remote ranks it must be sent to ;
533    // now construct the element_ list of global indices for each rank in my list, except when the array must be empty
534    for (auto& rank : ranks)
535    {
536      if (rank.second) // non-empty array => this rank has data to receive
537      {
538        int size=0 ;
539        for(auto& it : returnInfo) if (!it.second.empty()) size++ ;
540        auto& array = element[rank.first] ;
541        array.resize(size) ;
542        size=0 ;
543        for(auto& it : returnInfo) 
544          if (!it.second.empty()) 
545          {
546            array(size)=it.first ;
547            size++ ;
548          }
549      }
550      else element[rank.first] = CArray<size_t,1>(0) ;  // empty array => this rank has no data to receive
551    }
552  }
553
554 /**
555  * \brief Compute the remote connector for the element \b i when the source and the remote view are not distributed.
556  *        After the call, element_[i] is defined.
557  * \param i Index of the element composing the remote grid.
558  * \param ranks The list of ranks for which the local process is in charge of computing the connector
559  *              (for example if it is a server leader). If ranks[rank] == false, the corresponding elements_
560  *              entry is set to an empty array (no data to send), just in order to notify the corresponding remote server,
561  *              since the call is collective across all of them
562  */
563
564  void CGridRemoteConnector::computeSrcDstNonDistributed(int i, map<int,bool>& ranks)
565  {
566    auto& element = elements_[i] ;
567    map<int,CArray<size_t,1>> globalIndexView ;
568    dstView_[i]->getGlobalIndexView(globalIndexView) ;
569   
570   
571    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
572    // First we feed the distributed hash map  with key (remote global index)
573    // associated with the value of the remote rank
574
575    for(auto& it : globalIndexView)
576      if (it.first==0) // insert only the remote rank 0 since the remote view is not distributed
577      {
578        auto& globalIndex=it.second ;
579        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associated to rank 0
580      }
581   
582    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
583    // after we feed the DHT with the local global indices of the source view
584
585    int commRank, commSize ;
586    MPI_Comm_rank(localComm_,&commRank) ;
587    MPI_Comm_size(localComm_,&commSize) ;
588    CArray<size_t,1> srcIndex ;
589 
590    // since the source view is not distributed, only rank 0 needs to feed the DHT
591    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
592    DHT.computeIndexInfoMapping(srcIndex) ;
593    auto& returnInfo = DHT.getInfoIndexMap() ;
594   
595    vector<size_t> indGlo ;
596    if (commRank==0) 
597      for(auto& it1 : returnInfo) 
598        for(auto& it2 : it1.second) indGlo.push_back(it1.first) ;
599
600    // now local rank 0 knows which indices to send to remote rank 0, but all the servers
601    // must receive the same information, and only the leader ranks will send it.
602    // So local rank 0 must broadcast the information to all leaders.
603    // For this we create a new communicator composed of the local processes that must send data
604    // to a remote rank ; the data are broadcasted, and element_[i] is constructed for each remote
605    // rank in charge
606    int color=0 ;
607    if (ranks.empty()) color=0 ;
608    else color=1 ;
609    if (commRank==0) color=1 ;
610    MPI_Comm newComm ;
611    MPI_Comm_split(localComm_, color, commRank, &newComm) ;
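    // the color==1 communicator gathers local rank 0 together with every process responsible for at
    // least one remote rank ; the broadcast below replicates indGlo on each of them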
612    if (color==1)
613    {
614      // I am one of the processes that must send something to one or more remote servers,
615      // so I get the list of global indices from rank 0
616      int dataSize ;
617      if (commRank==0) dataSize=indGlo.size() ;
618      MPI_Bcast(&dataSize,1,MPI_INT, 0, newComm) ;
619      indGlo.resize(dataSize) ;
620      MPI_Bcast(indGlo.data(),dataSize,MPI_SIZE_T,0,newComm) ;
621    }
622    MPI_Comm_free(&newComm) ;
623
624    // construct element_[i] from indGlo
625    for(auto& rank : ranks)
626    {
627      if (rank.second)
628      {
629        int dataSize=indGlo.size();
630        auto& element = elements_[i][rank.first] ;
631        element.resize(dataSize) ;
632        for(int i=0;i<dataSize; i++) element(i)=indGlo[i] ;
633      }
634      else element[rank.first] = CArray<size_t,1>(0) ;
635    }   
636
637  }
638
639
640 /**
641  * \brief Generic method to compute the grid remote connector. Only distributed elements are specified in the source view and remote view.
642  *        Connectors for non-distributed elements are computed separately to improve performance and memory consumption. After the call,
643  *        \b elements_  is defined.
644  *  \param srcView List of the source views composing the grid, without the non-distributed views
645  *  \param dstView List of the remote views composing the grid, without the non-distributed views
646  *  \param indElements Indices of the views making the correspondence between all views and the distributed views (those given in input)
647  */
648  void CGridRemoteConnector::computeGenericMethod(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, vector<int>& indElements)
649  {
650    // generic method, every element can be distributed
651    int nDst = dstView.size() ;
652    vector<size_t> dstSliceSize(nDst) ;
653    dstSliceSize[0] = 1 ; 
654    for(int i=1; i<nDst; i++)  dstSliceSize[i] = dstView[i-1]->getGlobalSize()*dstSliceSize[i-1] ;
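    // for illustration : with two remote elements of global sizes ni and nj, dstSliceSize = {1, ni},
    // so a remote cell with element indices (i,j) gets the grid global index i + j*ni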
655 
656    CClientClientDHTTemplate<int>::Index2VectorInfoTypeMap dataInfo ;
657    CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap info ; // info map
658
659    // first, we need to feed the DHT with the global indices of the remote server
660    // for that :
661    // first, the first element is inserted in a DHT with the rank as key and the associated list of global indices as value ;
662    // then, get the indices previously stored for the remote ranks I am in charge of, and reinsert the global indices
663    // corresponding to the position of the element in the remote view using the tensorial product ;
664    // finally we get only the list of remote global indices I am in charge of for the whole remote grid
665
666    for(int pos=0; pos<nDst; pos++)
667    {
668      size_t sliceSize=dstSliceSize[pos] ;
669      map<int,CArray<size_t,1>> globalIndexView ;
670      dstView[pos]->getGlobalIndexView(globalIndexView) ;
671     
672      CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap lastInfo(info) ;
673
674      if (pos>0)
675      {
676        CArray<size_t,1> ranks(globalIndexView.size()) ;
677        auto it=globalIndexView.begin() ;
678        for(int i=0 ; i<ranks.numElements();i++,it++) ranks(i)=it->first ;
679        CClientClientDHTTemplate<size_t> dataRanks(info, localComm_) ;
680        dataRanks.computeIndexInfoMapping(ranks) ;
681        lastInfo = dataRanks.getInfoIndexMap() ;
682      }
683     
684      info.clear() ;
685      for(auto& it : globalIndexView)
686      {
687        int rank = it.first ;
688        auto& globalIndex = it.second ;
689        auto& inf = info[rank] ;
690        if (pos==0) for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)) ;
691        else
692        {
693          auto& lastGlobalIndex = lastInfo[rank] ;
694          for(size_t lastGlobalInd : lastGlobalIndex)
695          {
696            for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)*sliceSize+lastGlobalInd) ;
697          }
698        } 
699      }
700
701      if (pos==nDst-1)
702      {
703         for(auto& it : info)
704         {
705           int rank=it.first ;
706           auto& globalIndex = it.second ;
707           for(auto globalInd : globalIndex) dataInfo[globalInd].push_back(rank) ;
708         }
709      } 
710    }
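    // dataInfo now maps every remote-grid global index handled by this process to the list of remote
    // ranks that own it ; it is used below to build the DHT queried with the source global indices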
711
712    // we feed the DHT with the remote global index
713    CClientClientDHTTemplate<int> dataRanks(dataInfo, localComm_) ;
714
715    // generate list of global index for src view
716    int nSrc = srcView.size() ;
717    vector<size_t> srcSliceSize(nSrc) ;
718   
719    srcSliceSize[0] = 1 ; 
720    for(int i=1; i<nSrc; i++)  srcSliceSize[i] = srcView[i-1]->getGlobalSize()*srcSliceSize[i-1] ;
721
722    vector<size_t> srcGlobalIndex ;
723    size_t sliceIndex=0 ;
724    srcView[nSrc-1]->getGlobalIndex(srcGlobalIndex, sliceIndex, srcSliceSize.data(), srcView.data(), nSrc-1) ;
725    // now we have the global indices of the source grid in srcGlobalIndex
726    // we feed the DHT with the src global indices (if we have any)
727    if (srcGlobalIndex.size()>0)
728    {
729      CArray<size_t,1> srcGlobalIndexArray(srcGlobalIndex.data(), shape(srcGlobalIndex.size()),neverDeleteData) ;
730      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
731    }
732    else
733    {
734      CArray<size_t,1> srcGlobalIndexArray ;
735      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
736    }
737    const auto& returnInfo = dataRanks.getInfoIndexMap() ;
738    // returnInfo now contains, for each global index to send, the list of remote ranks it must be sent to,
739    // but we want to use the tensorial product property to get the same information using only the global
740    // indices of the element views. So the idea is to reverse the information : for a global index of the grid
741    // to send to the remote server, what is the global index of each element composing the grid ?
742
743    vector<map<int, set<size_t>>> elements(nSrc) ; // internal representation of elements composing the grid
744
745    for(auto& indRanks : returnInfo)
746    {
747      size_t gridIndexGlo=indRanks.first ;
748      auto& ranks = indRanks.second ;
749      for(int i=nSrc-1; i>=0; i--)
750      {
751        auto& element = elements[i] ;
752        size_t localIndGlo = gridIndexGlo / srcSliceSize[i] ;
753        gridIndexGlo = gridIndexGlo % srcSliceSize[i] ;
754        for(int rank : ranks) element[rank].insert(localIndGlo) ;
755      }
756    }
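    // the loop above inverts the tensorial product : with srcSliceSize = {1, ni} for a two-element grid,
    // a grid global index g decomposes into g/ni for the last element and g%ni for the first one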
757
758//    elements_.resize(nSrc) ;
759    for(int i=0 ; i<nSrc; i++)
760    {
761      auto& element=elements[i] ;
762      for(auto& rankInd : element)
763      {
764        int rank=rankInd.first ;
765        set<size_t>& indGlo = rankInd.second ;
766        CArray<size_t,1>& indGloArray = elements_[indElements[i]][rank] ;
767        indGloArray.resize(indGlo.size()) ;
768        int j=0 ;
769        for (auto index : indGlo) { indGloArray(j) = index ; j++; }
770      }
771    }
772   
773    // What about the servers that have no data to receive ?
774    // They must still be informed, by receiving an event with no data.
775    // So find the remote servers with no data ; one client will take charge of making sure
776    // each of them receives a global index array with no data (0-size)
777    vector<int> ranks(remoteSize_,0) ;
778    for(auto& it : elements_[indElements[0]]) ranks[it.first] = 1 ;
779    MPI_Allreduce(MPI_IN_PLACE, ranks.data(), remoteSize_, MPI_INT, MPI_SUM, localComm_) ;
780    int commRank, commSize ;
781    MPI_Comm_rank(localComm_,&commRank) ;
782    MPI_Comm_size(localComm_,&commSize) ;
783    int pos=0 ;
784    for(int i=0; i<remoteSize_ ; i++)
785      if (ranks[i]==0)
786      {
787        if (pos%commSize==commRank) 
788          for(int j=0 ; j<nSrc; j++) elements_[indElements[j]][i] = CArray<size_t,1>(0) ;
789        pos++ ;
790      }
791  }
792
793 /**
794  * \brief Once the connector is computed (i.e. \b elements_ is filled), sending redundant data to the server can be avoided.
795  *        This call computes the redundant ranks and stores them in the \b rankToRemove_ attribute.
796  *        The goal of this method is to hash each block of indices that determines which data to send
797  *        to a specific server rank. So the data to send to a rank is associated with a hash.
798  *        Then the hashes are compared between local ranks, and redundant data corresponding to the same hash are removed.
799  */
800  void CGridRemoteConnector::computeRedondantRanks(bool reverse)
801  {
802    int commRank ;
803    MPI_Comm_rank(localComm_,&commRank) ;
804
805    set<int> ranks;
806    for(auto& element : elements_) 
807      for(auto& it : element) ranks.insert(it.first) ;
808
809    for(auto& element : elements_)
810      for(auto& it : element) 
811        if (ranks.count(it.first)==0) ERROR("void CGridRemoteConnector::computeRedondantRanks(bool reverse)",<<"number of ranks in elements is not coherent between each element") ;
812   
813    HashXIOS<size_t> hashGlobalIndex;
814   
815    map<int,size_t> hashRanks ;
816    for(auto& element : elements_) 
817      for(auto& it : element)
818      {
819        auto& globalIndex=it.second ;
820        int rank=it.first ;
821        size_t hash ;
822        hash=0 ;
823        for(int i=0; i<globalIndex.numElements(); i++) hash+=hashGlobalIndex(globalIndex(i)) ;
824        if (globalIndex.numElements()>0)
825        {
826          if (hashRanks.count(rank)==0) hashRanks[rank]=hash ;
827          else hashRanks[rank]=hashGlobalIndex.hashCombine(hashRanks[rank],hash) ;
828        }
829      }
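    // hashRanks[rank] now condenses all the index blocks this process would send to remote rank "rank"
    // into a single hash value, which is what gets compared between local processes below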
830   
831    if (reverse)
832    {
833      set<size_t> hashs ;
834      // easy because everything is local
835      for(auto& hashRank : hashRanks)
836      {
837        if (hashs.count(hashRank.second)==0) hashs.insert(hashRank.second) ;
838        else rankToRemove_.insert(hashRank.first) ;
839      }
840   
841    }
842    else
843    {
844      // a hash has now been computed for each data block I will send to the server
845
846      CClientClientDHTTemplate<int>::Index2InfoTypeMap info ;
847 
848      map<size_t,int> hashRank ;
849      HashXIOS<int> hashGlobalIndexRank;
850      for(auto& it : hashRanks) 
851      {
852        it.second = hashGlobalIndexRank.hashCombine(it.first,it.second) ; 
853        info[it.second]=commRank ;
854        hashRank[it.second]=it.first ;
855      }
856
857      // we feed a DHT map with key : hash, value : myrank
858      CClientClientDHTTemplate<int> dataHash(info, localComm_) ;
859      CArray<size_t,1> hashList(hashRank.size()) ;
860   
861      int i=0 ;
862      for(auto& it : hashRank) { hashList(i)=it.first ; i++; }
863
864      // now, which ranks have the same hash ? feed the DHT with my list of hashes
865      dataHash.computeIndexInfoMapping(hashList) ;
866      auto& hashRankList = dataHash.getInfoIndexMap() ;
867   
868
869      for(auto& it : hashRankList)
870      {
871        size_t hash = it.first ;
872        auto& ranks = it.second ;
873     
874        bool first=true ;
875        // only the process with the lowest rank is put in charge of sending the data to the remote server
876        for(int rank : ranks) if (commRank>rank) first=false ;
877        if (!first) rankToRemove_.insert(hashRank[hash]) ;
878      }
879    }
880  }
881 
882}