source: XIOS3/trunk/src/distribution/grid_remote_connector.cpp @ 2397

Last change on this file since 2397 was 2397, checked in by ymipsl, 22 months ago
  • Optimize remote connector computation in case of read (reverse way).
  • Don't compute clientFromServerConnector (and all intermediate computations) anymore in the non-reading case.

YM

  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 33.8 KB
#include "grid_remote_connector.hpp"
#include "client_client_dht_template.hpp"
#include "leader_process.hpp"
#include "mpi.hpp"
#include "element.hpp"



namespace xios
{
  /**
   * \brief Class constructor.
   * \param srcView List of source views.
   * \param dstView List of remote views.
   * \param localComm Local MPI communicator
   * \param remoteSize Size of the remote communicator
   */
  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, MPI_Comm localComm, int remoteSize)
                       : srcView_(srcView), dstView_(dstView), localComm_(localComm), remoteSize_(remoteSize)
  {}

  /**
   * \brief Class constructor.
   * \param srcView List of source views.
   * \param dstView List of remote views.
   * \param localComm Local MPI communicator
   * \param remoteSize Size of the remote communicator
   */
  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector< shared_ptr<CLocalView> >& dstView, MPI_Comm localComm, int remoteSize)
                       : srcView_(srcView), localComm_(localComm), remoteSize_(remoteSize)
  {
    for(auto& it : dstView) dstView_.push_back((shared_ptr<CDistributedView>) it) ;
  }
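  // Note: in this second constructor, destination views given as CLocalView are stored through their
  // CDistributedView base, so both constructors populate the same dstView_ container.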


  /**
   * \brief Compute whether each view composing the source grid and the remote grid is distributed or not.
   *        The result is stored in the internal attributes \b isSrcViewDistributed_ and \b isDstViewDistributed_.
   * \details To compute this, a hash is computed for each array of indices. The hash must be permutable, i.e.
   *          the order of the list of global indices must not influence the value of the hash. So a simple sum of
   *          the hashes of the individual indices is used for the whole array. The computed hashes are then compared
   *          across the ranks of the \b localComm_ MPI communicator using an MPI_Allreduce. If the hash is the same
   *          for every rank, the view is not distributed.
   */
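  // Illustrative example (not part of the algorithm): the per-array hash below is a plain sum of the
  // hashes of the individual indices, so it is invariant under reordering, e.g.
  //   hashGlobalIndex(4) + hashGlobalIndex(9) + hashGlobalIndex(7)
  // gives the same value for the index lists {4,9,7} and {7,4,9}; comparing this single value
  // (together with the number of indices) across ranks is therefore enough to detect a distributed view.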
  void CGridRemoteConnector::computeViewDistribution(void)
  {
    HashXIOS<size_t> hashGlobalIndex; // hash function-object

    int nDst = dstView_.size() ;
    vector<size_t> hashRank(remoteSize_) ;
    vector<size_t> sizeRank(remoteSize_) ;
    isDstViewDistributed_.resize(nDst) ;

    for(int i=0; i<nDst; i++)
    {
      map<int,CArray<size_t,1>> globalIndexView ;
      dstView_[i]->getGlobalIndexView(globalIndexView) ;
      hashRank.assign(remoteSize_,0) ; // 0 for every rank except the remote ranks whose view I hold,
                                       // which are assigned my local hash
      sizeRank.assign(remoteSize_,0) ;

      for(auto& it : globalIndexView)
      {
        int rank=it.first ;
        CArray<size_t,1>& globalIndex = it.second ;
        size_t globalIndexSize = globalIndex.numElements();
        size_t hashValue=0 ;
        for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
        hashRank[rank] += hashValue ;
        sizeRank[rank] += globalIndexSize ;
      }
      // Sum the hashes over every process of the local comm. The reduction is over the size of the remote view (remoteSize_);
      // afterwards we have, for each rank of the remote view, its global hash.
      MPI_Allreduce(MPI_IN_PLACE, hashRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
      MPI_Allreduce(MPI_IN_PLACE, sizeRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
      size_t value = hashRank[0] ;
      size_t size = sizeRank[0] ;
      isDstViewDistributed_[i]=false ;
      for(int j=0 ; j<remoteSize_ ; j++)
        if (size!=sizeRank[j] || value != hashRank[j])
        {
          isDstViewDistributed_[i]=true ;
          break ;
        }
    }

    int nSrc = srcView_.size() ;
    int commSize,commRank ;
    MPI_Comm_size(localComm_,&commSize) ;
    MPI_Comm_rank(localComm_,&commRank) ;
    hashRank.resize(commSize,0) ;
    isSrcViewDistributed_.resize(nSrc) ;

    for(int i=0; i<nSrc; i++)
    {
      CArray<size_t,1> globalIndex ;
      srcView_[i]->getGlobalIndexView(globalIndex) ;
      hashRank.assign(commSize,0) ; // 0 for everybody except my rank
      size_t globalIndexSize = globalIndex.numElements() ;

      size_t allEqual ;
      MPI_Allreduce(&globalIndexSize, &allEqual, 1, MPI_SIZE_T, MPI_BXOR, localComm_) ;
      if (allEqual!=0)
      {
        isSrcViewDistributed_[i]=true ;
        break ;
      }

      // warning: the Jenkins hash maps 0 --> 0, so the number of elements must also be compared across ranks
      size_t hashValue=0 ;
      for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
      MPI_Allreduce(&hashValue, &allEqual, 1, MPI_SIZE_T, MPI_BXOR, localComm_) ;
      if (allEqual!=0) isSrcViewDistributed_[i]=true ;
      else isSrcViewDistributed_[i]=false ;
    }

  }

/**
  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
  * \details Depending on the view distributions computed by the computeViewDistribution() call, the connector is computed in computeConnectorMethods(), and to achieve a better optimisation
  *          some redundant ranks can be removed from the elements_ map.
  */
  void CGridRemoteConnector::computeConnector(bool eliminateRedundant)
  {
    if (eliminateRedundant)
    {
      computeViewDistribution() ;
      computeConnectorMethods() ;
      computeRedondantRanks() ;
      for(auto& rank : rankToRemove_)
        for(auto& element : elements_) element.erase(rank) ;
    }
    else
    {
       computeViewDistribution() ;
       computeConnectorRedundant() ;
    }
  }
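  // Usage sketch (illustrative only; in XIOS this class is normally driven by the surrounding grid
  // distribution machinery, and the variable names below are placeholders):
  //   CGridRemoteConnector connector(srcViews, dstViews, localComm, remoteSize) ;
  //   connector.computeConnector(/*eliminateRedundant=*/ true) ;
  // After the call, elements_ holds, for each element and each remote rank, the list of global
  // indices to exchange, with redundant ranks already erased.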

/**
  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
  * \details In this routine we don't eliminate redundant cells as is done in
  *          computeConnectorMethods. This can be useful to perform reduction operations over processes.
  *          In the future, some optimisation could be done considering full redundancy of the
  *          source view or the destination view.
  */
  void CGridRemoteConnector::computeConnectorRedundant(void)
  {
    vector<shared_ptr<CLocalView>> srcView ;
    vector<shared_ptr<CDistributedView>> dstView ;
    vector<int> indElements ;
    elements_.resize(srcView_.size()) ;

    bool srcViewsNonDistributed=true ; // not useful now, but kept for later optimization
    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;

    bool dstViewsNonDistributed=true ;  // not useful now, but kept for later optimization
    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;

    for(int i=0;i<srcView_.size();i++)
    {
      srcView.push_back(srcView_[i]) ;
      dstView.push_back(dstView_[i]) ;
      indElements.push_back(i) ;
    }

    computeGenericMethod(srcView, dstView, indElements) ;

    map<int,bool> ranks ;
    for(auto& it : elements_[indElements[0]])
    {
      if (it.second.numElements()==0) ranks[it.first] = false ;
      else  ranks[it.first] = true ;
    }

  }


/**
  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
  * \details In order to achieve a better optimisation,
  *          we distinguish the case where the grid is not distributed on the source side (\b computeSrcNonDistributed),
  *          on the remote side (\b computeDstNonDistributed), or on both (\b computeSrcDstNonDistributed).
  *          Otherwise the generic method computeGenericMethod is called. Note that if one element view
  *          is distributed neither on the source nor on the remote grid, then we can use the tensorial product
  *          property to compute it independently using the \b computeSrcDstNonDistributed method.
  *          After that, the \b removeRedondantRanks method is called to suppress blocks of data that would be sent
  *          redundantly to the remote servers.
  */
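  // Dispatch summary for the branches below (marked //*****):
  //   - all src and dst views non-distributed : computeSrcDstNonDistributed() for every element
  //   - only src views non-distributed        : per element, computeSrcNonDistributed() when the dst view is
  //                                             distributed (generic method + broadcast in reverse mode),
  //                                             computeSrcDstNonDistributed() otherwise
  //   - only dst views non-distributed        : per element, computeDstNonDistributed() when the src view is
  //                                             distributed, computeSrcDstNonDistributed() otherwise
  //   - otherwise                             : computeGenericMethod() on the distributed elements, then
  //                                             computeSrcDstNonDistributed() on the fully non-distributed ones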
  void CGridRemoteConnector::computeConnectorMethods(bool reverse)
  {
    vector<shared_ptr<CLocalView>> srcView ;
    vector<shared_ptr<CDistributedView>> dstView ;
    vector<int> indElements ;
    elements_.resize(srcView_.size()) ;

    bool srcViewsNonDistributed=true ;
    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;

    bool dstViewsNonDistributed=true ;
    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;

  //*****************************************************
    if (srcViewsNonDistributed && dstViewsNonDistributed)
    {
      int commRank, commSize ;
      MPI_Comm_rank(localComm_,&commRank) ;
      MPI_Comm_size(localComm_,&commSize) ;

      map<int,bool> ranks ;
      if (reverse)
      {
        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
        ranks[leaderRank] = true ;
      }
      else
      {
        list<int> remoteRanks;
        list<int> notUsed ;
        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
        for(int rank : remoteRanks) ranks[rank]=true ;
      }
      for(int i=0; i<srcView_.size(); i++) computeSrcDstNonDistributed(i,ranks) ;
    }

  //*****************************************************
    else if (srcViewsNonDistributed)
    {
      int commRank, commSize ;
      MPI_Comm_rank(localComm_,&commRank) ;
      MPI_Comm_size(localComm_,&commSize) ;
      list<int> remoteRanks;
      list<int> notUsed ;
      map<int,bool> ranks ;

      if (reverse)
      {
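        // Reverse (read) case: only local rank 0 contributes the real source indices; the other local
        // processes contribute void views so that computeGenericMethod() can still be called collectively.
        // The per-remote-rank index lists computed on rank 0 are then broadcast to every local process below.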
        shared_ptr<CLocalElement> voidElement = make_shared<CLocalElement>(commRank, 0, CArray<size_t,1>()) ;
        shared_ptr<CLocalView> voidView = make_shared<CLocalView>(voidElement, CElementView::FULL, CArray<int,1>()) ;

        for(int i=0;i<srcView_.size();i++)
          if (isDstViewDistributed_[i])
          {
            if (commRank==0) srcView.push_back(srcView_[i]) ;
            else srcView.push_back(make_shared<CLocalView>(make_shared<CLocalElement>(commRank, srcView_[i]->getGlobalSize(), CArray<size_t,1>()),
                                                           CElementView::FULL, CArray<int,1>())) ; // void view
            dstView.push_back(dstView_[i]) ;
            indElements.push_back(i) ;
          }

        computeGenericMethod(srcView, dstView, indElements) ;

        for(int i=0;i<srcView_.size();i++)
          if (isDstViewDistributed_[i])
          {
            size_t sizeElement ;
            int nRank ;
            if (commRank==0) nRank = elements_[i].size() ;
            MPI_Bcast(&nRank, 1, MPI_INT, 0, localComm_) ;

            auto it=elements_[i].begin() ;
            for(int j=0;j<nRank;j++)
            {
              int rank ;
              size_t sizeElement ;
              if (commRank==0) { rank = it->first ; sizeElement=it->second.numElements(); }
              MPI_Bcast(&rank, 1, MPI_INT, 0, localComm_) ;
              MPI_Bcast(&sizeElement, 1, MPI_SIZE_T, 0, localComm_) ;
              if (commRank!=0) elements_[i][rank].resize(sizeElement) ;
              MPI_Bcast(elements_[i][rank].dataFirst(), sizeElement, MPI_SIZE_T, 0, localComm_) ;
              if (commRank==0) ++it ;
            }
          }

        for(auto& it : elements_[indElements[0]])
        {
          if (it.second.numElements()==0) ranks[it.first] = false ;
          else  ranks[it.first] = true ;
        }

        for(int i=0;i<srcView_.size();i++)
          if (!isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;

      }
      else
      {
        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
        for(int rank : remoteRanks) ranks[rank]=true ;

        for(int i=0; i<srcView_.size(); i++)
        {
          if (isDstViewDistributed_[i]) computeSrcNonDistributed(i) ;
          else computeSrcDstNonDistributed(i, ranks) ;
        }
      }

    }
  //*****************************************************
    else if (dstViewsNonDistributed)
    {
      int commRank, commSize ;
      MPI_Comm_rank(localComm_,&commRank) ;
      MPI_Comm_size(localComm_,&commSize) ;

      map<int,bool> ranks ;
      if (reverse)
      {
        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
        ranks[leaderRank] = true ;
      }
      else for(int i=0;i<remoteSize_;i++) ranks[i]=true ;

      for(int i=0; i<srcView_.size(); i++)
      {
        if (isSrcViewDistributed_[i]) computeDstNonDistributed(i,ranks) ;
        else computeSrcDstNonDistributed(i,ranks) ;
      }
    }
  //*****************************************************
    else
    {
      for(int i=0;i<srcView_.size();i++)
        if (isSrcViewDistributed_[i] || isDstViewDistributed_[i])
        {
          srcView.push_back(srcView_[i]) ;
          dstView.push_back(dstView_[i]) ;
          indElements.push_back(i) ;
        }

      computeGenericMethod(srcView, dstView, indElements) ;

      map<int,bool> ranks ;
      for(auto& it : elements_[indElements[0]])
      {
        if (it.second.numElements()==0) ranks[it.first] = false ;
        else  ranks[it.first] = true ;
      }

      for(int i=0;i<srcView_.size();i++)
        if (!isSrcViewDistributed_[i] && !isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;
    }

  }


/**
  * \brief Compute the connector for the element \b i when the source view is not distributed.
  *        After the call, element_[i] is defined.
  *  \param i Index of the element composing the source grid.
  */

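  // Sketch of the protocol implemented below: (1) every local process feeds a DHT with the remote global
  // indices it holds in the destination view, keyed to the owning remote rank; (2) local rank 0 queries the
  // DHT with the (non-distributed) source indices and thus learns, for each remote rank, the indices to send;
  // (3) rank 0 forwards each remote rank's list to the local "server leader" in charge of that rank, which
  // stores it in elements_[i].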
  void CGridRemoteConnector::computeSrcNonDistributed(int i)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;

    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;

    for(auto& it : globalIndexView)
    {
      auto& globalIndex=it.second ;
      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
    }

    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank
    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    CArray<size_t,1> srcIndex ;
    // since the source view is not distributed, only rank 0 needs to feed the DHT
    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;

    // compute the mapping
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    // returnInfo now maps each global index to the list of remote ranks it must be sent to,
    // but only on rank 0 because it is the one that fed the DHT;
    // so rank 0 must send each list to the corresponding server leader, i.e. the local process
    // that specifically handles one or more servers

    // rankIndGlo : rankIndGlo[rank][indGlo] : list of indices to send to the remote server of rank "rank"
    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
    if (commRank==0)
      for(auto& it1 : returnInfo)
        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;


    vector<MPI_Request> requests ;

    if (commRank==0)
    {
      requests.resize(remoteSize_) ;
      for(int i=0 ; i<remoteSize_;i++)
      {
        // send the global indices destined for a given server only to its "server leader"
        int rank = getLeaderRank(commSize, remoteSize_, i) ;
        MPI_Isend(rankIndGlo[i].data(), rankIndGlo[i].size(), MPI_SIZE_T, rank, i ,localComm_, &requests[i]) ;
      }
    }

    list<int> remoteRanks;
    list<int> notUsed ;
    // for which remote ranks am I the server leader?
    computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;

    for(auto remoteRank : remoteRanks)
    {
      MPI_Status status ;
      int size ;
      MPI_Probe(0,remoteRank,localComm_, &status);
      MPI_Get_count(&status, MPI_SIZE_T, &size) ;
      elements_[i][remoteRank].resize(size) ;
      // for each remote rank, receive the global indices from proc 0
      MPI_Recv(elements_[i][remoteRank].dataFirst(),size, MPI_SIZE_T,0,remoteRank, localComm_,&status) ;
    }

    if (commRank==0)
    {
      vector<MPI_Status> status(remoteSize_) ;
      // the sends are asynchronous, wait for their completion
      MPI_Waitall(remoteSize_, requests.data(), status.data()) ;
    }
  }

/**
  * \brief Compute the connector for the element \b i when the source view is not distributed (reverse case, used when reading).
  *        After the call, element_[i] is defined.
  *  \param i Index of the element composing the source grid.
  */

  void CGridRemoteConnector::computeSrcNonDistributedReverse(int i)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;

    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;

    for(auto& it : globalIndexView)
    {
      auto& globalIndex=it.second ;
      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
    }

    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank
    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    CArray<size_t,1> srcIndex ;
    // since the source view is not distributed, only rank 0 needs to feed the DHT
    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;

    // compute the mapping
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    // returnInfo now maps each global index to the list of remote ranks it must be sent to,
    // but only on rank 0 because it is the one that fed the DHT;
    // here the lists are broadcast so that every local process gets the full mapping

    // rankIndGlo : rankIndGlo[rank][indGlo] : list of indices to send to the remote server of rank "rank"
    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
    if (commRank==0)
      for(auto& it1 : returnInfo)
        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;

   // broadcast the same information to every client
   for(int remoteRank=0 ; remoteRank<remoteSize_ ; remoteRank++)
   {
      int remoteDataSize ;
      if (commRank==0) remoteDataSize = rankIndGlo[remoteRank].size() ;
      MPI_Bcast(&remoteDataSize, 1, MPI_INT, 0, localComm_) ;

      auto& element = elements_[i][remoteRank] ;
      element.resize(remoteDataSize) ;
      if (commRank==0) for(int j=0 ; j<remoteDataSize; j++) element(j)=rankIndGlo[remoteRank][j] ;
      MPI_Bcast(element.dataFirst(), remoteDataSize, MPI_SIZE_T, 0, localComm_) ;
   }
  }



  /**
   * \brief Compute the remote connector for the element \b i when the remote view is not distributed.
   *        After the call, element_[i] is defined.
   * \param i Index of the element composing the remote grid.
   * \param ranks The list of ranks for which the local process is in charge of computing the connector
   *              (if it is a server leader, for example). If ranks[rank] == false, the corresponding elements_
   *              entry is set to an empty array (no data to send), just to notify the corresponding remote server
   *              that the call is collective with the other ones.
   */
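  // In short: since every remote rank holds the same (non-distributed) remote view, the DHT is fed with the
  // indices of remote rank 0 only; each local process then looks up its own source indices in it, and the
  // resulting list is stored for every remote rank it is in charge of (or as an empty array when ranks[rank] is false).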
  void CGridRemoteConnector::computeDstNonDistributed(int i, map<int,bool>& ranks)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;


    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;

    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank
    for(auto& it : globalIndexView)
      if (it.first==0) // since the remote view is not distributed, insert only the remote rank 0
      {
        auto& globalIndex=it.second ;
        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associate it with rank 0
      }

    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    CArray<size_t,1> srcIndex ;
    srcView_[i]->getGlobalIndexView(srcIndex) ;
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    // returnInfo now maps each local global index to the list of remote ranks it must be sent to;
    // now construct the element_ list of global indices for each rank in my list, unless the array must stay empty
    for (auto& rank : ranks)
    {
      if (rank.second) // non-empty array => this rank has data to receive
      {
        int size=0 ;
        for(auto& it : returnInfo) if (!it.second.empty()) size++ ;
        auto& array = element[rank.first] ;
        array.resize(size) ;
        size=0 ;
        for(auto& it : returnInfo)
          if (!it.second.empty())
          {
            array(size)=it.first ;
            size++ ;
          }
      }
      else element[rank.first] = CArray<size_t,1>(0) ;  // empty array => this rank has no data to receive
    }
  }

 /**
  * \brief Compute the remote connector for the element \b i when neither the source nor the remote view is distributed.
  *        After the call, element_[i] is defined.
  * \param i Index of the element composing the remote grid.
  * \param ranks The list of ranks for which the local process is in charge of computing the connector
  *              (if it is a server leader, for example). If ranks[rank] == false, the corresponding elements_
  *              entry is set to an empty array (no data to send), just to notify the corresponding remote server
  *              that the call is collective with the other ones.
  */

  void CGridRemoteConnector::computeSrcDstNonDistributed(int i, map<int,bool>& ranks)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;


    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank

    for(auto& it : globalIndexView)
      if (it.first==0) // insert only the remote rank 0 since the remote view is not distributed
      {
        auto& globalIndex=it.second ;
        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associate it with rank 0
      }

    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    CArray<size_t,1> srcIndex ;

    // since the source view is not distributed, only rank 0 needs to feed the DHT
    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    vector<size_t> indGlo ;
    if (commRank==0)
      for(auto& it1 : returnInfo)
        for(auto& it2 : it1.second) indGlo.push_back(it1.first) ;

    // now local rank 0 knows which indices to send to remote rank 0, but all the servers
    // must receive the same information, and only the leader ranks will send it.
    // So local rank 0 must broadcast the information to all leaders.
    // For this we create a new communicator composed of the local processes that must send data
    // to a remote rank; the data are broadcast, and element_[i] is constructed for each remote
    // rank in charge
    int color=0 ;
    if (ranks.empty()) color=0 ;
    else color=1 ;
    if (commRank==0) color=1 ;
    MPI_Comm newComm ;
    MPI_Comm_split(localComm_, color, commRank, &newComm) ;
    if (color==1)
    {
      // I am one of the processes that must send something to one or more remote servers,
      // so I get the list of global indices from rank 0
      int dataSize ;
      if (commRank==0) dataSize=indGlo.size() ;
      MPI_Bcast(&dataSize,1,MPI_INT, 0, newComm) ;
      indGlo.resize(dataSize) ;
      MPI_Bcast(indGlo.data(),dataSize,MPI_SIZE_T,0,newComm) ;
    }
    MPI_Comm_free(&newComm) ;

    // construct element_[i] from indGlo
    for(auto& rank : ranks)
    {
      if (rank.second)
      {
        int dataSize=indGlo.size();
        auto& element = elements_[i][rank.first] ;
        element.resize(dataSize) ;
        for(int i=0;i<dataSize; i++) element(i)=indGlo[i] ;
      }
      else element[rank.first] = CArray<size_t,1>(0) ;
    }

  }


 /**
  * \brief Generic method to compute the grid remote connector. Only distributed elements are specified in the source view and remote view.
  *        Connectors for non-distributed elements are computed separately to improve performance and memory consumption. After the call,
  *        \b elements_ is defined.
  *  \param srcView List of the source views composing the grid, without the non-distributed views
  *  \param dstView List of the remote views composing the grid, without the non-distributed views
  *  \param indElements Index of each view, making the correspondence between all views and the distributed views given in input
  */
  void CGridRemoteConnector::computeGenericMethod(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, vector<int>& indElements)
  {
    // generic method, every element can be distributed
    int nDst = dstView.size() ;
    vector<size_t> dstSliceSize(nDst) ;
    dstSliceSize[0] = 1 ;
    for(int i=1; i<nDst; i++)  dstSliceSize[i] = dstView[i-1]->getGlobalSize()*dstSliceSize[i-1] ;
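    // Worked example (illustrative): for a grid made of two destination elements with global sizes {ni, nj},
    // dstSliceSize = {1, ni} and a grid global index g factorizes as
    //   g = jGlo*ni + iGlo,  i.e.  iGlo = g % ni  and  jGlo = g / ni ;
    // this tensorial-product property is what allows the per-element index lists to be combined below
    // (and decomposed again on the source side further down).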

    CClientClientDHTTemplate<int>::Index2VectorInfoTypeMap dataInfo ;
    CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap info ; // info map

    // first, we need to feed the DHT with the global indices of the remote server
    // for that:
    // First, the first element is inserted in a DHT with the rank as key and the associated list of global indices as value.
    // Then, the previously stored indices associated with the remote ranks I am in charge of are retrieved, and the global indices
    // corresponding to the position of the element in the remote view are reinserted using the tensorial product.
    // Finally we get only the list of remote global indices I am in charge of for the whole remote grid.

    for(int pos=0; pos<nDst; pos++)
    {
      size_t sliceSize=dstSliceSize[pos] ;
      map<int,CArray<size_t,1>> globalIndexView ;
      dstView[pos]->getGlobalIndexView(globalIndexView) ;

      CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap lastInfo(info) ;

      if (pos>0)
      {
        CArray<size_t,1> ranks(globalIndexView.size()) ;
        auto it=globalIndexView.begin() ;
        for(int i=0 ; i<ranks.numElements();i++,it++) ranks(i)=it->first ;
        CClientClientDHTTemplate<size_t> dataRanks(info, localComm_) ;
        dataRanks.computeIndexInfoMapping(ranks) ;
        lastInfo = dataRanks.getInfoIndexMap() ;
      }

      info.clear() ;
      for(auto& it : globalIndexView)
      {
        int rank = it.first ;
        auto& globalIndex = it.second ;
        auto& inf = info[rank] ;
        if (pos==0) for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)) ;
        else
        {
          auto& lastGlobalIndex = lastInfo[rank] ;
          for(size_t lastGlobalInd : lastGlobalIndex)
          {
            for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)*sliceSize+lastGlobalInd) ;
          }
        }
      }

      if (pos==nDst-1)
      {
         for(auto& it : info)
         {
           int rank=it.first ;
           auto& globalIndex = it.second ;
           for(auto globalInd : globalIndex) dataInfo[globalInd].push_back(rank) ;
         }
      }
    }

    // we feed the DHT with the remote global indices
    CClientClientDHTTemplate<int> dataRanks(dataInfo, localComm_) ;

    // generate the list of global indices for the src view
    int nSrc = srcView.size() ;
    vector<size_t> srcSliceSize(nSrc) ;

    srcSliceSize[0] = 1 ;
    for(int i=1; i<nSrc; i++)  srcSliceSize[i] = srcView[i-1]->getGlobalSize()*srcSliceSize[i-1] ;

    vector<size_t> srcGlobalIndex ;
    size_t sliceIndex=0 ;
    srcView[nSrc-1]->getGlobalIndex(srcGlobalIndex, sliceIndex, srcSliceSize.data(), srcView.data(), nSrc-1) ;
    // now we have the global indices of the source grid in srcGlobalIndex
    // we feed the DHT with the src global indices (if we have any)
    if (srcGlobalIndex.size()>0)
    {
      CArray<size_t,1> srcGlobalIndexArray(srcGlobalIndex.data(), shape(srcGlobalIndex.size()),neverDeleteData) ;
      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
    }
    else
    {
      CArray<size_t,1> srcGlobalIndexArray ;
      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
    }
    const auto& returnInfo = dataRanks.getInfoIndexMap() ;
    // returnInfo now maps each grid global index to the list of remote ranks it must be sent to,
    // but we want to use the tensorial product property to get the same information using only the global
    // indices of the element views. So the idea is to reverse the information: for a global index of the grid
    // to send to the remote server, what is the global index of each element composing the grid?

    vector<map<int, set<size_t>>> elements(nSrc) ; // internal representation of the elements composing the grid

    for(auto& indRanks : returnInfo)
    {
      size_t gridIndexGlo=indRanks.first ;
      auto& ranks = indRanks.second ;
      for(int i=nSrc-1; i>=0; i--)
      {
        auto& element = elements[i] ;
        size_t localIndGlo = gridIndexGlo / srcSliceSize[i] ;
        gridIndexGlo = gridIndexGlo % srcSliceSize[i] ;
        for(int rank : ranks) element[rank].insert(localIndGlo) ;
      }
    }

//    elements_.resize(nSrc) ;
    for(int i=0 ; i<nSrc; i++)
    {
      auto& element=elements[i] ;
      for(auto& rankInd : element)
      {
        int rank=rankInd.first ;
        set<size_t>& indGlo = rankInd.second ;
        CArray<size_t,1>& indGloArray = elements_[indElements[i]][rank] ;
        indGloArray.resize(indGlo.size()) ;
        int j=0 ;
        for (auto index : indGlo) { indGloArray(j) = index ; j++; }
      }
    }

    // What about servers that have no data to receive?
    // They must still be informed, by receiving an event with no data.
    // So find the remote servers with no data; one client takes charge of making sure
    // each of them receives a global index list with no data (0-size)
    vector<int> ranks(remoteSize_,0) ;
    for(auto& it : elements_[indElements[0]]) ranks[it.first] = 1 ;
    MPI_Allreduce(MPI_IN_PLACE, ranks.data(), remoteSize_, MPI_INT, MPI_SUM, localComm_) ;
    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    int pos=0 ;
    for(int i=0; i<remoteSize_ ; i++)
      if (ranks[i]==0)
      {
        if (pos%commSize==commRank)
          for(int j=0 ; j<nSrc; j++) elements_[indElements[j]][i] = CArray<size_t,1>(0) ;
        pos++ ;
      }
  }

 /**
  * \brief Once the connector is computed (i.e. \b elements_ is filled), redundant data can be avoided being sent to the server.
  *        This call computes the redundant ranks and stores them in the \b rankToRemove_ attribute.
  *        The idea is to hash each block of indices that determines the data to send to a specific server rank,
  *        so the data to send to a rank is associated with a hash.
  *        Then the hashes are compared between local ranks, and the redundant data corresponding to the same hash are removed.
  */
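  // Sketch of the scheme implemented below: each local process combines the hashes of the index blocks it
  // would send to a given remote rank into one hash per rank. In reverse mode the comparison is purely local;
  // otherwise a DHT keyed by the (rank, hash) combination is used to find local processes holding identical
  // blocks, and only the lowest local rank keeps the block (the others add the rank to rankToRemove_).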
  void CGridRemoteConnector::computeRedondantRanks(bool reverse)
  {
    int commRank ;
    MPI_Comm_rank(localComm_,&commRank) ;

    set<int> ranks;
    for(auto& element : elements_)
      for(auto& it : element) ranks.insert(it.first) ;

    for(auto& element : elements_)
      for(auto& it : element)
        if (ranks.count(it.first)==0) ERROR("void CGridRemoteConnector::removeRedondantRanks(void)",<<"number of ranks in elements is not coherent between each element") ;

    HashXIOS<size_t> hashGlobalIndex;

    map<int,size_t> hashRanks ;
    for(auto& element : elements_)
      for(auto& it : element)
      {
        auto& globalIndex=it.second ;
        int rank=it.first ;
        size_t hash ;
        hash=0 ;
        for(int i=0; i<globalIndex.numElements(); i++) hash+=hashGlobalIndex(globalIndex(i)) ;
        if (globalIndex.numElements()>0)
        {
          if (hashRanks.count(rank)==0) hashRanks[rank]=hash ;
          else hashRanks[rank]=hashGlobalIndex.hashCombine(hashRanks[rank],hash) ;
        }
      }

    if (reverse)
    {
      set<size_t> hashs ;
      // easy because everything is local
      for(auto& hashRank : hashRanks)
      {
        if (hashs.count(hashRank.second)==0) hashs.insert(hashRank.second) ;
        else rankToRemove_.insert(hashRank.first) ;
      }

    }
    else
    {
      // a hash has now been computed for each data block I will send to the server.

      CClientClientDHTTemplate<int>::Index2InfoTypeMap info ;

      map<size_t,int> hashRank ;
      HashXIOS<int> hashGlobalIndexRank;
      for(auto& it : hashRanks)
      {
        it.second = hashGlobalIndexRank.hashCombine(it.first,it.second) ;
        info[it.second]=commRank ;
        hashRank[it.second]=it.first ;
      }

      // we feed a DHT map with key: hash, value: my rank
      CClientClientDHTTemplate<int> dataHash(info, localComm_) ;
      CArray<size_t,1> hashList(hashRank.size()) ;

      int i=0 ;
      for(auto& it : hashRank) { hashList(i)=it.first ; i++; }

      // now, which ranks share the same hash? feed the DHT with my list of hashes
      dataHash.computeIndexInfoMapping(hashList) ;
      auto& hashRankList = dataHash.getInfoIndexMap() ;


      for(auto& it : hashRankList)
      {
        size_t hash = it.first ;
        auto& ranks = it.second ;

        bool first=true ;
        // only the process with the lowest rank takes charge of sending the data to the remote server
        for(int rank : ranks) if (commRank>rank) first=false ;
        if (!first) rankToRemove_.insert(hashRank[hash]) ;
      }
    }
  }

}