source: XIOS3/trunk/src/distribution/grid_remote_connector.cpp @ 2507

Last change on this file since 2507 was 2507, checked in by ymipsl, 13 months ago

Merging XIOS3_ATTACHED branch into XIOS3 trunk.

YM

  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 34.0 KB
1#include "grid_remote_connector.hpp"
2#include "client_client_dht_template.hpp"
3#include "leader_process.hpp"
4#include "mpi.hpp"
5#include "element.hpp"
6
7
8
9namespace xios
10{
11  /**
12   * \brief class constructor.
13   * \param srcView List of source views.
14   * \param dstView List of remote views.
15   * \param localComm Local MPI communicator
16   * \param remoteSize Size of the remote communicator
17   */ 
18  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, MPI_Comm localComm, int remoteSize) 
19                       : srcView_(srcView), dstView_(dstView), localComm_(localComm), remoteSize_(remoteSize) 
20  {}
21
22  /**
23   * \brief class constructor.
24   * \param srcView List of source views.
25   * \param dstView List of remote views.
26   * \param localComm Local MPI communicator
27   * \param remoteSize Size of the remote communicator
28   */ 
29  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector< shared_ptr<CLocalView> >& dstView, MPI_Comm localComm, int remoteSize) 
30                       : srcView_(srcView), localComm_(localComm), remoteSize_(remoteSize) 
31  {
32    for(auto& it : dstView) dstView_.push_back((shared_ptr<CDistributedView>) it) ; 
33  }
34
35
36  /**
37   * \brief Compute whether each view composing the source grid and the remote grid is distributed or not.
38   *         The result is stored in the internal attributes \b isSrcViewDistributed_ and \b isDstViewDistributed_.
39   * \detail To compute this, a hash is computed for each array of indices. The hash must be permutation-invariant, i.e.
40   *         the order of the list of global indices must not influence the value of the hash. So a simple sum of the
41   *         hashes of the individual indices is used for the whole array. The computed hashes are then compared across
42   *         the ranks of the \b localComm_ MPI communicator using an MPI_Allreduce. If the hash is the same on every
43   *         rank, the view is not distributed.
44   */
45  void CGridRemoteConnector::computeViewDistribution(void)
46  {
47    HashXIOS<size_t> hashGlobalIndex; // hash function-object
48
49    int nDst = dstView_.size() ;
50    vector<size_t> hashRank(remoteSize_) ;
51    vector<size_t> sizeRank(remoteSize_) ;
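    // after the Allreduce below, hashRank[r] and sizeRank[r] will hold the combined hash and total size
    // of the data held across the whole local communicator for remote rank r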
52    isDstViewDistributed_.resize(nDst) ;
53
54    for(int i=0; i<nDst; i++)
55    {
56      map<int,CArray<size_t,1>> globalIndexView ;
57      dstView_[i]->getGlobalIndexView(globalIndexView) ;
58      hashRank.assign(remoteSize_,0) ; // every remote rank is set to 0 except the ranks of the remote views I hold,
59                                       // which are assigned my local hash
60      sizeRank.assign(remoteSize_,0) ;
61
62      for(auto& it : globalIndexView)
63      {
64        int rank=it.first ;
65        CArray<size_t,1>& globalIndex = it.second ;
66        size_t globalIndexSize = globalIndex.numElements();
67        size_t hashValue=0 ;
68        for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
69        hashRank[rank] += hashValue ;
70        sizeRank[rank] += globalIndexSize ;
71      }
72      // sum all the hashes over every process of the local comm. The reduction buffer has the size of the remote view (remoteSize_);
73      // after that, for each rank of the remote view, we get the combined hash
74      MPI_Allreduce(MPI_IN_PLACE, hashRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
75      MPI_Allreduce(MPI_IN_PLACE, sizeRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
76      size_t value = hashRank[0] ;
77      size_t size = sizeRank[0] ;
78      isDstViewDistributed_[i]=false ;
79      for(int j=0 ; j<remoteSize_ ; j++) 
80        if (size!=sizeRank[j] || value != hashRank[j]) 
81        { 
82          isDstViewDistributed_[i]=true ;
83          break ;
84        }
85    }
86
87    int nSrc = srcView_.size() ;
88    int commSize,commRank ;
89    MPI_Comm_size(localComm_,&commSize) ;
90    MPI_Comm_rank(localComm_,&commRank) ;
91    hashRank.resize(commSize,0) ;
92    isSrcViewDistributed_.resize(nSrc) ;
93
94    for(int i=0; i<nSrc; i++)
95    {
96      CArray<size_t,1> globalIndex ;
97      srcView_[i]->getGlobalIndexView(globalIndex) ;
98      hashRank.assign(commSize,0) ; // 0 for everybody except my rank
99      size_t globalIndexSize = globalIndex.numElements() ;
100     
101      size_t minVal,maxVal ;
102      MPI_Allreduce(&globalIndexSize, &minVal, 1, MPI_SIZE_T, MPI_MIN, localComm_) ;
103      MPI_Allreduce(&globalIndexSize, &maxVal, 1, MPI_SIZE_T, MPI_MAX, localComm_) ;
104      if (minVal!=maxVal) 
105      {
106        isSrcViewDistributed_[i]=true ;
107        continue ; // this view is distributed; go on to classify the remaining source views
108      }
109
110      // warning : the Jenkins hash maps 0 --> 0, so we also need to compare the number of elements on each rank
111      size_t hashValue=0 ;
112      for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
113      MPI_Allreduce(&hashValue, &minVal, 1, MPI_SIZE_T, MPI_MIN, localComm_) ;
114      MPI_Allreduce(&hashValue, &maxVal, 1, MPI_SIZE_T, MPI_MAX, localComm_) ;
115      if (minVal!=maxVal) isSrcViewDistributed_[i]=true ;
116      else isSrcViewDistributed_[i]=false ;
117    }
118
119  }
120
121/**
122  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
123  * \detail Depending on the distribution of the views computed in the computeViewDistribution() call, the connector is computed in computeConnectorMethods(), and to achieve better optimisation
124  *         some redundant ranks can be removed from the \b elements_ map.
125  */
126  void CGridRemoteConnector::computeConnector(bool eliminateRedundant)
127  {
128    if (eliminateRedundant)
129    {
130      computeViewDistribution() ;
131      computeConnectorMethods() ;
132      computeRedondantRanks() ; 
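      // remote ranks whose data block duplicates the block held by another local process are
      // dropped from every element, so that each block is sent only once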
133      for(auto& rank : rankToRemove_)
134        for(auto& element : elements_) element.erase(rank) ;
135    }
136    else
137    {
138       computeViewDistribution() ;
139       computeConnectorRedundant() ;
140    }
141  }
142
143/**
144  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
145  * \detail In this routine we don't eliminate redundant cells as is done in
146  *         computeConnectorMethods. It can be useful for performing reduce operations over processes.
147  *         In the future, some optimisation could be done considering full redundancy of the
148  *         source view or the destination view.
149  */
150  void CGridRemoteConnector::computeConnectorRedundant(void)
151  {
152    vector<shared_ptr<CLocalView>> srcView ;
153    vector<shared_ptr<CDistributedView>> dstView ;
154    vector<int> indElements ;
155    elements_.resize(srcView_.size()) ;
156   
157    bool srcViewsNonDistributed=true ; // not used for now, kept for later optimization
158    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;
159   
160    bool dstViewsNonDistributed=true ;  // not used for now, kept for later optimization
161    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;
162   
163    for(int i=0;i<srcView_.size();i++) 
164    {
165      srcView.push_back(srcView_[i]) ;
166      dstView.push_back(dstView_[i]) ;
167      indElements.push_back(i) ;
168    }
169
170    computeGenericMethod(srcView, dstView, indElements) ;
171   
172    map<int,bool> ranks ; 
173    for(auto& it : elements_[indElements[0]]) 
174    {
175      if (it.second.numElements()==0) ranks[it.first] = false ;
176      else  ranks[it.first] = true ;
177    }
178   
179  }
180
181
182/**
183  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
184  * \detail In order to achieve better optimisation,
185  *         we distinguish the cases where the grid is not distributed on the source side (\b computeSrcNonDistributed),
186  *         on the remote side (\b computeDstNonDistributed), or on both (\b computeSrcDstNonDistributed).
187  *         Otherwise the generic method \b computeGenericMethod is called. Note that if one element view
188  *         is not distributed on both the source and the remote grid, then we can use the tensorial product
189  *         property to compute it independently using the \b computeSrcDstNonDistributed method.
190  *         After that, the \b removeRedondantRanks method is called to suppress blocks of data that would otherwise
191  *         be sent redundantly to the remote servers.
192  */
193  void CGridRemoteConnector::computeConnectorMethods(bool reverse)
194  {
195    vector<shared_ptr<CLocalView>> srcView ;
196    vector<shared_ptr<CDistributedView>> dstView ;
197    vector<int> indElements ;
198    elements_.resize(srcView_.size()) ;
199   
200    bool srcViewsNonDistributed=true ;
201    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;
202   
203    bool dstViewsNonDistributed=true ;
204    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;
205   
206  //*****************************************************
207    if (srcViewsNonDistributed && dstViewsNonDistributed)
208    {
209      int commRank, commSize ;
210      MPI_Comm_rank(localComm_,&commRank) ;
211      MPI_Comm_size(localComm_,&commSize) ;
212 
213      map<int,bool> ranks ;
214      if (reverse)
215      {
216        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
217        ranks[leaderRank] = true ; 
218      }
219      else
220      {
221        list<int> remoteRanks;
222        list<int> notUsed ;
223        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
224        for(int rank : remoteRanks) ranks[rank]=true ;
225      }
226      for(int i=0; i<srcView_.size(); i++) computeSrcDstNonDistributed(i,ranks) ; 
227    }
228 
229  //*****************************************************
230    else if (srcViewsNonDistributed) 
231    {
232      int commRank, commSize ;
233      MPI_Comm_rank(localComm_,&commRank) ;
234      MPI_Comm_size(localComm_,&commSize) ;
235      list<int> remoteRanks;
236      list<int> notUsed ;
237      map<int,bool> ranks ;
238     
239      if (reverse)
240      {
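        // In the reverse case only local rank 0 holds the meaningful (non-distributed) source indices:
        // the other ranks contribute empty ("void") source views so that computeGenericMethod can still be
        // called collectively, and the resulting elements_ entries are then broadcast from rank 0 below.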
241        shared_ptr<CLocalElement> voidElement = make_shared<CLocalElement>(commRank, 0, CArray<size_t,1>()) ;
242        shared_ptr<CLocalView> voidView = make_shared<CLocalView>(voidElement, CElementView::FULL, CArray<int,1>()) ;
243
244        for(int i=0;i<srcView_.size();i++) 
245          if (isDstViewDistributed_[i])
246          {
247            if (commRank==0) srcView.push_back(srcView_[i]) ;
248            else srcView.push_back(make_shared<CLocalView>(make_shared<CLocalElement>(commRank, srcView_[i]->getGlobalSize(), CArray<size_t,1>()),
249                                                           CElementView::FULL, CArray<int,1>())) ; // void view
250            dstView.push_back(dstView_[i]) ;
251            indElements.push_back(i) ;
252          }
253       
254        computeGenericMethod(srcView, dstView, indElements) ;
255
256        for(int i=0;i<srcView_.size();i++) 
257          if (isDstViewDistributed_[i])
258          {
259            size_t sizeElement ;
260            int nRank ;
261            if (commRank==0) nRank = elements_[i].size() ;
262            MPI_Bcast(&nRank, 1, MPI_INT, 0, localComm_) ;
263             
264            auto it=elements_[i].begin() ;
265            for(int j=0;j<nRank;j++)
266            {
267              int rank ;
268              size_t sizeElement ;
269              if (commRank==0) { rank = it->first ; sizeElement=it->second.numElements(); }
270              MPI_Bcast(&rank, 1, MPI_INT, 0, localComm_) ;
271              MPI_Bcast(&sizeElement, 1, MPI_SIZE_T, 0, localComm_) ;
272              if (commRank!=0) elements_[i][rank].resize(sizeElement) ;
273              MPI_Bcast(elements_[i][rank].dataFirst(), sizeElement, MPI_SIZE_T, 0, localComm_) ;
274              if (commRank==0) ++it ;
275            }
276          }
277
278        for(auto& it : elements_[indElements[0]]) 
279        {
280          if (it.second.numElements()==0) ranks[it.first] = false ;
281          else  ranks[it.first] = true ;
282        }
283   
284        for(int i=0;i<srcView_.size();i++) 
285          if (!isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;
286
287      }
288      else 
289      {
290        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
291        for(int rank : remoteRanks) ranks[rank]=true ;
292
293        for(int i=0; i<srcView_.size(); i++) 
294        {
295          if (isDstViewDistributed_[i]) computeSrcNonDistributed(i) ;
296          else computeSrcDstNonDistributed(i, ranks) ;
297        }
298      }
299
300    } 
301  //*****************************************************
302    else if (dstViewsNonDistributed)
303    {
304      int commRank, commSize ;
305      MPI_Comm_rank(localComm_,&commRank) ;
306      MPI_Comm_size(localComm_,&commSize) ;
307 
308      map<int,bool> ranks ;
309      if (reverse)
310      {
311        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
312        ranks[leaderRank] = true ; 
313      }
314      else for(int i=0;i<remoteSize_;i++) ranks[i]=true ;
315     
316      for(int i=0; i<srcView_.size(); i++) 
317      {
318        if (isSrcViewDistributed_[i]) computeDstNonDistributed(i,ranks) ;
319        else computeSrcDstNonDistributed(i,ranks) ;
320      }
321    } 
322  //*****************************************************
323    else
324    {
325      for(int i=0;i<srcView_.size();i++) 
326        if (isSrcViewDistributed_[i] || isDstViewDistributed_[i])
327        {
328          srcView.push_back(srcView_[i]) ;
329          dstView.push_back(dstView_[i]) ;
330          indElements.push_back(i) ;
331        }
332
333      computeGenericMethod(srcView, dstView, indElements) ;
334   
335      map<int,bool> ranks ; 
336      for(auto& it : elements_[indElements[0]]) 
337      {
338        if (it.second.numElements()==0) ranks[it.first] = false ;
339        else  ranks[it.first] = true ;
340      }
341   
342      for(int i=0;i<srcView_.size();i++) 
343        if (!isSrcViewDistributed_[i] && !isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;
344    }
345
346  }
347
348 
349/**
350  * \brief Compute the connector for the element \b i when the source view is not distributed.
351  *        After the call, \b elements_[i] is defined.
352  *  \param i Index of the element composing the source grid.
353  */
354
355  void CGridRemoteConnector::computeSrcNonDistributed(int i)
356  {
357    auto& element = elements_[i] ;
358    map<int,CArray<size_t,1>> globalIndexView ;
359    dstView_[i]->getGlobalIndexView(globalIndexView) ;
360   
361    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
362   
363    for(auto& it : globalIndexView)
364    {
365      auto& globalIndex=it.second ;
366      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
367    }
368   
369    // First we feed the distributed hash map  with key (remote global index)
370    // associated with the value of the remote rank
371    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
372    // after we feed the DHT with the local global indices of the source view
373
374    int commRank, commSize ;
375    MPI_Comm_rank(localComm_,&commRank) ;
376    MPI_Comm_size(localComm_,&commSize) ;
377    CArray<size_t,1> srcIndex ;
378    // since the source view is not distributed, only rank 0 needs to feed the DHT
379    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
380   
381    // compute the mapping
382    DHT.computeIndexInfoMapping(srcIndex) ;
383    auto& returnInfo = DHT.getInfoIndexMap() ;
384   
385    // returnInfo now maps each global index to send to the list of remote ranks it must be sent to,
386    // but only on rank 0 because it is the one that fed the DHT.
387    // So rank 0 needs to send the list to each server leader, i.e. the local process that specifically handles one or more
388    // servers
389   
390    // rankIndGlo : rankIndGlo[rank] : list of global indices to send to the remote server of rank "rank"
391    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
392    if (commRank==0) 
393      for(auto& it1 : returnInfo)
394        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;
395   
396   
397    vector<MPI_Request> requests ;
398   
399    if (commRank==0)
400    {
401      requests.resize(remoteSize_) ;
402      for(int i=0 ; i<remoteSize_;i++) 
403      {
404        // ok send only the global indices for a server to the "server leader"
405        int rank = getLeaderRank(commSize, remoteSize_, i) ;
406        MPI_Isend(rankIndGlo[i].data(), rankIndGlo[i].size(), MPI_SIZE_T, rank, i ,localComm_, &requests[i]) ;
407      }
408    } 
409   
410    list<int> remoteRanks;
411    list<int> notUsed ;
412    // I am a server leader of which remote ranks ?
413    computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
414
415    for(auto remoteRank : remoteRanks)
416    {
417      MPI_Status status ;
418      int size ;
419      MPI_Probe(0,remoteRank,localComm_, &status);
420      MPI_Get_count(&status, MPI_SIZE_T, &size) ;
421      elements_[i][remoteRank].resize(size) ;
422      // for each remote rank, receive the global indices from proc 0
423      MPI_Recv(elements_[i][remoteRank].dataFirst(),size, MPI_SIZE_T,0,remoteRank, localComm_,&status) ;
424    }
425     
426    if (commRank==0)
427    {
428      vector<MPI_Status> status(remoteSize_) ;
429      // asynchronous for sender, wait for completion
430      MPI_Waitall(remoteSize_, requests.data(), status.data()) ;
431    }
432  }
433
434/**
435  * \brief Compute the connector for the element \b i when the source view is not distributed (reverse variant:
436  *        the result is broadcast to every process). After the call, \b elements_[i] is defined.
437  *  \param i Index of the element composing the source grid.
438  */
439
440  void CGridRemoteConnector::computeSrcNonDistributedReverse(int i)
441  {
442    auto& element = elements_[i] ;
443    map<int,CArray<size_t,1>> globalIndexView ;
444    dstView_[i]->getGlobalIndexView(globalIndexView) ;
445   
446    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
447   
448    for(auto& it : globalIndexView)
449    {
450      auto& globalIndex=it.second ;
451      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
452    }
453   
454    // First we feed the distributed hash map  with key (remote global index)
455    // associated with the value of the remote rank
456    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
457    // after we feed the DHT with the local global indices of the source view
458
459    int commRank, commSize ;
460    MPI_Comm_rank(localComm_,&commRank) ;
461    MPI_Comm_size(localComm_,&commSize) ;
462    CArray<size_t,1> srcIndex ;
463    // since the source view is not distributed, only rank 0 needs to feed the DHT
464    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
465   
466    // compute the mapping
467    DHT.computeIndexInfoMapping(srcIndex) ;
468    auto& returnInfo = DHT.getInfoIndexMap() ;
469   
470    // returnInfo now maps each global index to send to the list of remote ranks it must be sent to,
471    // but only on rank 0 because it is the one that fed the DHT.
472    // So rank 0 needs to send the list to each server leader, i.e. the local process that specifically handles one or more
473    // servers
474   
475    // rankIndGlo : rankIndGlo[rank] : list of global indices to send to the remote server of rank "rank"
476    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
477    if (commRank==0) 
478      for(auto& it1 : returnInfo)
479        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;
480   
481   // broadcast the same data to each client
482   for(int remoteRank=0 ; remoteRank<remoteSize_ ; remoteRank++)
483   {
484      int remoteDataSize ;
485      if (commRank==0) remoteDataSize = rankIndGlo[remoteRank].size() ;
486      MPI_Bcast(&remoteDataSize, 1, MPI_INT, 0, localComm_) ;
487
488      auto& element = elements_[i][remoteRank] ;
489      element.resize(remoteDataSize) ;
490      if (commRank==0) for(int j=0 ; j<remoteDataSize; j++) element(j)=rankIndGlo[remoteRank][j] ;
491      MPI_Bcast(element.dataFirst(), remoteDataSize, MPI_SIZE_T, 0, localComm_) ;
492   }
493  }
494
495
496
497  /**
498   * \brief Compute the remote connector for the element \b i when the remote view is not distributed.
499   *        After the call, \b elements_[i] is defined.
500   * \param i Index of the element composing the remote grid.
501   * \param ranks The list of ranks for which the local process is in charge of computing the connector
502   *              (if it is a server leader, for example). If ranks[rank] == false the corresponding elements_
503   *              entry is set to a void array (no data to send), just to notify the corresponding remote server
504   *              that the call is collective with the other ones.
505   */
506  void CGridRemoteConnector::computeDstNonDistributed(int i, map<int,bool>& ranks)
507  {
508    auto& element = elements_[i] ;
509    map<int,CArray<size_t,1>> globalIndexView ;
510    dstView_[i]->getGlobalIndexView(globalIndexView) ;
511   
512   
513    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
514 
515    // First we feed the distributed hash map  with key (remote global index)
516    // associated with the value of the remote rank
517    for(auto& it : globalIndexView)
518      if (it.first==0) // since the remote view is not distributed, insert only the remote rank 0
519      {
520        auto& globalIndex=it.second ;
521        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associated with rank 0
522      }
523   
524    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
525    // after we feed the DHT with the local global indices of the source view
526
527    CArray<size_t,1> srcIndex ;
528    srcView_[i]->getGlobalIndexView(srcIndex) ;
529    DHT.computeIndexInfoMapping(srcIndex) ;
530    auto& returnInfo = DHT.getInfoIndexMap() ;
531   
532    // returnInfo now maps each global index to send to the list of remote ranks it must be sent to;
533    // now construct the elements_ list of global indices for each rank in my list, except where the array must stay empty
534    for (auto& rank : ranks)
535    {
536      if (rank.second) // non-empty array => this rank has data to be received
537      {
538        int size=0 ;
539        for(auto& it : returnInfo) if (!it.second.empty()) size++ ;
540        auto& array = element[rank.first] ;
541       array.resize(size) ;
542       size=0 ;
543       for(auto& it : returnInfo) 
544         if (!it.second.empty()) 
545         {
546           array(size)=it.first ;
547           size++ ;
548         }
549      }
550      else element[rank.first] = CArray<size_t,1>(0) ;  // empty array => this rank has no data to receive
551    }
552  }
553
554 /**
555  * \brief Compute the remote connector for the element \b i when neither the source nor the remote view is distributed.
556  *        After the call, \b elements_[i] is defined.
557  * \param i Index of the element composing the remote grid.
558  * \param ranks The list of ranks for which the local process is in charge of computing the connector
559  *              (if it is a server leader, for example). If ranks[rank] == false the corresponding elements_
560  *              entry is set to a void array (no data to send), just to notify the corresponding remote server
561  *              that the call is collective with the other ones.
562  */
563
564  void CGridRemoteConnector::computeSrcDstNonDistributed(int i, map<int,bool>& ranks)
565  {
566    auto& element = elements_[i] ;
567    map<int,CArray<size_t,1>> globalIndexView ;
568    dstView_[i]->getGlobalIndexView(globalIndexView) ;
569   
570   
571    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
572    // First we feed the distributed hash map  with key (remote global index)
573    // associated with the value of the remote rank
574
575    for(auto& it : globalIndexView)
576      if (it.first==0) // insert only the remote rank 0 since the remote view is not distributed
577      {
578        auto& globalIndex=it.second ;
579        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associated with rank 0
580      }
581   
582    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
583    // after we feed the DHT with the local global indices of the source view
584
585    int commRank, commSize ;
586    MPI_Comm_rank(localComm_,&commRank) ;
587    MPI_Comm_size(localComm_,&commSize) ;
588    CArray<size_t,1> srcIndex ;
589 
590    // since the source view is not distributed, only rank 0 needs to feed the DHT
591    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
592    DHT.computeIndexInfoMapping(srcIndex) ;
593    auto& returnInfo = DHT.getInfoIndexMap() ;
594   
595    vector<size_t> indGlo ;
596    if (commRank==0) 
597      for(auto& it1 : returnInfo) 
598        for(auto& it2 : it1.second) indGlo.push_back(it1.first) ;
599
600    // now local rank 0 knows which indices to send to remote rank 0, but all the servers
601    // must receive the same information, and only the leader ranks will send it.
602    // So local rank 0 must broadcast the information to all the leaders.
603    // For this we create a new communicator composed of the local processes that must send data
604    // to a remote rank; the data are broadcast over it, and elements_[i] is constructed for each remote
605    // rank in charge.
606    int color=0 ;
607    if (ranks.empty()) color=0 ;
608    else color=1 ;
609    if (commRank==0) color=1 ;
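    // color==1 gathers local rank 0 (which holds the index list) together with every process in charge of
    // at least one remote rank (even if only to send an empty notification); the broadcast below happens
    // inside this sub-communicator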
610    MPI_Comm newComm ;
611    MPI_Comm_split(localComm_, color, commRank, &newComm) ;
612    if (color==1)
613    {
614      // ok, I am part of the process that must send something to one or more remote server
615      // so I get the list of global indices from rank 0
616      int dataSize ;
617      if (commRank==0) dataSize=indGlo.size() ;
618      MPI_Bcast(&dataSize,1,MPI_INT, 0, newComm) ;
619      indGlo.resize(dataSize) ;
620      MPI_Bcast(indGlo.data(),dataSize,MPI_SIZE_T,0,newComm) ;
621    }
622    MPI_Comm_free(&newComm) ;
623
624    // construct elements_[i] from indGlo
625    for(auto& rank : ranks)
626    {
627      if (rank.second)
628      {
629        int dataSize=indGlo.size();
630        auto& element = elements_[i][rank.first] ;
631        element.resize(dataSize) ;
632        for(int i=0;i<dataSize; i++) element(i)=indGlo[i] ;
633      }
634      else element[rank.first] = CArray<size_t,1>(0) ;
635    }   
636
637  }
638
639
640 /**
641  * \brief Generic method to compute the grid remote connector. Only distributed elements are specified in the source and remote views.
642  *        Connectors for non-distributed elements are computed separately to improve performance and memory consumption. After the call,
643  *        \b elements_  is defined.
644  *  \param srcView List of the source views composing the grid, without the non-distributed views
645  *  \param dstView List of the remote views composing the grid, without the non-distributed views
646  *  \param indElements Indices of the views, making the correspondence between all the views and the distributed views given in input
647  */
648  void CGridRemoteConnector::computeGenericMethod(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, vector<int>& indElements)
649  {
650    // generic method, every element can be distributed
651    int nDst = dstView.size() ;
652    vector<size_t> dstSliceSize(nDst) ;
653    dstSliceSize[0] = 1 ; 
654    for(int i=1; i<nDst; i++)  dstSliceSize[i] = dstView[i-1]->getGlobalSize()*dstSliceSize[i-1] ;
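    // dstSliceSize[i] is the product of the global sizes of elements 0..i-1, so a global grid index is
    // the sum over i of elementGlobalIndex_i * dstSliceSize[i] (tensorial-product flattening)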
655 
656    CClientClientDHTTemplate<int>::Index2VectorInfoTypeMap dataInfo ;
657    CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap info ; // info map
658
659    // first, we need to feed the DHT with the global indices of the remote server
660    // for that :
661    // First, the first element is inserted in a DHT with the rank as key and the associated list of global indices as value.
662    // Then, for each following element, get the previously stored indices associated with the remote ranks I am in charge of
663    // and reinsert the global indices corresponding to the position of the element in the remote view using the tensorial product.
664    // Finally we get only the list of remote global indices I am in charge of for the whole remote grid.
665
666    for(int pos=0; pos<nDst; pos++)
667    {
668      size_t sliceSize=dstSliceSize[pos] ;
669      map<int,CArray<size_t,1>> globalIndexView ;
670      dstView[pos]->getGlobalIndexView(globalIndexView) ;
671     
672      CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap lastInfo(info) ;
673
674      if (pos>0)
675      {
676        CArray<size_t,1> ranks(globalIndexView.size()) ;
677        auto it=globalIndexView.begin() ;
678        for(int i=0 ; i<ranks.numElements();i++,it++) ranks(i)=it->first ;
679        CClientClientDHTTemplate<size_t> dataRanks(info, localComm_) ;
680        dataRanks.computeIndexInfoMapping(ranks) ;
681        lastInfo = dataRanks.getInfoIndexMap() ;
682      }
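      // for pos>0, lastInfo now holds, for each remote rank present in this view, the partially flattened
      // indices built from the previous elements; they are combined with the current element's indices below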
683     
684      info.clear() ;
685      for(auto& it : globalIndexView)
686      {
687        int rank = it.first ;
688        auto& globalIndex = it.second ;
689        auto& inf = info[rank] ;
690        if (pos==0) for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)) ;
691        else
692        {
693          auto& lastGlobalIndex = lastInfo[rank] ;
694          for(size_t lastGlobalInd : lastGlobalIndex)
695          {
696            for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)*sliceSize+lastGlobalInd) ;
697          }
698        } 
699      }
700
701      if (pos==nDst-1)
702      {
703         for(auto& it : info)
704         {
705           int rank=it.first ;
706           auto& globalIndex = it.second ;
707           for(auto globalInd : globalIndex) dataInfo[globalInd].push_back(rank) ;
708         }
709      } 
710    }
711
712    // we feed the DHT with the remote global index
713    CClientClientDHTTemplate<int> dataRanks(dataInfo, localComm_) ;
714
715    // generate list of global index for src view
716    int nSrc = srcView.size() ;
717    vector<size_t> srcSliceSize(nSrc) ;
718   
719    srcSliceSize[0] = 1 ; 
720    for(int i=1; i<nSrc; i++)  srcSliceSize[i] = srcView[i-1]->getGlobalSize()*srcSliceSize[i-1] ;
721
722    vector<size_t> srcGlobalIndex ;
723    size_t sliceIndex=0 ;
724    srcView[nSrc-1]->getGlobalIndex(srcGlobalIndex, sliceIndex, srcSliceSize.data(), srcView.data(), nSrc-1) ;
725    // now we have the global index of the source grid in srcGlobalIndex
726    // we feed the DHT with the src global index (if we have)
727    if (srcGlobalIndex.size()>0)
728    {
729      CArray<size_t,1> srcGlobalIndexArray(srcGlobalIndex.data(), shape(srcGlobalIndex.size()),neverDeleteData) ;
730      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
731    }
732    else
733    {
734      CArray<size_t,1> srcGlobalIndexArray ;
735      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
736    }
737    const auto& returnInfo = dataRanks.getInfoIndexMap() ;
738    // returnInfo now maps each global index to send to the list of remote ranks it must be sent to,
739    // but we want to use the tensorial product property to get the same information using only the global
740    // indices of the element views. So the idea is to reverse the information : for a global index of the grid
741    // to send to the remote server, what is the global index of each element composing the grid ?
742
743    vector<map<int, set<size_t>>> elements(nSrc) ; // internal representation of elements composing the grid
744
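    // decompose each grid global index back into per-element global indices by successive division/modulo
    // with srcSliceSize (the inverse of the tensorial-product flattening used above)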
745    for(auto& indRanks : returnInfo)
746    {
747      size_t gridIndexGlo=indRanks.first ;
748      auto& ranks = indRanks.second ;
749      for(int i=nSrc-1; i>=0; i--)
750      {
751        auto& element = elements[i] ;
752        size_t localIndGlo = gridIndexGlo / srcSliceSize[i] ;
753        gridIndexGlo = gridIndexGlo % srcSliceSize[i] ;
754        for(int rank : ranks) element[rank].insert(localIndGlo) ;
755      }
756    }
757
758//    elements_.resize(nSrc) ;
759    for(int i=0 ; i<nSrc; i++)
760    {
761      auto& element=elements[i] ;
762      for(auto& rankInd : element)
763      {
764        int rank=rankInd.first ;
765        set<size_t>& indGlo = rankInd.second ;
766        CArray<size_t,1>& indGloArray = elements_[indElements[i]][rank] ;
767        indGloArray.resize(indGlo.size()) ;
768        int j=0 ;
769        for (auto index : indGlo) { indGloArray(j) = index ; j++; }
770      }
771    }
772   
773    // What about the servers that have no data to receive?
774    // They must still be informed, and will receive an event with no data.
775    // So find the remote servers with no data; one client will take charge of sending
776    // them a 0-size list of global indices
777    vector<int> ranks(remoteSize_,0) ;
778    for(auto& it : elements_[indElements[0]]) ranks[it.first] = 1 ;
779    MPI_Allreduce(MPI_IN_PLACE, ranks.data(), remoteSize_, MPI_INT, MPI_SUM, localComm_) ;
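    // after the Allreduce, ranks[r]==0 means that no local process sends anything to remote rank r;
    // such ranks are distributed round-robin below so that exactly one client sends them an empty message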
780    int commRank, commSize ;
781    MPI_Comm_rank(localComm_,&commRank) ;
782    MPI_Comm_size(localComm_,&commSize) ;
783    int pos=0 ;
784    for(int i=0; i<remoteSize_ ; i++)
785      if (ranks[i]==0)
786      {
787        if (pos%commSize==commRank) 
788          for(int j=0 ; j<nSrc; j++) elements_[indElements[j]][i] = CArray<size_t,1>(0) ;
789        pos++ ;
790      }
791  }
792
793 /**
794  * \brief Once the connector is computed (i.e. \b elements_ is filled), redundant data can be avoided when sending to the server.
795  *        This call computes the redundant ranks and stores them in the \b rankToRemove_ attribute.
796  *        The goal of this method is to compute a hash of each block of indices that determines which data to send
797  *        to a specific server rank. So the data to send to a rank is associated with a hash.
798  *        Then the hashes are compared between local ranks, and the redundant data corresponding to the same hash are removed.
799  */
800  void CGridRemoteConnector::computeRedondantRanks(bool reverse)
801  {
802    int commRank ;
803    MPI_Comm_rank(localComm_,&commRank) ;
804
805    set<int> ranks;
806    for(auto& element : elements_) 
807      for(auto& it : element) ranks.insert(it.first) ;
808
809    for(auto& element : elements_)
810      for(auto& it : element) 
811        if (ranks.count(it.first)==0) ERROR("void CGridRemoteConnector::removeRedondantRanks(void)",<<"number of ranks in elements is not coherent between each element") ;
812   
813    HashXIOS<size_t> hashGlobalIndex;
814   
815    map<int,size_t> hashRanks ;
816    for(auto& element : elements_) 
817      for(auto& it : element)
818      {
819        auto& globalIndex=it.second ;
820        int rank=it.first ;
821        size_t hash ;
822        hash=0 ;
823        for(int i=0; i<globalIndex.numElements(); i++) hash+=hashGlobalIndex(globalIndex(i)) ;
824        if (globalIndex.numElements()>0)
825        {
826          if (hashRanks.count(rank)==0) hashRanks[rank]=hash ;
827          else hashRanks[rank]=hashGlobalIndex.hashCombine(hashRanks[rank],hash) ;
828        }
829      }
830   
831    if (reverse)
832    {
833      set<size_t> hashs ;
834      //easy because local
835      for(auto& hashRank : hashRanks)
836      {
837        if (hashs.count(hashRank.second)==0) hashs.insert(hashRank.second) ;
838        else rankToRemove_.insert(hashRank.first) ;
839      }
840   
841    }
842    else
843    {
844      // a hash has now been computed for each data block I will send to the server.
845
846      CClientClientDHTTemplate<int>::Index2InfoTypeMap info ;
847 
848      map<size_t,int> hashRank ;
849      HashXIOS<int> hashGlobalIndexRank;
850      for(auto& it : hashRanks) 
851      {
852        it.second = hashGlobalIndexRank.hashCombine(it.first,it.second) ; 
853        info[it.second]=commRank ;
854        hashRank[it.second]=it.first ;
855      }
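      // note: the destination rank is combined into the hash above, so identical index blocks aimed at
      // different remote ranks remain distinct in the DHT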
856
857      // we feed a DHT map with key : hash, value : myrank
858      CClientClientDHTTemplate<int> dataHash(info, localComm_) ;
859      CArray<size_t,1> hashList(hashRank.size()) ;
860   
861      int i=0 ;
862      for(auto& it : hashRank) { hashList(i)=it.first ; i++; }
863
864      // now, which ranks have the same hash ? feed the DHT with my list of hashes
865      dataHash.computeIndexInfoMapping(hashList) ;
866      auto& hashRankList = dataHash.getInfoIndexMap() ;
867   
868
869      for(auto& it : hashRankList)
870      {
871        size_t hash = it.first ;
872        auto& ranks = it.second ;
873     
874        bool first=true ;
875        // only the process with the lowest rank is put in charge of sending the data to the remote server
876        for(int rank : ranks) if (commRank>rank) first=false ;
877        if (!first) rankToRemove_.insert(hashRank[hash]) ;
878      }
879    }
880  }
881 
882}