source: XIOS3/branches/xios-3.0-beta/src/transport/context_client.cpp.old @ 2527

Last change on this file since 2527 was 2343, checked in by ymipsl, 2 years ago
  • Implement new infrastructure for the transfer protocol.
  • A new purely one-sided protocol is now available; the previous protocol (legacy, mixing send/recv and one-sided) is still available. Other specific protocols can be implemented more easily in the future.
  • The switch can be operated with the "transport_protocol" variable in the XIOS context :

ex:
<variable id="transport_protocol" type="string">one_sided</variable>

Available protocols are : one_sided, legacy or default. The default protocol is "legacy".

YM

  • Property svn:executable set to *
File size: 18.0 KB
Line 
1#include "xios_spl.hpp"
2#include "context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
10#include "mpi.hpp"
11#include "timer.hpp"
12#include "cxios.hpp"
13#include "server.hpp"
14#include "services.hpp"
15#include <boost/functional/hash.hpp>
16#include <random>
17#include <chrono>
18
19namespace xios
20{
21    /*!
22    \param [in] parent Pointer to context on client side
23    \param [in] intraComm_ communicator of group client
24    \param [in] interComm_ communicator of group server
25    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode).
26    */
27    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
28     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4), associatedServer_(nullptr)
29    {
30     
31      context_ = parent;
32      intraComm = intraComm_;
33      interComm = interComm_;
34      MPI_Comm_rank(intraComm, &clientRank);
35      MPI_Comm_size(intraComm, &clientSize);
36
37      int flag;
38      MPI_Comm_test_inter(interComm, &flag);
39      if (flag) isAttached_=false ;
40      else  isAttached_=true ;
41
42      pureOneSided=CXios::getin<bool>("pure_one_sided",false); // pure one sided communication (for test)
43      if (isAttachedModeEnabled()) pureOneSided=false ; // no one sided in attach mode
44     
45
46
47      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
48      else  MPI_Comm_size(interComm, &serverSize);
49
50      computeLeader(clientRank, clientSize, serverSize, ranksServerLeader, ranksServerNotLeader);
51
52      if (flag) MPI_Intercomm_merge(interComm_,false, &interCommMerged_) ;
53     
54      MPI_Comm_split(intraComm_,clientRank,clientRank, &commSelf_) ; // for windows
55
56      auto time=chrono::system_clock::now().time_since_epoch().count() ;
57      std::default_random_engine rd(time); // not reproducible from a run to another
58      std::uniform_int_distribution<size_t> dist;
59      hashId_=dist(rd) ;
60      MPI_Bcast(&hashId_,1,MPI_SIZE_T,0,intraComm) ; // Bcast to all server of the context
61
62      timeLine = 1;
63    }
64
65    void CContextClient::computeLeader(int clientRank, int clientSize, int serverSize,
66                                       std::list<int>& rankRecvLeader,
67                                       std::list<int>& rankRecvNotLeader)
68    {
69      if ((0 == clientSize) || (0 == serverSize)) return;
70
71      if (clientSize < serverSize)
72      {
73        int serverByClient = serverSize / clientSize;
74        int remain = serverSize % clientSize;
75        int rankStart = serverByClient * clientRank;
76
77        if (clientRank < remain)
78        {
79          serverByClient++;
80          rankStart += clientRank;
81        }
82        else
83          rankStart += remain;
84
85        for (int i = 0; i < serverByClient; i++)
86          rankRecvLeader.push_back(rankStart + i);
87
88        rankRecvNotLeader.resize(0);
89      }
90      else
91      {
92        int clientByServer = clientSize / serverSize;
93        int remain = clientSize % serverSize;
94
95        if (clientRank < (clientByServer + 1) * remain)
96        {
97          if (clientRank % (clientByServer + 1) == 0)
98            rankRecvLeader.push_back(clientRank / (clientByServer + 1));
99          else
100            rankRecvNotLeader.push_back(clientRank / (clientByServer + 1));
101        }
102        else
103        {
104          int rank = clientRank - (clientByServer + 1) * remain;
105          if (rank % clientByServer == 0)
106            rankRecvLeader.push_back(remain + rank / clientByServer);
107          else
108            rankRecvNotLeader.push_back(remain + rank / clientByServer);
109        }
110      }
111    }
112
113    /*!
114    In case of attached mode, the current context must be reset to context for client
115    \param [in] event Event sent to server
116    */
117
118    void CContextClient::sendEvent(CEventClient& event)
119    {
120      list<int> ranks = event.getRanks();
121 
122//      ostringstream str ;
123//      for(auto& rank : ranks) str<<rank<<" ; " ;
124//      info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<"  for ranks : "<<str.str()<<endl ;
125
126      if (CXios::checkEventSync)
127      {
128        int typeId, classId, typeId_in, classId_in;
129        long long timeLine_out;
130        long long timeLine_in( timeLine );
131        typeId_in=event.getTypeId() ;
132        classId_in=event.getClassId() ;
133//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
134        MPI_Allreduce(&timeLine_in,&timeLine_out, 1, MPI_LONG_LONG_INT, MPI_SUM, intraComm) ;
135        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
136        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
137        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
138        {
139           ERROR("void CContextClient::sendEvent(CEventClient& event)",
140               << "Event are not coherent between client for timeline = "<<timeLine);
141        }
142       
143        vector<int> servers(serverSize,0) ;
144        auto ranks=event.getRanks() ;
145        for(auto& rank : ranks) servers[rank]=1 ;
146        MPI_Allreduce(MPI_IN_PLACE, servers.data(), serverSize,MPI_INT,MPI_SUM,intraComm) ;
147        ostringstream osstr ;
148        for(int i=0;i<serverSize;i++)  if (servers[i]==0) osstr<<i<<" , " ;
149        if (!osstr.str().empty())
150        {
151          ERROR("void CContextClient::sendEvent(CEventClient& event)",
152                 <<" Some servers will not receive the message for timeline = "<<timeLine<<endl
153                 <<"Servers are : "<<osstr.str()) ;
154        }
155
156
157      }
158
159      if (!event.isEmpty())
160      {
161        list<int> sizes = event.getSizes();
162
163         // We force the getBuffers call to be non-blocking on classical servers
164        list<CBufferOut*> buffList;
165        getBuffers(timeLine, ranks, sizes, buffList) ;
166
167        event.send(timeLine, sizes, buffList);
168       
169        //for (auto itRank = ranks.begin(); itRank != ranks.end(); itRank++) buffers[*itRank]->infoBuffer() ;
170
171        unlockBuffers(ranks) ;
172        checkBuffers(ranks);
173       
174      }
175     
176      if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode
177      {
178        while (checkBuffers(ranks)) callGlobalEventLoop() ;
179     
180        CXios::getDaemonsManager()->scheduleContext(hashId_) ;
181        while (CXios::getDaemonsManager()->isScheduledContext(hashId_)) callGlobalEventLoop() ;
182      }
183     
184      timeLine++;
185    }
186
187 
188    /*!
189     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
190     * it is explicitly requested to be non-blocking.
191     *
192     *
193     * \param [in] timeLine time line of the event which will be sent to servers
194     * \param [in] serverList list of rank of connected server
195     * \param [in] sizeList size of message corresponding to each connection
196     * \param [out] retBuffers list of buffers that can be used to store an event
197     * \param [in] nonBlocking whether this function should be non-blocking
198     * \return whether the already allocated buffers could be used
199    */
200    bool CContextClient::getBuffers(const size_t timeLine, const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers,
201                                    bool nonBlocking /*= false*/)
202    {
203      list<int>::const_iterator itServer, itSize;
204      list<CClientBuffer*> bufferList;
205      map<int,CClientBuffer*>::const_iterator it;
206      list<CClientBuffer*>::iterator itBuffer;
207      bool areBuffersFree;
208
209      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
210      {
211        it = buffers.find(*itServer);
212        if (it == buffers.end())
213        {
214          newBuffer(*itServer);
215          it = buffers.find(*itServer);
216        }
217        bufferList.push_back(it->second);
218      }
219
220      double lastTimeBuffersNotFree=0. ;
221      double time ;
222      bool doUnlockBuffers ;
223      CTimer::get("Blocking time").resume();
224      do
225      {
226        areBuffersFree = true;
227        doUnlockBuffers=false ;
228        time=MPI_Wtime() ;
229        if (time-lastTimeBuffersNotFree > latency_)
230        {
231          for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
232          {
233            areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
234          }
235          if (!areBuffersFree)
236          {
237            lastTimeBuffersNotFree = time ;
238            doUnlockBuffers=true ;
239          }         
240        }
241        else areBuffersFree = false ;
242
243        if (!areBuffersFree)
244        {
245          if (doUnlockBuffers) for (itBuffer = bufferList.begin(); itBuffer != bufferList.end(); itBuffer++) (*itBuffer)->unlockBuffer();
246          checkBuffers();
247
248          callGlobalEventLoop() ;
249        }
250
251      } while (!areBuffersFree && !nonBlocking);
252      CTimer::get("Blocking time").suspend();
253
254      if (areBuffersFree)
255      {
256        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
257          retBuffers.push_back((*itBuffer)->getBuffer(timeLine, *itSize));
258      }
259      return areBuffersFree;
260   }
261
262   void CContextClient::eventLoop(void)
263   {
264      if (!locked_) checkBuffers() ;
265   }
266
267   void CContextClient::callGlobalEventLoop(void)
268   {
269     locked_=true ;
270     context_->globalEventLoop() ;
271     locked_=false ;
272   }
273   /*!
274   Make a new buffer for a certain connection to server with specific rank
275   \param [in] rank rank of connected server
276   */
277   void CContextClient::newBuffer(int rank)
278   {
279      if (!mapBufferSize_.count(rank))
280      {
281        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
282        mapBufferSize_[rank] = CXios::minBufferSize;
283        maxEventSizes[rank] = CXios::minBufferSize;
284      }
285     
286      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, rank, mapBufferSize_[rank], maxEventSizes[rank]);
287      if (isGrowableBuffer_) buffer->setGrowableBuffer(1.2) ;
288      else buffer->fixBuffer() ;
289      // Notify the server
290      CBufferOut* bufOut = buffer->getBuffer(0, 4*sizeof(MPI_Aint));
291      MPI_Aint sendBuff[4] ;
292      sendBuff[0]=hashId_;
293      sendBuff[1]=mapBufferSize_[rank];
294      sendBuff[2]=buffers[rank]->getWinAddress(0);
295      sendBuff[3]=buffers[rank]->getWinAddress(1);
296      info(100)<<"CContextClient::newBuffer : rank "<<rank<<" winAdress[0] "<<buffers[rank]->getWinAddress(0)<<" winAdress[1] "<<buffers[rank]->getWinAddress(1)<<endl;
297      bufOut->put(sendBuff, 4);
298      buffer->checkBuffer(true);
299     
300       // create windows dynamically for one-sided
301      if (!isAttachedModeEnabled())
302      {
303        CTimer::get("create Windows").resume() ;
304        MPI_Comm interComm ;
305        MPI_Intercomm_create(commSelf_, 0, interCommMerged_, clientSize+rank, 0, &interComm) ;
306        MPI_Intercomm_merge(interComm, false, &winComm_[rank]) ;
307        CXios::getMpiGarbageCollector().registerCommunicator(winComm_[rank]) ;
308        MPI_Comm_free(&interComm) ;
309        windows_[rank].resize(2) ;
310       
311        MPI_Win_create_dynamic(MPI_INFO_NULL, winComm_[rank], &windows_[rank][0]);
312        CXios::getMpiGarbageCollector().registerWindow(windows_[rank][0]) ;
313       
314        MPI_Win_create_dynamic(MPI_INFO_NULL, winComm_[rank], &windows_[rank][1]);   
315        CXios::getMpiGarbageCollector().registerWindow(windows_[rank][1]) ;
316
317        CTimer::get("create Windows").suspend() ;
318      }
319      else
320      {
321        winComm_[rank] = MPI_COMM_NULL ;
322        windows_[rank].resize(2) ;
323        windows_[rank][0] = MPI_WIN_NULL ;
324        windows_[rank][1] = MPI_WIN_NULL ;
325      }
326      buffer->attachWindows(windows_[rank]) ;
327      if (!isAttachedModeEnabled()) MPI_Barrier(winComm_[rank]) ;
328       
329   }
330
331   /*!
332   Verify state of buffers. Buffer is under pending state if there is no message on it
333   \return state of buffers, pending(true), ready(false)
334   */
335   bool CContextClient::checkBuffers(void)
336   {
337      map<int,CClientBuffer*>::iterator itBuff;
338      bool pending = false;
339      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
340        pending |= itBuff->second->checkBuffer(!pureOneSided);
341      return pending;
342   }
343
344   //! Release all buffers
345   void CContextClient::releaseBuffers()
346   {
347      map<int,CClientBuffer*>::iterator itBuff;
348      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
349      {
350         delete itBuff->second;
351      }
352      buffers.clear();
353
354// don't know when release windows
355
356      //if (!isAttachedModeEnabled())
357      //{ 
358      //  for(auto& it : winComm_)
359      //  {
360      //    int rank = it.first ;
361      //    MPI_Win_free(&windows_[rank][0]);
362      //    MPI_Win_free(&windows_[rank][1]);
363      //    MPI_Comm_free(&winComm_[rank]) ;
364      //  }
365      //}
366   }
367
368     
369  /*!
370   Lock the buffers for one sided communications
371   \param [in] ranks list rank of server to which client connects to
372   */
373   void CContextClient::lockBuffers(list<int>& ranks)
374   {
375      list<int>::iterator it;
376      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->lockBuffer();
377   }
378
379  /*!
380   Unlock the buffers for one sided communications
381   \param [in] ranks list rank of server to which client connects to
382   */
383   void CContextClient::unlockBuffers(list<int>& ranks)
384   {
385      list<int>::iterator it;
386      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->unlockBuffer();
387   }
388     
389   /*!
390   Verify state of buffers corresponding to a connection
391   \param [in] ranks list rank of server to which client connects to
392   \return state of buffers, pending(true), ready(false)
393   */
394   bool CContextClient::checkBuffers(list<int>& ranks)
395   {
396      list<int>::iterator it;
397      bool pending = false;
398      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer(!pureOneSided);
399      return pending;
400   }
401
402   /*!
403    * Set the buffer size for each connection. Warning: This function is collective.
404    *
405    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
406    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
407   */
408   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize)
409   {
410     setFixedBuffer() ;
411     for(auto& it : mapSize)
412     {
413      size_t size=std::max(CXios::minBufferSize*1.0,std::min(it.second*CXios::bufferSizeFactor*1.01,CXios::maxBufferSize*1.0)) ;
414      mapBufferSize_[it.first]=size ;
415      if (buffers.count(it.first)>0) buffers[it.first]->fixBufferSize(size);
416     }
417   }
418
419  /*!
420  Get leading server in the group of connected server
421  \return ranks of leading servers
422  */
423  const std::list<int>& CContextClient::getRanksServerNotLeader(void) const
424  {
425    return ranksServerNotLeader;
426  }
427
428  /*!
429  Check if client connects to leading server
430  \return connected(true), not connected (false)
431  */
432  bool CContextClient::isServerNotLeader(void) const
433  {
434    return !ranksServerNotLeader.empty();
435  }
436
437  /*!
438  Get leading server in the group of connected server
439  \return ranks of leading servers
440  */
441  const std::list<int>& CContextClient::getRanksServerLeader(void) const
442  {
443    return ranksServerLeader;
444  }
445
446  /*!
447  Check if client connects to leading server
448  \return connected(true), not connected (false)
449  */
450  bool CContextClient::isServerLeader(void) const
451  {
452    return !ranksServerLeader.empty();
453  }
454
455   /*!
456   * Finalize context client and do some reports. Function is non-blocking.
457   */
458  void CContextClient::finalize(void)
459  {
460    map<int,CClientBuffer*>::iterator itBuff;
461    std::list<int>::iterator ItServerLeader;
462   
463    bool stop = false;
464
465    int* nbServerConnectionLocal  = new int[serverSize] ;
466    int* nbServerConnectionGlobal  = new int[serverSize] ;
467    for(int i=0;i<serverSize;++i) nbServerConnectionLocal[i]=0 ;
468    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)  nbServerConnectionLocal[itBuff->first]=1 ;
469    for (ItServerLeader = ranksServerLeader.begin(); ItServerLeader != ranksServerLeader.end(); ItServerLeader++)  nbServerConnectionLocal[*ItServerLeader]=1 ;
470   
471    MPI_Allreduce(nbServerConnectionLocal, nbServerConnectionGlobal, serverSize, MPI_INT, MPI_SUM, intraComm);
472   
473    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
474    CMessage msg;
475
476    for (int i=0;i<serverSize;++i) if (nbServerConnectionLocal[i]==1) event.push(i, nbServerConnectionGlobal[i], msg) ;
477    sendEvent(event);
478
479    delete[] nbServerConnectionLocal ;
480    delete[] nbServerConnectionGlobal ;
481
482
483    CTimer::get("Blocking time").resume();
484    checkBuffers();
485    CTimer::get("Blocking time").suspend();
486
487    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
488                                          iteMap = mapBufferSize_.end(), itMap;
489
490    StdSize totalBuf = 0;
491    for (itMap = itbMap; itMap != iteMap; ++itMap)
492    {
493      report(10) << " Memory report : Context <" << context_->getId() << "> : client side : memory used for buffer of each connection to server" << endl
494                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
495      totalBuf += itMap->second;
496    }
497    report(0) << " Memory report : Context <" << context_->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
498
499  }
500
501
502  /*!
503  */
504  bool CContextClient::havePendingRequests(void)
505  {
506    bool pending = false;
507    map<int,CClientBuffer*>::iterator itBuff;
508    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
509      pending |= itBuff->second->hasPendingRequest();
510    return pending;
511  }
512 
513  bool CContextClient::havePendingRequests(list<int>& ranks)
514  {
515      list<int>::iterator it;
516      bool pending = false;
517      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->hasPendingRequest();
518      return pending;
519  }
520
521  bool CContextClient::isNotifiedFinalized(void)
522  {
523    if (isAttachedModeEnabled()) return true ;
524
525    bool finalized = true;
526    map<int,CClientBuffer*>::iterator itBuff;
527    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
528      finalized &= itBuff->second->isNotifiedFinalized();
529    return finalized;
530  }
531
532}
Note: See TracBrowser for help on using the repository browser.