source: XIOS/dev/dev_ym/XIOS_ONE_SIDED/src/context_client.cpp @ 1757

Last change on this file since 1757 was 1757, checked in by ymipsl, 5 years ago

Implement one-sided communication in the client/server protocol to avoid deadlock when some buffers are full.

YM

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du CLimat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
  • Property svn:eol-style set to native
File size: 16.9 KB
RevLine 
[591]1#include "xios_spl.hpp"
[300]2#include "context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
[382]10#include "mpi.hpp"
[347]11#include "timer.hpp"
[401]12#include "cxios.hpp"
[1130]13#include "server.hpp"
[300]14
[335]15namespace xios
[300]16{
[512]17    /*!
18    \param [in] parent Pointer to context on client side
19    \param [in] intraComm_ communicator of group client
20    \param [in] interComm_ communicator of group server
[983]21    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode).
[512]22    */
[1639]23    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
[917]24     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4)
[300]25    {
[1757]26      pureOneSided=CXios::getin<bool>("pure_one_sided",false); // pure one sided communication (for test)
27      if (isAttachedModeEnabled()) pureOneSided=false ; // no one sided in attach mode
28     
[595]29      context = parent;
30      intraComm = intraComm_;
31      interComm = interComm_;
[1639]32      MPI_Comm_rank(intraComm, &clientRank);
33      MPI_Comm_size(intraComm, &clientSize);
[509]34
[595]35      int flag;
[1639]36      MPI_Comm_test_inter(interComm, &flag);
37      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
38      else  MPI_Comm_size(interComm, &serverSize);
[509]39
[1232]40      computeLeader(clientRank, clientSize, serverSize, ranksServerLeader, ranksServerNotLeader);
41
[1757]42      if (flag) MPI_Intercomm_merge(interComm_,false,&interCommMerged) ;
43     
44      if (!isAttachedModeEnabled())
45      { 
46        windows.resize(serverSize) ;
47        MPI_Comm winComm ;
48        for(int rank=0; rank<serverSize; rank++)
49        {
50          windows[rank].resize(2) ;
51          MPI_Comm_split(interCommMerged, rank, clientRank, &winComm);
52          int myRank ;
53          MPI_Comm_rank(winComm,&myRank);
54          MPI_Win_create_dynamic(MPI_INFO_NULL, winComm, &windows[rank][0]);
55          MPI_Win_create_dynamic(MPI_INFO_NULL, winComm, &windows[rank][1]);
56          MPI_Comm_free(&winComm) ;
57        }
58      }
59
60      MPI_Comm_split(intraComm_,clientRank,clientRank, &commSelf) ;
61
62      timeLine = 1;
[1232]63    }
64
65    void CContextClient::computeLeader(int clientRank, int clientSize, int serverSize,
66                                       std::list<int>& rankRecvLeader,
67                                       std::list<int>& rankRecvNotLeader)
68    {
69      if ((0 == clientSize) || (0 == serverSize)) return;
70
[595]71      if (clientSize < serverSize)
72      {
73        int serverByClient = serverSize / clientSize;
74        int remain = serverSize % clientSize;
75        int rankStart = serverByClient * clientRank;
[300]76
[595]77        if (clientRank < remain)
78        {
79          serverByClient++;
80          rankStart += clientRank;
81        }
82        else
83          rankStart += remain;
84
85        for (int i = 0; i < serverByClient; i++)
[1232]86          rankRecvLeader.push_back(rankStart + i);
[1021]87
[1232]88        rankRecvNotLeader.resize(0);
[1158]89      }
[595]90      else
91      {
92        int clientByServer = clientSize / serverSize;
93        int remain = clientSize % serverSize;
94
95        if (clientRank < (clientByServer + 1) * remain)
96        {
97          if (clientRank % (clientByServer + 1) == 0)
[1232]98            rankRecvLeader.push_back(clientRank / (clientByServer + 1));
[1021]99          else
[1232]100            rankRecvNotLeader.push_back(clientRank / (clientByServer + 1));
[595]101        }
102        else
103        {
104          int rank = clientRank - (clientByServer + 1) * remain;
105          if (rank % clientByServer == 0)
[1232]106            rankRecvLeader.push_back(remain + rank / clientByServer);
[1021]107          else
[1232]108            rankRecvNotLeader.push_back(remain + rank / clientByServer);
[595]109        }
110      }
[300]111    }
112
[512]113    /*!
114    In case of attached mode, the current context must be reset to context for client
115    \param [in] event Event sent to server
116    */
[300]117    void CContextClient::sendEvent(CEventClient& event)
118    {
[731]119      list<int> ranks = event.getRanks();
[1615]120      info(100)<<"Event "<<timeLine<<" of context "<<context->getId()<<endl ;
[1377]121      if (CXios::checkEventSync)
122      {
123        int typeId, classId, typeId_in, classId_in, timeLine_out;
124        typeId_in=event.getTypeId() ;
125        classId_in=event.getClassId() ;
[1475]126//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
[1639]127        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_LONG_LONG_INT, MPI_SUM, intraComm) ; 
128        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
129        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
[1377]130        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
131        {
132           ERROR("void CContextClient::sendEvent(CEventClient& event)",
133               << "Event are not coherent between client.");
134        }
135      }
136
[595]137      if (!event.isEmpty())
[300]138      {
[731]139        list<int> sizes = event.getSizes();
[300]140
[1757]141         // We force the getBuffers call to be non-blocking on classical servers
[1054]142        list<CBufferOut*> buffList;
[1757]143        getBuffers(timeLine, ranks, sizes, buffList) ;
[509]144
[1757]145        event.send(timeLine, sizes, buffList);
146       
147        //for (auto itRank = ranks.begin(); itRank != ranks.end(); itRank++) buffers[*itRank]->infoBuffer() ;
[731]148
[1757]149        unlockBuffers(ranks) ;
150        info(100)<<"Event "<<timeLine<<" of context "<<context->getId()<<"  sent"<<endl ;
151         
152        checkBuffers(ranks);
[1054]153
[1757]154        if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode
[1054]155        {
[1757]156          waitEvent(ranks);
157          CContext::setCurrent(context->getId());
[1054]158        }
[300]159      }
160
[1054]161      timeLine++;
162    }
163
164    /*!
[512]165    If client is also server (attached mode), after sending event, it should process right away
166    the incoming event.
167    \param [in] ranks list rank of server connected this client
168    */
[300]169    void CContextClient::waitEvent(list<int>& ranks)
170    {
[595]171      parentServer->server->setPendingEvent();
172      while (checkBuffers(ranks))
[300]173      {
[595]174        parentServer->server->listen();
175        parentServer->server->checkPendingRequest();
[300]176      }
[386]177
[595]178      while (parentServer->server->hasPendingEvent())
[386]179      {
[595]180       parentServer->server->eventLoop();
[386]181      }
[300]182    }
183
[512]184    /*!
[1054]185     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
186     * it is explicitly requested to be non-blocking.
187     *
[1757]188     *
189     * \param [in] timeLine time line of the event which will be sent to servers
[1054]190     * \param [in] serverList list of rank of connected server
191     * \param [in] sizeList size of message corresponding to each connection
192     * \param [out] retBuffers list of buffers that can be used to store an event
193     * \param [in] nonBlocking whether this function should be non-blocking
194     * \return whether the already allocated buffers could be used
[512]195    */
[1757]196    bool CContextClient::getBuffers(const size_t timeLine, const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers,
[1071]197                                    bool nonBlocking /*= false*/)
[300]198    {
[1054]199      list<int>::const_iterator itServer, itSize;
[595]200      list<CClientBuffer*> bufferList;
[1054]201      map<int,CClientBuffer*>::const_iterator it;
[595]202      list<CClientBuffer*>::iterator itBuffer;
[884]203      bool areBuffersFree;
[300]204
[595]205      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
[300]206      {
[595]207        it = buffers.find(*itServer);
208        if (it == buffers.end())
[300]209        {
[595]210          newBuffer(*itServer);
211          it = buffers.find(*itServer);
[509]212        }
[595]213        bufferList.push_back(it->second);
[300]214      }
[347]215
216      CTimer::get("Blocking time").resume();
[884]217      do
[300]218      {
[884]219        areBuffersFree = true;
[595]220        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
[1757]221        {
[884]222          areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
[1757]223        }
[884]224
225        if (!areBuffersFree)
[300]226        {
[1757]227          for (itBuffer = bufferList.begin(); itBuffer != bufferList.end(); itBuffer++) (*itBuffer)->unlockBuffer();
[884]228          checkBuffers();
[1757]229          if (CServer::serverLevel == 0)  context->server->listen();
[1130]230          else if (CServer::serverLevel == 1)
231          {
232            context->server->listen();
[1757]233            for (int i = 0; i < context->serverPrimServer.size(); ++i)  context->serverPrimServer[i]->listen();
[1378]234            CServer::contextEventLoop(false) ; // avoid dead-lock at finalize...
[1130]235          }
236
[1757]237          else if (CServer::serverLevel == 2) context->server->listen();
[1130]238
[300]239        }
[1054]240      } while (!areBuffersFree && !nonBlocking);
[347]241      CTimer::get("Blocking time").suspend();
242
[1054]243      if (areBuffersFree)
[300]244      {
[1054]245        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
[1757]246          retBuffers.push_back((*itBuffer)->getBuffer(timeLine, *itSize));
[300]247      }
[1054]248      return areBuffersFree;
[300]249   }
[509]250
[512]251   /*!
252   Make a new buffer for a certain connection to server with specific rank
253   \param [in] rank rank of connected server
254   */
[300]255   void CContextClient::newBuffer(int rank)
256   {
[1201]257      if (!mapBufferSize_.count(rank))
258      {
259        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
260        mapBufferSize_[rank] = CXios::minBufferSize;
261        maxEventSizes[rank] = CXios::minBufferSize;
262      }
[1757]263     
264      vector<MPI_Win> Wins(2,MPI_WIN_NULL) ;
265      if (!isAttachedModeEnabled()) Wins=windows[rank] ;
266 
267      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, Wins, clientRank, rank, mapBufferSize_[rank], maxEventSizes[rank]);
[1201]268      // Notify the server
[1757]269      CBufferOut* bufOut = buffer->getBuffer(0, 3*sizeof(MPI_Aint));
270      MPI_Aint sendBuff[3] ;
271      sendBuff[0]=mapBufferSize_[rank]; // Stupid C++
272      sendBuff[1]=buffers[rank]->getWinAddress(0); 
273      sendBuff[2]=buffers[rank]->getWinAddress(1); 
274      info(100)<<"CContextClient::newBuffer : rank "<<rank<<" winAdress[0] "<<buffers[rank]->getWinAddress(0)<<" winAdress[1] "<<buffers[rank]->getWinAddress(1)<<endl;
275      bufOut->put(sendBuff, 3); // Stupid C++
276      buffer->checkBuffer(true);
277
278/*
279      if (!isAttachedModeEnabled()) // create windows only in server mode
280      {
281        MPI_Comm OneSidedInterComm, oneSidedComm ;
282        MPI_Intercomm_create(commSelf, 0, interCommMerged, clientSize+rank, 0, &OneSidedInterComm );
283        MPI_Intercomm_merge(OneSidedInterComm,false,&oneSidedComm);
284        buffer->createWindows(oneSidedComm) ;
285      }
286 */     
[509]287   }
[300]288
[512]289   /*!
290   Verify state of buffers. Buffer is under pending state if there is no message on it
291   \return state of buffers, pending(true), ready(false)
292   */
[300]293   bool CContextClient::checkBuffers(void)
294   {
[595]295      map<int,CClientBuffer*>::iterator itBuff;
296      bool pending = false;
[1130]297      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
[1757]298        pending |= itBuff->second->checkBuffer(!pureOneSided);
[595]299      return pending;
[509]300   }
[300]301
[512]302   //! Release all buffers
[1071]303   void CContextClient::releaseBuffers()
[300]304   {
[595]305      map<int,CClientBuffer*>::iterator itBuff;
[1077]306      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
[1139]307      {
[1757]308         delete itBuff->second;
[1139]309      }
[1077]310      buffers.clear();
[1757]311
312/* don't know when release windows
313
314      if (!isAttachedModeEnabled())
315      { 
316        for(int rank=0; rank<serverSize; rank++)
317        {
318          MPI_Win_free(&windows[rank][0]);
319          MPI_Win_free(&windows[rank][1]);
320        }
321      }
[509]322   }
[1757]323*/
324     
325  /*!
326   Lock the buffers for one sided communications
327   \param [in] ranks list rank of server to which client connects to
328   */
329   void CContextClient::lockBuffers(list<int>& ranks)
330   {
331      list<int>::iterator it;
332      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->lockBuffer();
333   }
[300]334
[1757]335  /*!
336   Unlock the buffers for one sided communications
337   \param [in] ranks list rank of server to which client connects to
338   */
339   void CContextClient::unlockBuffers(list<int>& ranks)
340   {
341      list<int>::iterator it;
342      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->unlockBuffer();
343   }
344     
[512]345   /*!
346   Verify state of buffers corresponding to a connection
347   \param [in] ranks list rank of server to which client connects to
348   \return state of buffers, pending(true), ready(false)
349   */
[300]350   bool CContextClient::checkBuffers(list<int>& ranks)
351   {
[595]352      list<int>::iterator it;
353      bool pending = false;
[1757]354      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer(!pureOneSided);
[595]355      return pending;
[509]356   }
[300]357
[512]358   /*!
[917]359    * Set the buffer size for each connection. Warning: This function is collective.
360    *
361    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
362    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
[512]363   */
[917]364   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize, const std::map<int,StdSize>& maxEventSize)
[509]365   {
366     mapBufferSize_ = mapSize;
[1201]367     maxEventSizes = maxEventSize;
[509]368   }
369
[1158]370  /*!
371  Get leading server in the group of connected server
372  \return ranks of leading servers
373  */
374  const std::list<int>& CContextClient::getRanksServerNotLeader(void) const
375  {
376    return ranksServerNotLeader;
377  }
[1021]378
[1158]379  /*!
380  Check if client connects to leading server
381  \return connected(true), not connected (false)
382  */
383  bool CContextClient::isServerNotLeader(void) const
384  {
385    return !ranksServerNotLeader.empty();
386  }
[1021]387
[595]388  /*!
389  Get leading server in the group of connected server
390  \return ranks of leading servers
391  */
392  const std::list<int>& CContextClient::getRanksServerLeader(void) const
393  {
394    return ranksServerLeader;
395  }
[509]396
[595]397  /*!
398  Check if client connects to leading server
399  \return connected(true), not connected (false)
400  */
401  bool CContextClient::isServerLeader(void) const
402  {
403    return !ranksServerLeader.empty();
404  }
[300]405
[704]406  /*!
407   * Check if the attached mode is used.
408   *
409   * \return true if and only if attached mode is used
410   */
411  bool CContextClient::isAttachedModeEnabled() const
412  {
413    return (parentServer != 0);
414  }
[697]415
[512]416   /*!
[1130]417   * Finalize context client and do some reports. Function is non-blocking.
[512]418   */
[1130]419  void CContextClient::finalize(void)
[1054]420  {
421    map<int,CClientBuffer*>::iterator itBuff;
[1757]422    std::list<int>::iterator ItServerLeader; 
423   
[1054]424    bool stop = false;
[731]425
[1757]426    int* nbServerConnectionLocal  = new int[serverSize] ;
427    int* nbServerConnectionGlobal  = new int[serverSize] ;
428    for(int i=0;i<serverSize;++i) nbServerConnectionLocal[i]=0 ;
429    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)  nbServerConnectionLocal[itBuff->first]=1 ;
430    for (ItServerLeader = ranksServerLeader.begin(); ItServerLeader != ranksServerLeader.end(); ItServerLeader++)  nbServerConnectionLocal[*ItServerLeader]=1 ;
431   
432    MPI_Allreduce(nbServerConnectionLocal, nbServerConnectionGlobal, serverSize, MPI_INT, MPI_SUM, intraComm);
433   
434    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
435    CMessage msg;
[509]436
[1757]437    for (int i=0;i<serverSize;++i) if (nbServerConnectionLocal[i]==1) event.push(i, nbServerConnectionGlobal[i], msg) ;
438    sendEvent(event);
439
440    delete[] nbServerConnectionLocal ;
441    delete[] nbServerConnectionGlobal ;
442/*   
[1054]443    if (isServerLeader())
444    {
445      CMessage msg;
446      const std::list<int>& ranks = getRanksServerLeader();
447      for (std::list<int>::const_iterator itRank = ranks.begin(), itRankEnd = ranks.end(); itRank != itRankEnd; ++itRank)
[1377]448      {
449        info(100)<<"DEBUG : Sent context Finalize event to rank "<<*itRank<<endl ;
[1054]450        event.push(*itRank, 1, msg);
[1377]451      }
[1054]452      sendEvent(event);
453    }
454    else sendEvent(event);
[1757]455*/
[509]456
[1054]457    CTimer::get("Blocking time").resume();
[1757]458    checkBuffers();
[1054]459    CTimer::get("Blocking time").suspend();
460
461    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
462                                          iteMap = mapBufferSize_.end(), itMap;
[1071]463
[1054]464    StdSize totalBuf = 0;
465    for (itMap = itbMap; itMap != iteMap; ++itMap)
466    {
467      report(10) << " Memory report : Context <" << context->getId() << "> : client side : memory used for buffer of each connection to server" << endl
468                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
469      totalBuf += itMap->second;
470    }
471    report(0) << " Memory report : Context <" << context->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
472
[1130]473    //releaseBuffers(); // moved to CContext::finalize()
[1054]474  }
[1130]475
[1139]476
477  /*!
478  */
[1130]479  bool CContextClient::havePendingRequests(void)
480  {
481    bool pending = false;
482    map<int,CClientBuffer*>::iterator itBuff;
483    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
484      pending |= itBuff->second->hasPendingRequest();
485    return pending;
486  }
[1757]487 
488  bool CContextClient::isNotifiedFinalized(void)
489  {
490    if (isAttachedModeEnabled()) return true ;
[1130]491
[1757]492    bool finalized = true;
493    map<int,CClientBuffer*>::iterator itBuff;
494    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
495      finalized &= itBuff->second->isNotifiedFinalized();
496    return finalized;
497  }
[1130]498
[509]499}
Note: See TracBrowser for help on using the repository browser.