source: XIOS/dev/dev_ym/XIOS_COUPLING/src/context_client.cpp @ 2259

Last change on this file since 2259 was 2259, checked in by ymipsl, 3 years ago

Improvement of the one-sided protocol.
Windows are now created on the fly for each client-server connection.
YM

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du CLimat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
  • Property svn:eol-style set to native
File size: 18.0 KB
Line 
1#include "xios_spl.hpp"
2#include "context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
10#include "mpi.hpp"
11#include "timer.hpp"
12#include "cxios.hpp"
13#include "server.hpp"
14#include "services.hpp"
15#include <boost/functional/hash.hpp>
16#include <random>
17#include <chrono>
18
19namespace xios
20{
21    /*!
22    \param [in] parent Pointer to context on client side
23    \param [in] intraComm_ communicator of group client
24    \param [in] interComm_ communicator of group server
25    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode).
26    */
27    CContextClient::CContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
28     : mapBufferSize_(), parentServer(cxtSer), maxBufferedEvents(4), associatedServer_(nullptr)
29    {
30     
31      context_ = parent;
32      intraComm = intraComm_;
33      interComm = interComm_;
34      MPI_Comm_rank(intraComm, &clientRank);
35      MPI_Comm_size(intraComm, &clientSize);
36
37      int flag;
38      MPI_Comm_test_inter(interComm, &flag);
39      if (flag) isAttached_=false ;
40      else  isAttached_=true ;
41
42      pureOneSided=CXios::getin<bool>("pure_one_sided",false); // pure one sided communication (for test)
43      if (isAttachedModeEnabled()) pureOneSided=false ; // no one sided in attach mode
44     
45
46
47      if (flag) MPI_Comm_remote_size(interComm, &serverSize);
48      else  MPI_Comm_size(interComm, &serverSize);
49
50      computeLeader(clientRank, clientSize, serverSize, ranksServerLeader, ranksServerNotLeader);
51
52      if (flag) MPI_Intercomm_merge(interComm_,false, &interCommMerged_) ;
53     
54      MPI_Comm_split(intraComm_,clientRank,clientRank, &commSelf_) ; // for windows
55
56      auto time=chrono::system_clock::now().time_since_epoch().count() ;
57      std::default_random_engine rd(time); // not reproducible from a run to another
58      std::uniform_int_distribution<size_t> dist;
59      hashId_=dist(rd) ;
60      MPI_Bcast(&hashId_,1,MPI_SIZE_T,0,intraComm) ; // Bcast to all server of the context
61
62      timeLine = 1;
63    }
64
65    void CContextClient::computeLeader(int clientRank, int clientSize, int serverSize,
66                                       std::list<int>& rankRecvLeader,
67                                       std::list<int>& rankRecvNotLeader)
68    {
69      if ((0 == clientSize) || (0 == serverSize)) return;
70
71      if (clientSize < serverSize)
72      {
73        int serverByClient = serverSize / clientSize;
74        int remain = serverSize % clientSize;
75        int rankStart = serverByClient * clientRank;
76
77        if (clientRank < remain)
78        {
79          serverByClient++;
80          rankStart += clientRank;
81        }
82        else
83          rankStart += remain;
84
85        for (int i = 0; i < serverByClient; i++)
86          rankRecvLeader.push_back(rankStart + i);
87
88        rankRecvNotLeader.resize(0);
89      }
90      else
91      {
92        int clientByServer = clientSize / serverSize;
93        int remain = clientSize % serverSize;
94
95        if (clientRank < (clientByServer + 1) * remain)
96        {
97          if (clientRank % (clientByServer + 1) == 0)
98            rankRecvLeader.push_back(clientRank / (clientByServer + 1));
99          else
100            rankRecvNotLeader.push_back(clientRank / (clientByServer + 1));
101        }
102        else
103        {
104          int rank = clientRank - (clientByServer + 1) * remain;
105          if (rank % clientByServer == 0)
106            rankRecvLeader.push_back(remain + rank / clientByServer);
107          else
108            rankRecvNotLeader.push_back(remain + rank / clientByServer);
109        }
110      }
111    }
112
113    /*!
114    In case of attached mode, the current context must be reset to context for client
115    \param [in] event Event sent to server
116    */
117    void CContextClient::sendEvent(CEventClient& event)
118    {
119      list<int> ranks = event.getRanks();
120      info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<endl ;
121      if (CXios::checkEventSync)
122      {
123        int typeId, classId, typeId_in, classId_in;
124        long long timeLine_out;
125        long long timeLine_in( timeLine );
126        typeId_in=event.getTypeId() ;
127        classId_in=event.getClassId() ;
128//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
129        MPI_Allreduce(&timeLine_in,&timeLine_out, 1, MPI_LONG_LONG_INT, MPI_SUM, intraComm) ; 
130        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
131        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
132        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
133        {
134           ERROR("void CContextClient::sendEvent(CEventClient& event)",
135               << "Event are not coherent between client for timeline = "<<timeLine);
136        }
137       
138        vector<int> servers(serverSize,0) ;
139        auto ranks=event.getRanks() ;
140        for(auto& rank : ranks) servers[rank]=1 ;
141        MPI_Allreduce(MPI_IN_PLACE, servers.data(), serverSize,MPI_INT,MPI_SUM,intraComm) ;
142        ostringstream osstr ;
143        for(int i=0;i<serverSize;i++)  if (servers[i]==0) osstr<<i<<" , " ;
144        if (!osstr.str().empty())
145        {
146          ERROR("void CContextClient::sendEvent(CEventClient& event)",
147                 <<" Some servers will not receive the message for timeline = "<<timeLine<<endl
148                 <<"Servers are : "<<osstr.str()) ;
149        }
150
151
152      }
153
154      if (!event.isEmpty())
155      {
156        list<int> sizes = event.getSizes();
157
158         // We force the getBuffers call to be non-blocking on classical servers
159        list<CBufferOut*> buffList;
160        getBuffers(timeLine, ranks, sizes, buffList) ;
161
162        event.send(timeLine, sizes, buffList);
163       
164        //for (auto itRank = ranks.begin(); itRank != ranks.end(); itRank++) buffers[*itRank]->infoBuffer() ;
165
166        unlockBuffers(ranks) ;
167        info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<"  sent"<<endl ;
168         
169        checkBuffers(ranks);
170      }
171     
172      if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode
173      {
174        while (checkBuffers(ranks)) context_->globalEventLoop() ;
175     
176        CXios::getDaemonsManager()->scheduleContext(hashId_) ;
177        while (CXios::getDaemonsManager()->isScheduledContext(hashId_)) context_->globalEventLoop() ;
178      }
179     
180      timeLine++;
181    }
182
183    /*!
184    If client is also server (attached mode), after sending event, it should process right away
185    the incoming event.
186    \param [in] ranks list rank of server connected this client
187    */
188    void CContextClient::waitEvent(list<int>& ranks)
189    {
190      while (checkBuffers(ranks))
191      {
192        context_->eventLoop() ;
193      }
194
195      MPI_Request req ;
196      MPI_Status status ;
197
198      MPI_Ibarrier(intraComm,&req) ;
199      int flag=false ;
200
201      do 
202      {
203        CXios::getDaemonsManager()->eventLoop() ;
204        MPI_Test(&req,&flag,&status) ;
205      } while (!flag) ;
206
207
208    }
209
210
211    void CContextClient::waitEvent_old(list<int>& ranks)
212    {
213      parentServer->server->setPendingEvent();
214      while (checkBuffers(ranks))
215      {
216        parentServer->server->listen();
217        parentServer->server->checkPendingRequest();
218      }
219
220      while (parentServer->server->hasPendingEvent())
221      {
222       parentServer->server->eventLoop();
223      }
224    }
225
226    /*!
227     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
228     * it is explicitly requested to be non-blocking.
229     *
230     *
231     * \param [in] timeLine time line of the event which will be sent to servers
232     * \param [in] serverList list of rank of connected server
233     * \param [in] sizeList size of message corresponding to each connection
234     * \param [out] retBuffers list of buffers that can be used to store an event
235     * \param [in] nonBlocking whether this function should be non-blocking
236     * \return whether the already allocated buffers could be used
237    */
238    bool CContextClient::getBuffers(const size_t timeLine, const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers,
239                                    bool nonBlocking /*= false*/)
240    {
241      list<int>::const_iterator itServer, itSize;
242      list<CClientBuffer*> bufferList;
243      map<int,CClientBuffer*>::const_iterator it;
244      list<CClientBuffer*>::iterator itBuffer;
245      bool areBuffersFree;
246
247      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
248      {
249        it = buffers.find(*itServer);
250        if (it == buffers.end())
251        {
252          newBuffer(*itServer);
253          it = buffers.find(*itServer);
254        }
255        bufferList.push_back(it->second);
256      }
257
258      double lastTimeBuffersNotFree=0. ;
259      double time ;
260      bool doUnlockBuffers ;
261      CTimer::get("Blocking time").resume();
262      do
263      {
264        areBuffersFree = true;
265        doUnlockBuffers=false ;
266        time=MPI_Wtime() ;
267        if (time-lastTimeBuffersNotFree > latency_)
268        {
269          for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
270          {
271            areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
272          }
273          if (!areBuffersFree)
274          {
275            lastTimeBuffersNotFree = time ;
276            doUnlockBuffers=true ;
277          }         
278        }
279        else areBuffersFree = false ;
280
281        if (!areBuffersFree)
282        {
283          if (doUnlockBuffers) for (itBuffer = bufferList.begin(); itBuffer != bufferList.end(); itBuffer++) (*itBuffer)->unlockBuffer();
284          checkBuffers();
285
286          context_->globalEventLoop() ;
287        }
288
289      } while (!areBuffersFree && !nonBlocking);
290      CTimer::get("Blocking time").suspend();
291
292      if (areBuffersFree)
293      {
294        for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
295          retBuffers.push_back((*itBuffer)->getBuffer(timeLine, *itSize));
296      }
297      return areBuffersFree;
298   }
299
300   /*!
301   Make a new buffer for a certain connection to server with specific rank
302   \param [in] rank rank of connected server
303   */
304   void CContextClient::newBuffer(int rank)
305   {
306      if (!mapBufferSize_.count(rank))
307      {
308        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
309        mapBufferSize_[rank] = CXios::minBufferSize;
310        maxEventSizes[rank] = CXios::minBufferSize;
311      }
312     
313      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interComm, rank, mapBufferSize_[rank], maxEventSizes[rank]);
314      if (isGrowableBuffer_) buffer->setGrowableBuffer(1.2) ;
315      else buffer->fixBuffer() ;
316      // Notify the server
317      CBufferOut* bufOut = buffer->getBuffer(0, 4*sizeof(MPI_Aint));
318      MPI_Aint sendBuff[4] ;
319      sendBuff[0]=hashId_;
320      sendBuff[1]=mapBufferSize_[rank];
321      sendBuff[2]=buffers[rank]->getWinAddress(0); 
322      sendBuff[3]=buffers[rank]->getWinAddress(1); 
323      info(100)<<"CContextClient::newBuffer : rank "<<rank<<" winAdress[0] "<<buffers[rank]->getWinAddress(0)<<" winAdress[1] "<<buffers[rank]->getWinAddress(1)<<endl;
324      bufOut->put(sendBuff, 4); 
325      buffer->checkBuffer(true);
326     
327       // create windows dynamically for one-sided
328      if (!isAttachedModeEnabled())
329      { 
330        CTimer::get("create Windows").resume() ;
331        MPI_Comm interComm ;
332        MPI_Intercomm_create(commSelf_, 0, interCommMerged_, clientSize+rank, 0, &interComm) ;
333        MPI_Intercomm_merge(interComm, false, &winComm_[rank]) ;
334        MPI_Comm_free(&interComm) ;
335        windows_[rank].resize(2) ;
336        MPI_Win_create_dynamic(MPI_INFO_NULL, winComm_[rank], &windows_[rank][0]);
337        MPI_Win_create_dynamic(MPI_INFO_NULL, winComm_[rank], &windows_[rank][1]);   
338        CTimer::get("create Windows").suspend() ;
339      }
340      else
341      {
342        winComm_[rank] = MPI_COMM_NULL ;
343        windows_[rank].resize(2) ;
344        windows_[rank][0] = MPI_WIN_NULL ;
345        windows_[rank][1] = MPI_WIN_NULL ;
346      }
347      buffer->attachWindows(windows_[rank]) ;
348   }
349
350   /*!
351   Verify state of buffers. Buffer is under pending state if there is no message on it
352   \return state of buffers, pending(true), ready(false)
353   */
354   bool CContextClient::checkBuffers(void)
355   {
356      map<int,CClientBuffer*>::iterator itBuff;
357      bool pending = false;
358      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
359        pending |= itBuff->second->checkBuffer(!pureOneSided);
360      return pending;
361   }
362
363   //! Release all buffers
364   void CContextClient::releaseBuffers()
365   {
366      map<int,CClientBuffer*>::iterator itBuff;
367      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
368      {
369         delete itBuff->second;
370      }
371      buffers.clear();
372
373// don't know when release windows
374
375      if (!isAttachedModeEnabled())
376      { 
377        for(auto& it : winComm_)
378        {
379          int rank = it.first ;
380          MPI_Win_free(&windows_[rank][0]);
381          MPI_Win_free(&windows_[rank][1]);
382          MPI_Comm_free(&winComm_[rank]) ;
383        }
384      } 
385   }
386
387     
388  /*!
389   Lock the buffers for one sided communications
390   \param [in] ranks list rank of server to which client connects to
391   */
392   void CContextClient::lockBuffers(list<int>& ranks)
393   {
394      list<int>::iterator it;
395      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->lockBuffer();
396   }
397
398  /*!
399   Unlock the buffers for one sided communications
400   \param [in] ranks list rank of server to which client connects to
401   */
402   void CContextClient::unlockBuffers(list<int>& ranks)
403   {
404      list<int>::iterator it;
405      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->unlockBuffer();
406   }
407     
408   /*!
409   Verify state of buffers corresponding to a connection
410   \param [in] ranks list rank of server to which client connects to
411   \return state of buffers, pending(true), ready(false)
412   */
413   bool CContextClient::checkBuffers(list<int>& ranks)
414   {
415      list<int>::iterator it;
416      bool pending = false;
417      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer(!pureOneSided);
418      return pending;
419   }
420
421   /*!
422    * Set the buffer size for each connection. Warning: This function is collective.
423    *
424    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
425    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
426   */
427   void CContextClient::setBufferSize(const std::map<int,StdSize>& mapSize)
428   {
429     for(auto& it : mapSize) 
430      buffers[it.first]->fixBufferSize(std::max(CXios::minBufferSize*1.0,std::min(it.second*CXios::bufferSizeFactor*1.01,CXios::maxBufferSize*1.0)));
431   }
432
433  /*!
434  Get leading server in the group of connected server
435  \return ranks of leading servers
436  */
437  const std::list<int>& CContextClient::getRanksServerNotLeader(void) const
438  {
439    return ranksServerNotLeader;
440  }
441
442  /*!
443  Check if client connects to leading server
444  \return connected(true), not connected (false)
445  */
446  bool CContextClient::isServerNotLeader(void) const
447  {
448    return !ranksServerNotLeader.empty();
449  }
450
451  /*!
452  Get leading server in the group of connected server
453  \return ranks of leading servers
454  */
455  const std::list<int>& CContextClient::getRanksServerLeader(void) const
456  {
457    return ranksServerLeader;
458  }
459
460  /*!
461  Check if client connects to leading server
462  \return connected(true), not connected (false)
463  */
464  bool CContextClient::isServerLeader(void) const
465  {
466    return !ranksServerLeader.empty();
467  }
468
469   /*!
470   * Finalize context client and do some reports. Function is non-blocking.
471   */
472  void CContextClient::finalize(void)
473  {
474    map<int,CClientBuffer*>::iterator itBuff;
475    std::list<int>::iterator ItServerLeader; 
476   
477    bool stop = false;
478
479    int* nbServerConnectionLocal  = new int[serverSize] ;
480    int* nbServerConnectionGlobal  = new int[serverSize] ;
481    for(int i=0;i<serverSize;++i) nbServerConnectionLocal[i]=0 ;
482    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)  nbServerConnectionLocal[itBuff->first]=1 ;
483    for (ItServerLeader = ranksServerLeader.begin(); ItServerLeader != ranksServerLeader.end(); ItServerLeader++)  nbServerConnectionLocal[*ItServerLeader]=1 ;
484   
485    MPI_Allreduce(nbServerConnectionLocal, nbServerConnectionGlobal, serverSize, MPI_INT, MPI_SUM, intraComm);
486   
487    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
488    CMessage msg;
489
490    for (int i=0;i<serverSize;++i) if (nbServerConnectionLocal[i]==1) event.push(i, nbServerConnectionGlobal[i], msg) ;
491    sendEvent(event);
492
493    delete[] nbServerConnectionLocal ;
494    delete[] nbServerConnectionGlobal ;
495
496
497    CTimer::get("Blocking time").resume();
498    checkBuffers();
499    CTimer::get("Blocking time").suspend();
500
501    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
502                                          iteMap = mapBufferSize_.end(), itMap;
503
504    StdSize totalBuf = 0;
505    for (itMap = itbMap; itMap != iteMap; ++itMap)
506    {
507      report(10) << " Memory report : Context <" << context_->getId() << "> : client side : memory used for buffer of each connection to server" << endl
508                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
509      totalBuf += itMap->second;
510    }
511    report(0) << " Memory report : Context <" << context_->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
512
513  }
514
515
516  /*!
517  */
518  bool CContextClient::havePendingRequests(void)
519  {
520    bool pending = false;
521    map<int,CClientBuffer*>::iterator itBuff;
522    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
523      pending |= itBuff->second->hasPendingRequest();
524    return pending;
525  }
526 
527  bool CContextClient::isNotifiedFinalized(void)
528  {
529    if (isAttachedModeEnabled()) return true ;
530
531    bool finalized = true;
532    map<int,CClientBuffer*>::iterator itBuff;
533    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
534      finalized &= itBuff->second->isNotifiedFinalized();
535    return finalized;
536  }
537
538}
Note: See TracBrowser for help on using the repository browser.