source: XIOS3/trunk/src/transport/legacy_context_client.cpp @ 2551

Last change on this file since 2551 was 2547, checked in by ymipsl, 10 months ago

Major update :

  • New method to lock and unlock one-sided windows (window_dynamic) to avoid network overhead
  • Introducing multithreading on the server side to manage occurring deadlocks more efficiently (similar to coroutines, which will be available in a future C++ standard), based on C++ threads
  • Suppression of the old "attached mode", which is replaced by online writer and reader filters

YM

  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 16.2 KB
Line 
1#include "xios_spl.hpp"
2#include "legacy_context_client.hpp"
3#include "context_server.hpp"
4#include "event_client.hpp"
5#include "buffer_out.hpp"
6#include "buffer_client.hpp"
7#include "type.hpp"
8#include "event_client.hpp"
9#include "context.hpp"
10#include "mpi.hpp"
11#include "timer.hpp"
12#include "cxios.hpp"
13#include "server.hpp"
14#include "services.hpp"
15#include "ressources_manager.hpp"
16#include <boost/functional/hash.hpp>
17#include <random>
18#include <chrono>
19
20namespace xios
21{
22    /*!
23    \param [in] parent Pointer to context on client side
24    \param [in] intraComm_ communicator of group client
25    \param [in] interComm_ communicator of group server
26    \cxtSer [in] cxtSer Pointer to context of server side. (It is only used in case of attached mode --> obsolete).
27    */
28    CLegacyContextClient::CLegacyContextClient(CContext* parent, MPI_Comm intraComm_, MPI_Comm interComm_, CContext* cxtSer)
29                         : CContextClient(parent, intraComm_, interComm_, cxtSer),
30                           mapBufferSize_(),  maxBufferedEvents(4)
31    {
32      pureOneSided=CXios::getin<bool>("pure_one_sided",false); // pure one sided communication (for test)
33      MPI_Intercomm_merge(interComm_,false, &interCommMerged_) ;
34      MPI_Comm_split(intraComm_,clientRank,clientRank, &commSelf_) ; // for windows
35      eventScheduler_ = parent->getEventScheduler() ; 
36      timeLine = 1;
37    }
38
39    CContextClient::ETransport getType(void) {return CContextClient::legacy ;}
40
41    /*!
42    \param [in] event Event sent to server
43    */
44    void CLegacyContextClient::sendEvent(CEventClient& event)
45    {
46      list<int> ranks = event.getRanks();
47 
48//      ostringstream str ;
49//      for(auto& rank : ranks) str<<rank<<" ; " ;
50//      info(100)<<"Event "<<timeLine<<" of context "<<context_->getId()<<"  for ranks : "<<str.str()<<endl ;
51
52      if (CXios::checkEventSync)
53      {
54        int typeId, classId, typeId_in, classId_in;
55        long long timeLine_out;
56        long long timeLine_in( timeLine );
57        typeId_in=event.getTypeId() ;
58        classId_in=event.getClassId() ;
59//        MPI_Allreduce(&timeLine,&timeLine_out, 1, MPI_UINT64_T, MPI_SUM, intraComm) ; // MPI_UINT64_T standardized by MPI 3
60        MPI_Allreduce(&timeLine_in,&timeLine_out, 1, MPI_LONG_LONG_INT, MPI_SUM, intraComm) ; 
61        MPI_Allreduce(&typeId_in,&typeId, 1, MPI_INT, MPI_SUM, intraComm) ;
62        MPI_Allreduce(&classId_in,&classId, 1, MPI_INT, MPI_SUM, intraComm) ;
63        if (typeId/clientSize!=event.getTypeId() || classId/clientSize!=event.getClassId() || timeLine_out/clientSize!=timeLine)
64        {
65           ERROR("void CLegacyContextClient::sendEvent(CEventClient& event)",
66               << "Event are not coherent between client for timeline = "<<timeLine);
67        }
68       
69        vector<int> servers(serverSize,0) ;
70        auto ranks=event.getRanks() ;
71        for(auto& rank : ranks) servers[rank]=1 ;
72        MPI_Allreduce(MPI_IN_PLACE, servers.data(), serverSize,MPI_INT,MPI_SUM,intraComm) ;
73        ostringstream osstr ;
74        for(int i=0;i<serverSize;i++)  if (servers[i]==0) osstr<<i<<" , " ;
75        if (!osstr.str().empty())
76        {
77          ERROR("void CLegacyContextClient::sendEvent(CEventClient& event)",
78                 <<" Some servers will not receive the message for timeline = "<<timeLine<<endl
79                 <<"Servers are : "<<osstr.str()) ;
80        }
81
82
83      }
84
85      if (!event.isEmpty())
86      {
87        list<int> sizes = event.getSizes();
88
89         // We force the getBuffers call to be non-blocking on classical servers
90        list<CBufferOut*> buffList;
91        getBuffers(timeLine, ranks, sizes, buffList) ;
92
93        event.send(timeLine, sizes, buffList);
94       
95        //for (auto itRank = ranks.begin(); itRank != ranks.end(); itRank++) buffers[*itRank]->infoBuffer() ;
96
97        unlockBuffers(ranks) ;
98        checkBuffers(ranks);
99       
100      }
101     
102      synchronize() ;
103      timeLine++;
104    }
105
106
107    /*!
108     * Get buffers for each connection to the servers. This function blocks until there is enough room in the buffers unless
109     * it is explicitly requested to be non-blocking.
110     *
111     *
112     * \param [in] timeLine time line of the event which will be sent to servers
113     * \param [in] serverList list of rank of connected server
114     * \param [in] sizeList size of message corresponding to each connection
115     * \param [out] retBuffers list of buffers that can be used to store an event
116     * \param [in] nonBlocking whether this function should be non-blocking
117     * \return whether the already allocated buffers could be used
118    */
119    void CLegacyContextClient::getBuffers(const size_t timeLine, const list<int>& serverList, const list<int>& sizeList, list<CBufferOut*>& retBuffers)
120    {
121      list<int>::const_iterator itServer, itSize;
122      list<CClientBuffer*> bufferList;
123      map<int,CClientBuffer*>::const_iterator it;
124      list<CClientBuffer*>::iterator itBuffer;
125      bool areBuffersFree;
126/*     
127      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
128      {
129        it = buffers.find(*itServer);
130        if (it == buffers.end())
131        {
132          CTokenManager* tokenManager = CXios::getRessourcesManager()->getTokenManager() ;
133          size_t token = tokenManager->getToken() ;
134          while (!tokenManager->checkToken(token)) callGlobalEventLoop() ;
135          newBuffer(*itServer);
136          it = buffers.find(*itServer);
137          checkAttachWindows(it->second,it->first) ;
138          tokenManager->updateToken(token) ;
139        }
140        bufferList.push_back(it->second);
141      }
142*/
143      map<int,MPI_Request> attachList ;
144     
145      for (itServer = serverList.begin(); itServer != serverList.end(); itServer++)
146      {
147        it = buffers.find(*itServer);
148        if (it == buffers.end())
149        {
150          newBuffer(*itServer);
151          it = buffers.find(*itServer);
152          checkAttachWindows(it->second, it->first, attachList) ;
153        }
154        bufferList.push_back(it->second);
155      }
156     
157      while(!attachList.empty())
158      {
159        auto it = attachList.begin() ;
160        while(it!=attachList.end())
161        {
162          if (checkAttachWindows(buffers[it->first], it->first, attachList)) it=attachList.erase(it) ;
163          else ++it ;
164        }
165
166        yield() ;
167      }
168
169
170      double lastTimeBuffersNotFree=0. ;
171      double time ;
172      bool doUnlockBuffers ;
173      CTimer::get("Blocking time").resume();
174      do
175      {
176        areBuffersFree = true;
177        doUnlockBuffers=false ;
178        time=MPI_Wtime() ;
179        if (time-lastTimeBuffersNotFree > latency_)
180        {
181          for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
182          {
183            areBuffersFree &= (*itBuffer)->isBufferFree(*itSize);
184          }
185          if (!areBuffersFree)
186          {
187            lastTimeBuffersNotFree = time ;
188            doUnlockBuffers=true ;
189          }         
190        }
191        else areBuffersFree = false ;
192
193        if (!areBuffersFree)
194        {
195          if (doUnlockBuffers) for (itBuffer = bufferList.begin(); itBuffer != bufferList.end(); itBuffer++) (*itBuffer)->unlockBuffer();
196          checkBuffers();
197
198          yield() ;
199        }
200
201      } while (!areBuffersFree);
202      CTimer::get("Blocking time").suspend();
203
204      for (itBuffer = bufferList.begin(), itSize = sizeList.begin(); itBuffer != bufferList.end(); itBuffer++, itSize++)
205        retBuffers.push_back((*itBuffer)->getBuffer(timeLine, *itSize));
206   }
207
208
209   bool CLegacyContextClient::checkAttachWindows(CClientBuffer* buffer, int rank, map<int, MPI_Request>& attachList)
210   {
211      int dummy;
212      bool ret=true; 
213
214      if (!buffer->isAttachedWindows())
215      {
216           // create windows dynamically for one-sided
217          /*
218          CTimer::get("create Windows").resume() ;
219          MPI_Comm interComm ;
220          int tag = 0 ;
221          MPI_Intercomm_create(commSelf_, 0, interCommMerged_, clientSize+rank, tag, &interComm) ;
222          MPI_Intercomm_merge(interComm, false, &winComm_[rank]) ;
223          MPI_Comm_free(&interComm) ;
224               
225          buffer->attachWindows(winComm_[rank]) ;
226          CXios::getMpiGarbageCollector().registerCommunicator(winComm_[rank]) ;
227          MPI_Barrier(winComm_[rank]) ;
228        */
229        if (attachList.count(rank)==0) 
230        {
231          MPI_Irecv(&dummy,0,MPI_INT,clientSize+rank, 21, interCommMerged_, &attachList[rank]) ;
232          ret = false ;
233        }
234        else
235        {
236          MPI_Status status ;
237          int flag ;
238          MPI_Test(&attachList[rank],&flag, &status) ;
239          if (flag)
240          {
241            CTimer::get("create Windows").resume() ;
242            MPI_Comm interComm ;
243            int tag = 0 ;
244            MPI_Intercomm_create(commSelf_, 0, interCommMerged_, clientSize+rank, tag, &interComm) ;
245            MPI_Intercomm_merge(interComm, false, &winComm_[rank]) ;
246            MPI_Comm_free(&interComm) ;
247             
248            buffer->attachWindows(winComm_[rank]) ;
249            CXios::getMpiGarbageCollector().registerCommunicator(winComm_[rank]) ;
250            MPI_Barrier(winComm_[rank]) ;
251            ret = true ;
252          }
253          else ret=false ;
254        }
255      }
256      return ret ;
257    }
258
259
260   void CLegacyContextClient::eventLoop(void)
261   {
262      if (!locked_) checkBuffers() ;
263   }
264
265   void CLegacyContextClient::callGlobalEventLoop(void)
266   {
267     locked_=true ;
268     context_->yield() ;
269     locked_=false ;
270   }
271
272   void CLegacyContextClient::yield(void)
273   {
274     locked_=true ;
275     context_->yield() ;
276     locked_=false ;
277   }
278
279   void CLegacyContextClient::synchronize(void)
280   {
281     if (context_->getServiceType()!=CServicesManager::CLIENT)
282     {
283       locked_=true ;
284       context_->synchronize() ;
285       locked_=false ;
286     }   
287   }
288   /*!
289   Make a new buffer for a certain connection to server with specific rank
290   \param [in] rank rank of connected server
291   */
292   void CLegacyContextClient::newBuffer(int rank)
293   {
294      if (!mapBufferSize_.count(rank))
295      {
296        error(0) << "WARNING: Unexpected request for buffer to communicate with server " << rank << std::endl;
297        mapBufferSize_[rank] = CXios::minBufferSize;
298        maxEventSizes[rank] = CXios::minBufferSize;
299      }
300      bool hasWindows = true ;
301      CClientBuffer* buffer = buffers[rank] = new CClientBuffer(interCommMerged_, clientSize+rank, mapBufferSize_[rank], hasWindows);
302      if (isGrowableBuffer_) buffer->setGrowableBuffer(1.2) ;
303      else buffer->fixBuffer() ;
304      // Notify the server
305     
306      CBufferOut* bufOut = buffer->getBuffer(0, 4*sizeof(MPI_Aint));
307      MPI_Aint sendBuff[4] ;
308      sendBuff[0]=hashId_;
309      sendBuff[1]=mapBufferSize_[rank];
310      sendBuff[2]=buffers[rank]->getWinBufferAddress(0); 
311      sendBuff[3]=buffers[rank]->getWinBufferAddress(1); 
312      info(100)<<"CLegacyContextClient::newBuffer : rank "<<rank<<" winAdress[0] "<<buffers[rank]->getWinBufferAddress(0)<<" winAdress[1] "<<buffers[rank]->getWinBufferAddress(1)<<endl;
313      bufOut->put(sendBuff,4); 
314      buffer->checkBuffer(true);
315
316   }
317
318 
319 
320   /*!
321   Verify state of buffers. Buffer is under pending state if there is no message on it
322   \return state of buffers, pending(true), ready(false)
323   */
324   bool CLegacyContextClient::checkBuffers(void)
325   {
326      map<int,CClientBuffer*>::iterator itBuff;
327      bool pending = false;
328      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
329        pending |= itBuff->second->checkBuffer(!pureOneSided);
330      return pending;
331   }
332
333   //! Release all buffers
334   void CLegacyContextClient::releaseBuffers()
335   {
336      map<int,CClientBuffer*>::iterator itBuff;
337      for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
338      {
339         delete itBuff->second;
340      }
341      buffers.clear();
342
343      for(auto& it : winComm_)
344      {
345        int rank = it.first ;
346      }
347   }
348
349     
350  /*!
351   Lock the buffers for one sided communications
352   \param [in] ranks list rank of server to which client connects to
353   */
354   void CLegacyContextClient::lockBuffers(list<int>& ranks)
355   {
356      list<int>::iterator it;
357      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->lockBuffer();
358   }
359
360  /*!
361   Unlock the buffers for one sided communications
362   \param [in] ranks list rank of server to which client connects to
363   */
364   void CLegacyContextClient::unlockBuffers(list<int>& ranks)
365   {
366      list<int>::iterator it;
367      for (it = ranks.begin(); it != ranks.end(); it++) buffers[*it]->unlockBuffer();
368   }
369     
370   /*!
371   Verify state of buffers corresponding to a connection
372   \param [in] ranks list rank of server to which client connects to
373   \return state of buffers, pending(true), ready(false)
374   */
375   bool CLegacyContextClient::checkBuffers(list<int>& ranks)
376   {
377      list<int>::iterator it;
378      bool pending = false;
379      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->checkBuffer(!pureOneSided);
380      return pending;
381   }
382
383   /*!
384    * Set the buffer size for each connection. Warning: This function is collective.
385    *
386    * \param [in] mapSize maps the rank of the connected servers to the size of the correspoinding buffer
387    * \param [in] maxEventSize maps the rank of the connected servers to the size of the biggest event
388   */
389   void CLegacyContextClient::setBufferSize(const std::map<int,StdSize>& mapSize)
390   {
391     setFixedBuffer() ;
392     for(auto& it : mapSize)
393     {
394      size_t size=std::max(CXios::minBufferSize*1.0,std::min(it.second*CXios::bufferSizeFactor*1.01,CXios::maxBufferSize*1.0)) ;
395      mapBufferSize_[it.first]=size ;
396      if (buffers.count(it.first)>0) buffers[it.first]->fixBufferSize(size);
397     }
398   }
399
400   /*!
401   * Finalize context client and do some reports. Function is non-blocking.
402   */
403  void CLegacyContextClient::finalize(void)
404  {
405    map<int,CClientBuffer*>::iterator itBuff;
406    std::list<int>::iterator ItServerLeader; 
407   
408    bool stop = false;
409
410    int* nbServerConnectionLocal  = new int[serverSize] ;
411    int* nbServerConnectionGlobal  = new int[serverSize] ;
412    for(int i=0;i<serverSize;++i) nbServerConnectionLocal[i]=0 ;
413    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)  nbServerConnectionLocal[itBuff->first]=1 ;
414    for (ItServerLeader = ranksServerLeader.begin(); ItServerLeader != ranksServerLeader.end(); ItServerLeader++)  nbServerConnectionLocal[*ItServerLeader]=1 ;
415   
416    MPI_Allreduce(nbServerConnectionLocal, nbServerConnectionGlobal, serverSize, MPI_INT, MPI_SUM, intraComm);
417   
418    CEventClient event(CContext::GetType(), CContext::EVENT_ID_CONTEXT_FINALIZE);
419    CMessage msg;
420
421    for (int i=0;i<serverSize;++i) if (nbServerConnectionLocal[i]==1) event.push(i, nbServerConnectionGlobal[i], msg) ;
422    sendEvent(event);
423
424    delete[] nbServerConnectionLocal ;
425    delete[] nbServerConnectionGlobal ;
426
427
428    CTimer::get("Blocking time").resume();
429    checkBuffers();
430    CTimer::get("Blocking time").suspend();
431
432    std::map<int,StdSize>::const_iterator itbMap = mapBufferSize_.begin(),
433                                          iteMap = mapBufferSize_.end(), itMap;
434
435    StdSize totalBuf = 0;
436    for (itMap = itbMap; itMap != iteMap; ++itMap)
437    {
438      report(10) << " Memory report : Context <" << context_->getId() << "> : client side : memory used for buffer of each connection to server" << endl
439                 << "  +) To server with rank " << itMap->first << " : " << itMap->second << " bytes " << endl;
440      totalBuf += itMap->second;
441    }
442    report(0) << " Memory report : Context <" << context_->getId() << "> : client side : total memory used for buffer " << totalBuf << " bytes" << endl;
443
444  }
445
446
447  /*!
448  */
449  bool CLegacyContextClient::havePendingRequests(void)
450  {
451    bool pending = false;
452    map<int,CClientBuffer*>::iterator itBuff;
453    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
454      pending |= itBuff->second->hasPendingRequest();
455    return pending;
456  }
457 
458  bool CLegacyContextClient::havePendingRequests(list<int>& ranks)
459  {
460      list<int>::iterator it;
461      bool pending = false;
462      for (it = ranks.begin(); it != ranks.end(); it++) pending |= buffers[*it]->hasPendingRequest();
463      return pending;
464  }
465
466  bool CLegacyContextClient::isNotifiedFinalized(void)
467  {
468    bool finalized = true;
469    map<int,CClientBuffer*>::iterator itBuff;
470    for (itBuff = buffers.begin(); itBuff != buffers.end(); itBuff++)
471      finalized &= itBuff->second->isNotifiedFinalized();
472    return finalized;
473  }
474
475}
Note: See TracBrowser for help on using the repository browser.