Changeset 2324


Ignore:
Timestamp:
04/15/22 13:05:33 (2 years ago)
Author:
ymipsl
Message:

Solve deadlock or crash occurring when activating second levels of servers.

YM

Location:
XIOS/dev/dev_ym/XIOS_COUPLING/src
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • XIOS/dev/dev_ym/XIOS_COUPLING/src/buffer_client.cpp

    r2298 r2324  
    144144    if (hasWindows) 
    145145    { 
     146      if (winState[current]==true) ERROR("CClientBuffer::lockBuffer(void)",<<"Try lo lock client buffer but winState said it is already locked") ; 
    146147      MPI_Win_lock(MPI_LOCK_EXCLUSIVE,clientRank_, 0, windows_[current]) ; 
    147148      winState[current]=true ; 
     
    155156    if (hasWindows) 
    156157    { 
     158      if (winState[current]==false) ERROR("CClientBuffer::lockBuffer(void)",<<"Try lo unlock client buffer but winState said it is already unlocked") ; 
    157159      MPI_Win_unlock(clientRank_, windows_[current]) ; 
    158160      winState[current]=false ; 
  • XIOS/dev/dev_ym/XIOS_COUPLING/src/context_client.cpp

    r2310 r2324  
    175175      if (isAttachedModeEnabled()) // couldBuffer is always true in attached mode 
    176176      { 
    177         while (checkBuffers(ranks)) context_->globalEventLoop() ; 
     177        while (checkBuffers(ranks)) callGlobalEventLoop() ; 
    178178       
    179179        CXios::getDaemonsManager()->scheduleContext(hashId_) ; 
    180         while (CXios::getDaemonsManager()->isScheduledContext(hashId_)) context_->globalEventLoop() ; 
     180        while (CXios::getDaemonsManager()->isScheduledContext(hashId_)) callGlobalEventLoop() ; 
    181181      } 
    182182       
     
    287287          checkBuffers(); 
    288288 
    289           context_->globalEventLoop() ; 
     289          callGlobalEventLoop() ; 
    290290        } 
    291291 
     
    301301   } 
    302302 
     303   void CContextClient::eventLoop(void) 
     304   { 
     305      if (!locked_) checkBuffers() ; 
     306   } 
     307 
     308   void CContextClient::callGlobalEventLoop(void) 
     309   { 
     310     locked_=true ; 
     311     context_->globalEventLoop() ; 
     312     locked_=false ; 
     313   } 
    303314   /*! 
    304315   Make a new buffer for a certain connection to server with specific rank 
  • XIOS/dev/dev_ym/XIOS_COUPLING/src/context_client.hpp

    r2260 r2324  
    3939      bool checkBuffers(list<int>& ranks); 
    4040      bool checkBuffers(void); 
     41      void eventLoop(void) ; 
     42      void callGlobalEventLoop() ; 
    4143      void releaseBuffers(void); 
    4244      bool havePendingRequests(void); 
     
    129131 
    130132      double latency_=0e-2 ; 
     133 
     134      bool locked_ = false ; //!< The context client is locked to avoid recursive checkBuffer 
    131135  }; 
    132136} 
  • XIOS/dev/dev_ym/XIOS_COUPLING/src/node/context.cpp

    r2321 r2324  
    684684    setCurrent(getId()) ; 
    685685 
    686     if (client!=nullptr && !finalized) client->checkBuffers(); 
     686    if (client!=nullptr && !finalized) client->eventLoop(); 
    687687     
    688688    for (int i = 0; i < clientPrimServer.size(); ++i) 
    689689    { 
    690       if (!finalized) clientPrimServer[i]->checkBuffers(); 
     690      if (!finalized) clientPrimServer[i]->eventLoop(); 
    691691      if (!finalized) finished &= serverPrimServer[i]->eventLoop(enableEventsProcessing); 
    692692    } 
    693693 
    694694    for (auto couplerOut : couplerOutClient_) 
    695       if (!finalized) couplerOut.second->checkBuffers(); 
     695      if (!finalized) couplerOut.second->eventLoop(); 
    696696     
    697697    for (auto couplerIn : couplerInClient_) 
    698       if (!finalized) couplerIn.second->checkBuffers(); 
     698      if (!finalized) couplerIn.second->eventLoop(); 
    699699     
    700700    for (auto couplerOut : couplerOutServer_) 
     
    788788        client->finalize(); 
    789789        info(100)<<"DEBUG: context "<<getId()<<" Client finalize sent"<<endl ; 
    790         while (client->havePendingRequests()) client->checkBuffers(); 
     790        while (client->havePendingRequests()) client->eventLoop(); 
    791791        info(100)<<"DEBUG: context "<<getId()<<" no pending request ok"<<endl ; 
    792792        bool notifiedFinalized=false ; 
     
    808808           do 
    809809           { 
    810              clientPrimServer[i]->checkBuffers(); 
     810             clientPrimServer[i]->eventLoop(); 
    811811             bufferReleased = !clientPrimServer[i]->havePendingRequests(); 
    812812           } while (!bufferReleased); 
     
    10991099    { 
    11001100      for(auto field : fileOutField) slaveServers_.insert(field->getContextClient()) ;  
    1101       for(auto field : fileInField)  slaveServers_.insert(field->getContextClient()) ;   
    11021101    } 
    11031102 
     
    17091708   TRY 
    17101709   { 
    1711      CEventClient event(getType(),EVENT_ID_UPDATE_CALENDAR); 
    17121710     for(auto client : slaveServers_)  
    17131711     { 
     1712       CEventClient event(getType(),EVENT_ID_UPDATE_CALENDAR); 
    17141713       if (client->isServerLeader()) 
    17151714       { 
Note: See TracChangeset for help on using the changeset viewer.