[492] | 1 | #include "event_scheduler.hpp" |
---|
[591] | 2 | #include "xios_spl.hpp" |
---|
[492] | 3 | #include "mpi.hpp" |
---|
[1224] | 4 | #include "tracer.hpp" |
---|
[2564] | 5 | #include "cxios.hpp" |
---|
[492] | 6 | |
---|
| 7 | namespace xios |
---|
| 8 | { |
---|
| 9 | |
---|
| 10 | |
---|
[1639] | 11 | CEventScheduler::CEventScheduler(const MPI_Comm& comm) |
---|
[492] | 12 | { |
---|
[2522] | 13 | schedulerLevel_=0 ; |
---|
| 14 | parentScheduler_.reset(); |
---|
| 15 | childScheduler_.reset(); |
---|
| 16 | initialize(comm) ; |
---|
| 17 | } |
---|
| 18 | |
---|
| 19 | CEventScheduler::CEventScheduler(const MPI_Comm& comm, size_t schedulerLevel) |
---|
| 20 | { |
---|
| 21 | schedulerLevel_=schedulerLevel ; |
---|
| 22 | parentScheduler_.reset(); |
---|
| 23 | childScheduler_.reset(); |
---|
| 24 | initialize(comm) ; |
---|
| 25 | } |
---|
| 26 | |
---|
| 27 | void CEventScheduler::initialize(const MPI_Comm& comm) |
---|
| 28 | { |
---|
[2518] | 29 | MPI_Comm_dup(comm, &communicator_) ; |
---|
[2564] | 30 | CXios::getMpiGarbageCollector().registerCommunicator(communicator_) ; |
---|
[2518] | 31 | MPI_Comm_size(communicator_,&mpiSize_) ; |
---|
| 32 | MPI_Comm_rank(communicator_,&mpiRank_); |
---|
[492] | 33 | |
---|
| 34 | |
---|
| 35 | int maxChild=1 ; |
---|
| 36 | |
---|
| 37 | int m ; |
---|
| 38 | do |
---|
| 39 | { |
---|
| 40 | m=1 ; |
---|
| 41 | maxChild=maxChild+1 ; |
---|
| 42 | for(int i=0;i<maxChild;i++) m=m*maxChild ; |
---|
[2518] | 43 | } while(m<mpiSize_) ; |
---|
[492] | 44 | |
---|
| 45 | |
---|
| 46 | int maxLevel=0 ; |
---|
[2518] | 47 | for(int size=1; size<=mpiSize_; size*=maxChild) maxLevel++ ; |
---|
[492] | 48 | |
---|
| 49 | int begin, end, nb ; |
---|
| 50 | int pos, n ; |
---|
| 51 | |
---|
[2518] | 52 | parent_=vector<int>(maxLevel+1) ; |
---|
| 53 | child_=vector<vector<int> >(maxLevel+1,vector<int>(maxChild)) ; |
---|
| 54 | nbChild_=vector<int> (maxLevel+1) ; |
---|
[492] | 55 | |
---|
[2518] | 56 | level_=0 ; |
---|
[492] | 57 | begin=0 ; |
---|
[2518] | 58 | end=mpiSize_-1 ; |
---|
[492] | 59 | nb=end-begin+1 ; |
---|
| 60 | |
---|
| 61 | do |
---|
| 62 | { |
---|
| 63 | n=0 ; |
---|
| 64 | pos=begin ; |
---|
[2518] | 65 | nbChild_[level_]=0 ; |
---|
| 66 | parent_[level_+1]=begin ; |
---|
[492] | 67 | for(int i=0;i<maxChild && i<nb ;i++) |
---|
| 68 | { |
---|
| 69 | if (i<nb%maxChild) n = nb/maxChild + 1 ; |
---|
| 70 | else n = nb/maxChild ; |
---|
| 71 | |
---|
[2518] | 72 | if (mpiRank_>=pos && mpiRank_<pos+n) |
---|
[492] | 73 | { |
---|
| 74 | begin=pos ; |
---|
| 75 | end=pos+n-1 ; |
---|
| 76 | } |
---|
[2518] | 77 | child_[level_][i]=pos ; |
---|
[492] | 78 | pos=pos+n ; |
---|
[2518] | 79 | nbChild_[level_]++ ; |
---|
[492] | 80 | } |
---|
| 81 | nb=end-begin+1 ; |
---|
[2518] | 82 | level_=level_+1 ; |
---|
[492] | 83 | } while (nb>1) ; |
---|
| 84 | |
---|
| 85 | |
---|
| 86 | } |
---|
| 87 | |
---|
| 88 | CEventScheduler::~CEventScheduler() |
---|
| 89 | { |
---|
[2518] | 90 | while (!pendingSentParentRequest_.empty() || !pendingRecvParentRequest_.empty() || !pendingRecvChildRequest_.empty() || !pendingSentChildRequest_.empty()) |
---|
[2274] | 91 | { |
---|
[2522] | 92 | checkEvent_() ; |
---|
[2274] | 93 | } |
---|
[492] | 94 | } |
---|
| 95 | |
---|
[2522] | 96 | void CEventScheduler::splitScheduler(const MPI_Comm& splittedComm, shared_ptr<CEventScheduler>& parent, shared_ptr<CEventScheduler>& child) |
---|
| 97 | { |
---|
| 98 | int color ; |
---|
| 99 | MPI_Comm newComm ; |
---|
| 100 | child = make_shared<CEventScheduler>(splittedComm, schedulerLevel_+ 1) ; |
---|
| 101 | if (child->isRoot()) color=1 ; |
---|
| 102 | else color=0 ; |
---|
| 103 | MPI_Comm_split(communicator_, color, mpiRank_, &newComm) ; |
---|
[2564] | 104 | CXios::getMpiGarbageCollector().registerCommunicator(newComm) ; |
---|
[2522] | 105 | |
---|
| 106 | parent = make_shared<CEventScheduler>(newComm , schedulerLevel_) ; |
---|
| 107 | child->setParentScheduler(parent) ; |
---|
| 108 | parent->setChildScheduler(child) ; |
---|
| 109 | if (parentScheduler_) |
---|
| 110 | { |
---|
| 111 | parentScheduler_->setChildScheduler(parent) ; |
---|
| 112 | parent->setParentScheduler(parentScheduler_) ; |
---|
| 113 | } |
---|
| 114 | |
---|
| 115 | } |
---|
| 116 | |
---|
[492] | 117 | void CEventScheduler::registerEvent(const size_t timeLine, const size_t contextHashId) |
---|
| 118 | { |
---|
[2522] | 119 | getBaseScheduler()->registerEvent(timeLine, contextHashId, schedulerLevel_) ; |
---|
| 120 | checkEvent_() ; |
---|
[492] | 121 | } |
---|
| 122 | |
---|
[2522] | 123 | void CEventScheduler::registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel) |
---|
[492] | 124 | { |
---|
[2522] | 125 | registerEvent(timeLine, contextHashId, schedulerLevel, level_) ; |
---|
| 126 | checkEvent_() ; |
---|
| 127 | } |
---|
| 128 | |
---|
| 129 | void CEventScheduler::registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel, const size_t lev) |
---|
| 130 | { |
---|
[492] | 131 | |
---|
[1224] | 132 | traceOff() ; |
---|
[492] | 133 | SPendingRequest* sentRequest=new SPendingRequest ; |
---|
| 134 | sentRequest->buffer[0]=timeLine ; |
---|
| 135 | sentRequest->buffer[1]=contextHashId ; |
---|
[2522] | 136 | sentRequest->buffer[2]=schedulerLevel ; |
---|
| 137 | sentRequest->buffer[3]=lev-1 ; |
---|
[492] | 138 | |
---|
[2518] | 139 | pendingSentParentRequest_.push(sentRequest) ; |
---|
[2522] | 140 | // info(100)<<"CEventScheduler::registerEvent => send event to parent "<<parent_[lev]<<" of level" <<lev-1<<endl ; |
---|
| 141 | MPI_Isend(sentRequest->buffer,4, MPI_UNSIGNED_LONG, parent_[lev], 0, communicator_, &sentRequest->request) ; |
---|
[1224] | 142 | traceOn() ; |
---|
[492] | 143 | } |
---|
| 144 | |
---|
[2522] | 145 | |
---|
| 146 | bool CEventScheduler::queryEvent_(const size_t timeLine, const size_t contextHashId) |
---|
[492] | 147 | { |
---|
[2522] | 148 | checkEvent_() ; |
---|
| 149 | |
---|
[2518] | 150 | if (! eventStack_.empty() && eventStack_.front().first==timeLine && eventStack_.front().second==contextHashId) |
---|
[492] | 151 | { |
---|
| 152 | return true ; |
---|
| 153 | } |
---|
| 154 | else return false ; |
---|
| 155 | } |
---|
[2230] | 156 | |
---|
[2522] | 157 | void CEventScheduler::checkEvent_(void) |
---|
[492] | 158 | { |
---|
[2522] | 159 | |
---|
| 160 | if (parentScheduler_) parentScheduler_->checkEvent_() ; |
---|
[1224] | 161 | traceOff() ; |
---|
[492] | 162 | checkChildRequest() ; |
---|
| 163 | checkParentRequest() ; |
---|
[1224] | 164 | traceOn() ; |
---|
[492] | 165 | |
---|
| 166 | } |
---|
| 167 | |
---|
| 168 | void CEventScheduler::checkParentRequest(void) |
---|
| 169 | { |
---|
| 170 | int completed ; |
---|
[1639] | 171 | MPI_Status status ; |
---|
[492] | 172 | int received ; |
---|
| 173 | SPendingRequest* recvRequest ; |
---|
| 174 | completed=true ; |
---|
| 175 | |
---|
| 176 | // check sent request to parent |
---|
[2518] | 177 | while (! pendingSentParentRequest_.empty() && completed) |
---|
[492] | 178 | { |
---|
[2518] | 179 | MPI_Test( & pendingSentParentRequest_.front()->request, &completed, &status) ; |
---|
[492] | 180 | if (completed) |
---|
| 181 | { |
---|
[2518] | 182 | delete pendingSentParentRequest_.front() ; |
---|
| 183 | pendingSentParentRequest_.pop() ; |
---|
[492] | 184 | } |
---|
| 185 | } |
---|
| 186 | |
---|
| 187 | // probe if a message is coming from parent |
---|
| 188 | received=true ; |
---|
| 189 | while(received) |
---|
| 190 | { |
---|
[2518] | 191 | MPI_Iprobe(MPI_ANY_SOURCE,1,communicator_,&received, &status) ; |
---|
[492] | 192 | if (received) |
---|
| 193 | { |
---|
| 194 | recvRequest=new SPendingRequest ; |
---|
[2522] | 195 | MPI_Irecv(recvRequest->buffer, 4, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 1, communicator_, &(recvRequest->request)) ; |
---|
[2518] | 196 | pendingRecvParentRequest_.push(recvRequest) ; |
---|
[492] | 197 | } |
---|
| 198 | } |
---|
| 199 | |
---|
| 200 | // check sent request from parent |
---|
| 201 | completed=true ; |
---|
[2518] | 202 | while (! pendingRecvParentRequest_.empty() && completed) |
---|
[492] | 203 | { |
---|
[2518] | 204 | recvRequest=pendingRecvParentRequest_.front() ; |
---|
[1639] | 205 | MPI_Test( &(recvRequest->request), &completed, &status) ; |
---|
[2522] | 206 | |
---|
[492] | 207 | if (completed) |
---|
| 208 | { |
---|
| 209 | size_t timeLine=recvRequest->buffer[0] ; |
---|
| 210 | size_t hashId=recvRequest->buffer[1] ; |
---|
[2522] | 211 | size_t schedulerLevel=recvRequest->buffer[2] ; |
---|
| 212 | size_t lev=recvRequest->buffer[3] ; |
---|
[1158] | 213 | delete recvRequest ; |
---|
[2518] | 214 | pendingRecvParentRequest_.pop() ; |
---|
[2522] | 215 | |
---|
| 216 | // info(100)<<"CEventScheduler::checkParentRequest => receive event from parent "<< status.MPI_SOURCE<<"at level"<< lev<< endl ; |
---|
| 217 | |
---|
| 218 | if (lev==level_) |
---|
| 219 | { |
---|
| 220 | if (childScheduler_) |
---|
| 221 | { |
---|
| 222 | // info(100)<<"CEventScheduler::checkParentRequest => bcast event to child scheduler "<<endl; |
---|
| 223 | childScheduler_->bcastEvent(timeLine, hashId, schedulerLevel, 0) ; |
---|
| 224 | } |
---|
| 225 | else |
---|
| 226 | { |
---|
| 227 | // info(100)<<"CEventScheduler::checkParentRequest => put event to stack : timeLine : "<<timeLine<<" hashId : "<<hashId<<endl; |
---|
| 228 | eventStack_.push(pair<size_t,size_t>(timeLine,hashId)) ; |
---|
| 229 | } |
---|
| 230 | } |
---|
| 231 | else |
---|
| 232 | { |
---|
| 233 | // info(100)<<"CEventScheduler::checkParentRequest => bcast event to child process "<<endl; |
---|
| 234 | bcastEvent(timeLine, hashId, schedulerLevel, lev) ; |
---|
| 235 | } |
---|
[492] | 236 | } |
---|
| 237 | } |
---|
| 238 | |
---|
| 239 | } |
---|
| 240 | |
---|
| 241 | void CEventScheduler::checkChildRequest(void) |
---|
| 242 | { |
---|
| 243 | // function call only by parent mpi process |
---|
| 244 | |
---|
[1639] | 245 | MPI_Status status ; |
---|
[492] | 246 | int received ; |
---|
| 247 | received=true ; |
---|
| 248 | SPendingRequest* recvRequest ; |
---|
| 249 | |
---|
| 250 | // check for posted requests and make the corresponding receive |
---|
| 251 | while(received) |
---|
| 252 | { |
---|
[2518] | 253 | MPI_Iprobe(MPI_ANY_SOURCE,0,communicator_,&received, &status) ; |
---|
[492] | 254 | if (received) |
---|
| 255 | { |
---|
| 256 | recvRequest=new SPendingRequest ; |
---|
[2522] | 257 | MPI_Irecv(recvRequest->buffer, 4, MPI_UNSIGNED_LONG, MPI_ANY_SOURCE, 0, communicator_, &recvRequest->request) ; |
---|
[2518] | 258 | pendingRecvChildRequest_.push_back(recvRequest) ; |
---|
[492] | 259 | } |
---|
| 260 | } |
---|
| 261 | |
---|
| 262 | // check if receive request is achieved |
---|
| 263 | |
---|
[2518] | 264 | for(list<SPendingRequest*>::iterator it=pendingRecvChildRequest_.begin(); it!=pendingRecvChildRequest_.end() ; ) |
---|
[492] | 265 | { |
---|
[1639] | 266 | MPI_Test(&((*it)->request),&received,&status) ; |
---|
[492] | 267 | if (received) |
---|
| 268 | { |
---|
| 269 | size_t timeLine=(*it)->buffer[0] ; |
---|
| 270 | size_t hashId=(*it)->buffer[1] ; |
---|
[2522] | 271 | size_t schedulerLevel=(*it)->buffer[2] ; |
---|
| 272 | size_t lev=(*it)->buffer[3] ; |
---|
[492] | 273 | |
---|
[2522] | 274 | // info(100)<<"CEventScheduler::checkChildRequest => received event from child "<<status.MPI_SOURCE<<" at level "<<lev<<endl; |
---|
| 275 | |
---|
| 276 | SEvent event={timeLine, hashId, schedulerLevel, lev} ; |
---|
[492] | 277 | delete *it ; // free mem |
---|
[2518] | 278 | it=pendingRecvChildRequest_.erase(it) ; // get out of the list |
---|
[492] | 279 | |
---|
[2518] | 280 | map< SEvent,int>::iterator itEvent=recvEvent_.find(event) ; |
---|
| 281 | if (itEvent==recvEvent_.end()) |
---|
[492] | 282 | { |
---|
[2518] | 283 | itEvent=(recvEvent_.insert(pair< SEvent ,int > (event,1))).first ; |
---|
[492] | 284 | |
---|
| 285 | } |
---|
| 286 | else (itEvent->second)++ ; |
---|
[2518] | 287 | if (itEvent->second==nbChild_[lev]) |
---|
[492] | 288 | { |
---|
| 289 | if (lev==0) |
---|
| 290 | { |
---|
[2522] | 291 | if (schedulerLevel==schedulerLevel_) |
---|
| 292 | { |
---|
| 293 | // info(100)<<"CEventScheduler::checkChildRequest => bcastEvent to child"<<endl ; |
---|
| 294 | bcastEvent(timeLine, hashId, schedulerLevel, lev) ; |
---|
| 295 | } |
---|
| 296 | else |
---|
| 297 | { |
---|
| 298 | // info(100)<<"CEventScheduler::checkChildRequest => register event to parent scheduler"<<endl ; |
---|
| 299 | parentScheduler_->registerEvent(timeLine, hashId, schedulerLevel) ; |
---|
| 300 | } |
---|
[2518] | 301 | recvEvent_.erase(itEvent) ; |
---|
[492] | 302 | } |
---|
| 303 | else |
---|
| 304 | { |
---|
[2522] | 305 | // info(100)<<"CEventScheduler::checkChildRequest => register event to parent process"<<endl ; |
---|
| 306 | registerEvent( timeLine,hashId, schedulerLevel, lev) ; |
---|
[2518] | 307 | recvEvent_.erase(itEvent) ; |
---|
[492] | 308 | } |
---|
| 309 | } |
---|
| 310 | } |
---|
| 311 | else ++it ; |
---|
| 312 | } |
---|
| 313 | |
---|
| 314 | // check if bcast request is achieved |
---|
| 315 | |
---|
[2518] | 316 | for(list<SPendingRequest*>::iterator it=pendingSentChildRequest_.begin(); it!=pendingSentChildRequest_.end() ; ) |
---|
[492] | 317 | { |
---|
[1639] | 318 | MPI_Test(&(*it)->request,&received,&status) ; |
---|
[492] | 319 | if (received) |
---|
| 320 | { |
---|
| 321 | delete *it ; // free memory |
---|
[2518] | 322 | it = pendingSentChildRequest_.erase(it) ; // get out of the list |
---|
[492] | 323 | |
---|
| 324 | } |
---|
| 325 | else ++it ; |
---|
| 326 | |
---|
| 327 | } |
---|
| 328 | } |
---|
| 329 | |
---|
[2522] | 330 | void CEventScheduler::bcastEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel, const size_t lev) |
---|
[492] | 331 | { |
---|
| 332 | SPendingRequest* sentRequest ; |
---|
| 333 | |
---|
| 334 | |
---|
[2518] | 335 | for(int i=0; i<nbChild_[lev];i++) |
---|
[492] | 336 | { |
---|
| 337 | sentRequest=new SPendingRequest ; |
---|
| 338 | sentRequest->buffer[0]=timeLine ; |
---|
| 339 | sentRequest->buffer[1]=contextHashId ; |
---|
[2522] | 340 | sentRequest->buffer[2]=schedulerLevel ; |
---|
| 341 | sentRequest->buffer[3]=lev+1 ; |
---|
| 342 | MPI_Isend(sentRequest->buffer,4, MPI_UNSIGNED_LONG, child_[lev][i], 1, communicator_, & sentRequest->request) ; |
---|
[2518] | 343 | pendingSentChildRequest_.push_back(sentRequest) ; |
---|
[492] | 344 | } |
---|
| 345 | } |
---|
| 346 | |
---|
| 347 | |
---|
| 348 | } |
---|