source: XIOS3/trunk/src/event_scheduler.hpp @ 2554

Last change on this file since 2554 was 2522, checked in by ymipsl, 12 months ago

Improvement of the event scheduler. A hierarchical approach now makes event scheduling possible across different process groups, provided that a parent group of processes totally overlaps a child group of processes.
YM

  • Property copyright set to
    Software name : XIOS (Xml I/O Server)
    http://forge.ipsl.jussieu.fr/ioserver
    Creation date : January 2009
    Licence : CeCILL version 2
    see license file in root directory : Licence_CeCILL_V2-en.txt
    or http://www.cecill.info/licences/Licence_CeCILL_V2-en.html
    Holder : CEA/LSCE (Laboratoire des Sciences du CLimat et de l'Environnement)
    CNRS/IPSL (Institut Pierre Simon Laplace)
    Project Manager : Yann Meurdesoif
    yann.meurdesoif@cea.fr
File size: 9.3 KB
Line 
1#ifndef __EVENT_SCHEDULER_HPP__
2#define __EVENT_SCHEDULER_HPP__
3
4#include "xios_spl.hpp"
5#include "mpi.hpp"
6
7namespace xios
8{
9
10    //!  Event scheduling class. An instance of this class is used to order the event providing from different context to avoid dead lock.
11    /*!
12     *   Event are ordered in a same context using the timeLine id, so each server will process the same event. But between different
13     *   context, events are not scheduled and servers may choose to process different events and deadlock or MPI crash may occurs if
14     *   collective MPI communication are involved by the events.
15     *   This class solve the problem by scheduling the event and choose which event must be process by each server to insure correct
16     *   synchronisation. Information is send by asynchronous MPI communication to the root process that order the different events
17     *   (First In First Out) and brodcast the information to the other servers. To avoid to much incoming communication for the root
18     *   process, and hierachical tree is used for communicating from a limited number of child processes to the parent. 
19     */
20   
21    class CEventScheduler
22    {
23       public:
24       //!  Constructor
25       /*! A new communicator is created by duplicate comm. The communicating tree hierarchy is created.
26        *  @param[in] comm : MPI communicator du duplicate for internal use
27        */
28       CEventScheduler(const MPI_Comm& comm) ;
29       CEventScheduler(const MPI_Comm& comm, size_t schedulerLevel) ;
30
31       //! Destructor
32       ~CEventScheduler() ;
33
34
35
36       //! public interface for registring an event from the server
37       /*!
38        *  @param[in] timeLine : Time line id of the event
39        *  @param[in] contextHashId : Hashed id of the context
40        */
41       void registerEvent(const size_t timeLine, const size_t contextHashId) ;
42       
43       private:
44       CEventScheduler* getBaseScheduler(void) { if (childScheduler_== nullptr) return this; else return childScheduler_->getBaseScheduler();}
45
46       public:
47       //! public interface for query if the event defined by timeLine and hashId is sheduled next
48       /*!
49        *  @param[in] timeLine : Time line id of the event
50        *  @param[in] contextHasId : Hashed id of the context
51        *  @return  : boolean value, true is the event is scheduled next
52        *
53        *  If the event is scheduled next, it is remove from the `eventStack` queue list 
54        */   
55       bool queryEvent(const size_t timeLine, const size_t contextHashId) { return getBaseScheduler()->queryEvent_(timeLine, contextHashId); }
56       bool queryEvent_(const size_t timeLine, const size_t contextHashId) ;
57       void popEvent() { getBaseScheduler()->popEvent_() ; }
58       void popEvent_() { eventStack_.pop() ; }
59       bool isRoot(void) { return parent_[0]==mpiRank_ ;}
60       void setParentScheduler(shared_ptr<CEventScheduler> parentScheduler) { parentScheduler_ = parentScheduler ;}
61       void setChildScheduler(shared_ptr<CEventScheduler> childScheduler) { childScheduler_ = childScheduler ;}
62       void splitScheduler(const MPI_Comm& splittedComm, shared_ptr<CEventScheduler>& parent, shared_ptr<CEventScheduler>& child) ;
63
64       //! Public interface to give the hand to the instance to check pending or incoming message.
65       /*!
66        * Must be called periodicaly. Call `checkParentRequest` and `checkChildRequest` private method.
67        */
68       void checkEvent(void) { getBaseScheduler()->checkEvent_(); } 
69       void checkEvent_(void) ;
70
71       private:
72         void initialize(const MPI_Comm& comm) ;
73       
74       //! Send an event to the parent of level `lev+1`
75       /*!
76        *  @param[in] timeLine : Time line id of the event
77        *  @param[in] contextHasId : Hashed id of the context
78        *  @param[in] lev : actual level of the child in the hierarchy
79        *  The event is sent by an asynchrounous MPI_ISend
80        */
81     
82       void registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel) ;
83       void registerEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel, const size_t lev) ;
84
85
86
87       //! Children side. Check potential incoming message and if pending request are completed
88       /*!
89        *  - Check by `MPI_Test` if pending request sent to parents are complete.
90        *  - Probe incoming message from parent by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`
91        *  - Check by `MPI_Test` if pending received requests are complete. if yes :
92        *    + Broadcast the event to the childrens if is also a parent
93        *    + Otherwise : push the incomming event in the `eventStack` queue.
94        */
95       void checkParentRequest(void) ;
96
97
98
99       //! Parent side. Check potential incoming message and if pending request are completed
100       /*!
101        *  - Probe incoming message from chidren by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`.
102        *  - Check pending received event request from children using `MPI_Probe`. If and event is received, it is incerted in the
103        *    map `recvEvent` which is increased by 1. If the number of request received from children for this event is equal to the number
104        *    of children then :
105        *    + if the event level is 0, bcast the event to the children.
106        *    + else send the event to the parent.
107        *  - Check pending sent event request to children using `MPI_TEST` and if complete release the corresponding buffer
108        */
109       void checkChildRequest(void) ;
110
111
112
113       //! Parent side. Broadcast a received event from the parent to the children.
114       /*!
115        *  @param[in] timeLine : Time line id of the event
116        *  @param[in] contextHasId : Hashed id of the context
117        *  @param[in] lev : actual level of the child in the hierarchy
118        * Asynchronus MPI_ISend is used.
119        */
120       void bcastEvent(const size_t timeLine, const size_t contextHashId, const size_t schedulerLevel ,const size_t lev) ;
121       
122
123
124
125       //! Structure defining an event, composed of the timeLine, the context hashId and the hierachical level of the communication.
126       struct SEvent
127       {
128         size_t timeLine ; /*!< Time line id of the event in the context */
129         size_t hashId ; /*!< hassh id of the context */
130         size_t schedulerLevel ; /*!< hierarchical level of scherduler */
131         size_t level ;  /*!<hierarchical level of the communication*/
132
133         //! Definition of the == operator : needed to order the object in a map container
134         /*!
135            @param[in] e : object to compare with
136            @return : boolean result of the comparison
137         */
138         bool operator==(const SEvent& e) const
139         { 
140           if (timeLine == e.timeLine && hashId == e.hashId && level==e.level && schedulerLevel==e.schedulerLevel) return true ;
141           else return false ;
142         } ;
143       
144
145         //! Definition of the < operator : needed to order the object in a map container
146         /*!
147            @param[in] e : object to compare with
148            @return : boolean result of the comparison
149         */
150
151         bool operator<(const SEvent& e) const
152         { 
153           if (timeLine < e.timeLine) return true ;
154           else if (timeLine == e.timeLine && hashId < e.hashId) return true ;
155           else if (timeLine == e.timeLine && hashId == e.hashId && schedulerLevel<e.schedulerLevel) return true ;
156           else if (timeLine == e.timeLine && hashId == e.hashId && schedulerLevel==e.schedulerLevel && level<e.level) return true ;
157           else return false ;
158         } ;
159       } ;       
160       
161       //! Pending request struture. It keep send or receive buffer from asynchronous communication while the request is not complete.
162       struct SPendingRequest
163       {
164         size_t buffer[4] ;      /*!< communication buffer : timeLine, hashId, level */
165         MPI_Request request ;   /*!< pending MPI request */ 
166       } ;
167       
168       MPI_Comm communicator_ ;  /*!< Internal MPI communicator */ 
169       int mpiRank_ ;            /*!< Rank in the communicator */
170       int mpiSize_ ;            /*!< Size of the communicator */
171 
172       queue< pair<size_t, size_t> > eventStack_ ;         
173       queue<SPendingRequest* > pendingSentParentRequest_ ;   /*!< Pending request sent to parent   */
174       queue<SPendingRequest*>  pendingRecvParentRequest_ ;   /*!< Pending request recv from parent */   
175       list<SPendingRequest* >  pendingRecvChildRequest_ ;    /*!< Pending request recv from child  */
176       list<SPendingRequest*>   pendingSentChildRequest_ ;    /*!< Pending request sent to child    */
177       map< SEvent, int > recvEvent_ ;                        /*!< list of event received from children. Contains the currnet number children that have already post the same event */
178       
179       
180       int level_ ;                   /*!< Number of hierachical level for communication */
181       vector<int> parent_ ;          /*!< Parent rank for each level */ 
182       vector<vector<int> >  child_ ; /*!< List of child rank for each level */
183       vector<int> nbChild_ ;         /*!< Number of child for each level */   
184       
185       shared_ptr<CEventScheduler> parentScheduler_ ;
186       shared_ptr<CEventScheduler> childScheduler_ ;
187       bool hasParentScheduler_=false ;
188       size_t schedulerLevel_ ;
189
190    } ;
191}
192
193#endif
Note: See TracBrowser for help on using the repository browser.