1 | #ifndef __EVENT_SCHEDULER_HPP__ |
---|
2 | #define __EVENT_SCHEDULER_HPP__ |
---|
3 | |
---|
4 | #include "xios_spl.hpp" |
---|
5 | #include "mpi.hpp" |
---|
6 | |
---|
7 | namespace xios |
---|
8 | { |
---|
9 | |
---|
//! Event scheduling class. An instance of this class is used to order the events coming from different contexts in order to avoid deadlocks.
/*!
 *  Events are ordered within a same context using the timeLine id, so each server will process the same event. But between different
 *  contexts, events are not scheduled, so servers may choose to process different events, and a deadlock or an MPI crash may occur if
 *  collective MPI communications are involved by the events.
 *  This class solves the problem by scheduling the events and choosing which event must be processed by each server to ensure correct
 *  synchronisation. Information is sent by asynchronous MPI communication to the root process, which orders the different events
 *  (First In First Out) and broadcasts the information to the other servers. To avoid too many incoming communications for the root
 *  process, a hierarchical tree is used for communicating from a limited number of child processes to the parent.
 */
---|
20 | |
---|
21 | class CEventScheduler |
---|
22 | { |
---|
23 | public: |
---|
24 | //! Constructor |
---|
25 | /*! A new communicator is created by duplicate comm. The communicating tree hierarchy is created. |
---|
26 | * @param[in] comm : MPI communicator du duplicate for internal use |
---|
27 | */ |
---|
28 | CEventScheduler(const ep_lib::MPI_Comm& comm) ; |
---|
29 | |
---|
30 | |
---|
31 | //! Destructor |
---|
32 | ~CEventScheduler() ; |
---|
33 | |
---|
34 | |
---|
35 | |
---|
36 | //! public interface for registring an event from the server |
---|
37 | /*! |
---|
38 | * @param[in] timeLine : Time line id of the event |
---|
39 | * @param[in] contextHashId : Hashed id of the context |
---|
40 | */ |
---|
41 | void registerEvent(const size_t timeLine, const size_t contextHashId) ; |
---|
42 | |
---|
43 | |
---|
44 | |
---|
45 | //! public interface for query if the event defined by timeLine and hashId is sheduled next |
---|
46 | /*! |
---|
47 | * @param[in] timeLine : Time line id of the event |
---|
48 | * @param[in] contextHasId : Hashed id of the context |
---|
49 | * @return : boolean value, true is the event is scheduled next |
---|
50 | * |
---|
51 | * If the event is scheduled next, it is remove from the `eventStack` queue list |
---|
52 | */ |
---|
53 | bool queryEvent(const size_t timeLine, const size_t contextHashId) ; |
---|
54 | |
---|
55 | |
---|
56 | //! Public interface to give the hand to the instance to check pending or incoming message. |
---|
57 | /*! |
---|
58 | * Must be called periodicaly. Call `checkParentRequest` and `checkChildRequest` private method. |
---|
59 | */ |
---|
60 | void checkEvent(void) ; |
---|
61 | |
---|
62 | private: |
---|
63 | |
---|
64 | |
---|
65 | //! Send an event to the parent of level `lev+1` |
---|
66 | /*! |
---|
67 | * @param[in] timeLine : Time line id of the event |
---|
68 | * @param[in] contextHasId : Hashed id of the context |
---|
69 | * @param[in] lev : actual level of the child in the hierarchy |
---|
70 | * The event is sent by an asynchrounous MPI_ISend |
---|
71 | */ |
---|
72 | void registerEvent(const size_t timeLine, const size_t contextHashId, const size_t lev) ; |
---|
73 | |
---|
74 | |
---|
75 | |
---|
76 | //! Children side. Check potential incoming message and if pending request are completed |
---|
77 | /*! |
---|
78 | * - Check by `MPI_Test` if pending request sent to parents are complete. |
---|
79 | * - Probe incoming message from parent by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv` |
---|
80 | * - Check by `MPI_Test` if pending received requests are complete. if yes : |
---|
81 | * + Broadcast the event to the childrens if is also a parent |
---|
82 | * + Otherwise : push the incomming event in the `eventStack` queue. |
---|
83 | */ |
---|
84 | void checkParentRequest(void) ; |
---|
85 | |
---|
86 | |
---|
87 | |
---|
88 | //! Parent side. Check potential incoming message and if pending request are completed |
---|
89 | /*! |
---|
90 | * - Probe incoming message from chidren by using `MPI_Probe`. If yes, post an asynchronous reception by `MPI_IRecv`. |
---|
91 | * - Check pending received event request from children using `MPI_Probe`. If and event is received, it is incerted in the |
---|
92 | * map `recvEvent` which is increased by 1. If the number of request received from children for this event is equal to the number |
---|
93 | * of children then : |
---|
94 | * + if the event level is 0, bcast the event to the children. |
---|
95 | * + else send the event to the parent. |
---|
96 | * - Check pending sent event request to children using `MPI_TEST` and if complete release the corresponding buffer |
---|
97 | */ |
---|
98 | void checkChildRequest(void) ; |
---|
99 | |
---|
100 | |
---|
101 | |
---|
102 | //! Parent side. Broadcast a received event from the parent to the children. |
---|
103 | /*! |
---|
104 | * @param[in] timeLine : Time line id of the event |
---|
105 | * @param[in] contextHasId : Hashed id of the context |
---|
106 | * @param[in] lev : actual level of the child in the hierarchy |
---|
107 | * Asynchronus MPI_ISend is used. |
---|
108 | */ |
---|
109 | void bcastEvent(const size_t timeLine, const size_t contextHashId, const size_t lev) ; |
---|
110 | |
---|
111 | |
---|
112 | |
---|
113 | |
---|
114 | //! Structure defining an event, composed of the timeLine, the context hashId and the hierachical level of the communication. |
---|
115 | struct SEvent |
---|
116 | { |
---|
117 | size_t timeLine ; /*!< Time line id of the event in the context */ |
---|
118 | size_t hashId ; /*!< hassh id of the context */ |
---|
119 | size_t level ; /*!<hierarchical level of the communication*/ |
---|
120 | |
---|
121 | //! Definition of the == operator : needed to order the object in a map container |
---|
122 | /*! |
---|
123 | @param[in] e : object to compare with |
---|
124 | @return : boolean result of the comparison |
---|
125 | */ |
---|
126 | bool operator==(const SEvent& e) const |
---|
127 | { |
---|
128 | if (timeLine == e.timeLine && hashId == e.hashId && level==e.level) return true ; |
---|
129 | else return false ; |
---|
130 | } ; |
---|
131 | |
---|
132 | |
---|
133 | //! Definition of the < operator : needed to order the object in a map container |
---|
134 | /*! |
---|
135 | @param[in] e : object to compare with |
---|
136 | @return : boolean result of the comparison |
---|
137 | */ |
---|
138 | |
---|
139 | bool operator<(const SEvent& e) const |
---|
140 | { |
---|
141 | if (timeLine < e.timeLine) return true ; |
---|
142 | else if (timeLine == e.timeLine && hashId < e.hashId) return true ; |
---|
143 | else if (timeLine == e.timeLine && hashId == e.hashId && level<e.level) return true ; |
---|
144 | else return false ; |
---|
145 | } ; |
---|
146 | } ; |
---|
147 | |
---|
148 | |
---|
149 | //! Pending request struture. It keep send or receive buffer from asynchronous communication while the request is not complete. |
---|
150 | struct SPendingRequest |
---|
151 | { |
---|
152 | size_t buffer[3] ; /*!< communication buffer : timeLine, hashId, level */ |
---|
153 | ep_lib::MPI_Request request ; /*!< pending MPI request */ |
---|
154 | } ; |
---|
155 | |
---|
156 | ep_lib::MPI_Comm communicator ; /*!< Internal MPI communicator */ |
---|
157 | int mpiRank ; /*!< Rank in the communicator */ |
---|
158 | int mpiSize ; /*!< Size of the communicator */ |
---|
159 | |
---|
160 | queue< pair<size_t, size_t> > eventStack ; |
---|
161 | queue<SPendingRequest* > pendingSentParentRequest ; /*!< Pending request sent to parent */ |
---|
162 | queue<SPendingRequest*> pendingRecvParentRequest ; /*!< Pending request recv from parent */ |
---|
163 | list<SPendingRequest* > pendingRecvChildRequest ; /*!< Pending request recv from child */ |
---|
164 | list<SPendingRequest*> pendingSentChildRequest ; /*!< Pending request sent to child */ |
---|
165 | map< SEvent, int > recvEvent ; /*!< list of event received from children. Contains the currnet number children that have already post the same event */ |
---|
166 | |
---|
167 | |
---|
168 | int level ; /*!< Number of hierachical level for communication */ |
---|
169 | vector<int> parent ; /*!< Parent rank for each level */ |
---|
170 | vector<vector<int> > child ; /*!< List of child rank for each level */ |
---|
171 | vector<int> nbChild ; /*!< Number of child for each level */ |
---|
172 | |
---|
173 | } ; |
---|
174 | } |
---|
175 | |
---|
176 | #endif |
---|