1 | #ifndef __GATHERER_CONNECTOR_HPP__ |
---|
2 | #define __GATHERER_CONNECTOR_HPP__ |
---|
3 | |
---|
4 | #include "xios_spl.hpp" |
---|
5 | #include "array_new.hpp" |
---|
6 | #include "distributed_view.hpp" |
---|
7 | #include "mpi.hpp" |
---|
8 | #include "local_view.hpp" |
---|
9 | #include "distributed_view.hpp" |
---|
10 | #include "context_client.hpp" |
---|
11 | |
---|
12 | |
---|
13 | namespace xios |
---|
14 | { |
---|
15 | |
---|
16 | class CGathererConnector |
---|
17 | { |
---|
18 | private: |
---|
19 | CDistributedView* srcView_; |
---|
20 | CLocalView* dstView_; |
---|
21 | map<int, vector<int>> connector_ ; |
---|
22 | map<int, vector<bool>> mask_ ; // mask is on src view |
---|
23 | int dstSize_ ; |
---|
24 | map<int,int> srcSize_ ; |
---|
25 | |
---|
26 | public: |
---|
27 | CGathererConnector(CDistributedView* srcView, CLocalView* dstView) : srcView_(srcView), dstView_(dstView) {} ; |
---|
28 | void computeConnector(void) ; |
---|
29 | |
---|
30 | template<typename T> |
---|
31 | void transfer(int repeat, int sizeT, map<int, CArray<T,1>>& dataIn, CArray<T,1>& dataOut) |
---|
32 | { |
---|
33 | // for future, make a specific transfer function for sizeT=1 to avoid multiplication (increasing performance) |
---|
34 | size_t dstSlice = dstSize_*sizeT ; |
---|
35 | dataOut.resize(repeat* dstSlice) ; |
---|
36 | |
---|
37 | for(auto& data : dataIn) |
---|
38 | { |
---|
39 | T* output = dataOut.dataFirst() ; |
---|
40 | int rank=data.first ; |
---|
41 | auto input = data.second.dataFirst() ; |
---|
42 | auto& connector=connector_[rank] ; |
---|
43 | auto& mask=mask_[rank] ; |
---|
44 | int size=mask.size() ; |
---|
45 | size_t srcSlice = size * sizeT ; |
---|
46 | for(int l=0; l<repeat; l++) |
---|
47 | { |
---|
48 | for(int i=0, j=0 ;i<size;i++) |
---|
49 | { |
---|
50 | if (mask[i]) |
---|
51 | { |
---|
52 | int cj = connector[j]*sizeT ; |
---|
53 | int ci = i*sizeT ; |
---|
54 | for (int k=0;k<sizeT;k++) output[cj+k] = input[ci+k] ; |
---|
55 | j++ ; |
---|
56 | } |
---|
57 | } |
---|
58 | input+=srcSlice ; |
---|
59 | output+=dstSlice ; |
---|
60 | } |
---|
61 | } |
---|
62 | } |
---|
63 | |
---|
64 | template<typename T> |
---|
65 | void transfer(int sizeT, map<int, CArray<T,1>>& dataIn, CArray<T,1>& dataOut) |
---|
66 | { |
---|
67 | transfer(1, sizeT, dataIn, dataOut) ; |
---|
68 | } |
---|
69 | |
---|
70 | template<typename T> |
---|
71 | void transfer(map<int, CArray<T,1>>& dataIn, CArray<T,1>& dataOut) |
---|
72 | { |
---|
73 | transfer(1,dataIn,dataOut) ; |
---|
74 | } |
---|
75 | |
---|
76 | template<typename T> |
---|
77 | void transfer(int rank, CGathererConnector** connectors, int nConnectors, const T* input, T* output) |
---|
78 | { |
---|
79 | auto& connector = connector_[rank] ; // probably costly, find a better way to avoid the map |
---|
80 | auto& mask = mask_[rank] ; |
---|
81 | int srcSize = mask.size() ; |
---|
82 | |
---|
83 | if (nConnectors==0) |
---|
84 | { |
---|
85 | for(int i=0, j=0; i<srcSize; i++) |
---|
86 | if (mask[i]) |
---|
87 | { |
---|
88 | *(output+connector[j]) = *(input + i) ; |
---|
89 | j++ ; |
---|
90 | } |
---|
91 | |
---|
92 | } |
---|
93 | else |
---|
94 | { |
---|
95 | int srcSliceSize = (*(connectors-1))->getSrcSliceSize(rank, connectors-1, nConnectors-1) ; |
---|
96 | int dstSliceSize = (*(connectors-1))->getDstSliceSize(connectors-1, nConnectors-1) ; |
---|
97 | |
---|
98 | const T* in = input ; |
---|
99 | for(int i=0,j=0;i<srcSize;i++) |
---|
100 | { |
---|
101 | if (mask[i]) |
---|
102 | { |
---|
103 | (*(connectors-1))->transfer(rank, connectors-1, nConnectors-1, in, output+connector[j]*dstSliceSize) ; // the multiplication must be avoid in further optimization |
---|
104 | j++ ; |
---|
105 | } |
---|
106 | in += srcSliceSize ; |
---|
107 | } |
---|
108 | } |
---|
109 | |
---|
110 | } |
---|
111 | |
---|
112 | // hook for transfering mask in grid connector, maybe find an other way to doing that... |
---|
113 | void transfer_or(int rank, CGathererConnector** connectors, int nConnectors, const bool* input, bool* output) |
---|
114 | { |
---|
115 | auto& connector = connector_[rank] ; // probably costly, find a better way to avoid the map |
---|
116 | auto& mask = mask_[rank] ; |
---|
117 | int srcSize = mask.size() ; |
---|
118 | |
---|
119 | if (nConnectors==0) |
---|
120 | { |
---|
121 | for(int i=0, j=0; i<srcSize; i++) |
---|
122 | if (mask[i]) |
---|
123 | { |
---|
124 | *(output+connector[j]) |= *(input + i) ; |
---|
125 | j++ ; |
---|
126 | } |
---|
127 | |
---|
128 | } |
---|
129 | else |
---|
130 | { |
---|
131 | int srcSliceSize = (*(connectors-1))->getSrcSliceSize(rank, connectors-1, nConnectors-1) ; |
---|
132 | int dstSliceSize = (*(connectors-1))->getDstSliceSize(connectors-1, nConnectors-1) ; |
---|
133 | |
---|
134 | const bool* in = input ; |
---|
135 | for(int i=0,j=0;i<srcSize;i++) |
---|
136 | { |
---|
137 | if (mask[i]) |
---|
138 | { |
---|
139 | (*(connectors-1))->transfer_or(rank, connectors-1, nConnectors-1, in, output+connector[j]*dstSliceSize) ; // the multiplication must be avoid in further optimization |
---|
140 | j++ ; |
---|
141 | } |
---|
142 | in += srcSliceSize ; |
---|
143 | } |
---|
144 | } |
---|
145 | |
---|
146 | } |
---|
147 | |
---|
148 | |
---|
149 | |
---|
150 | template<typename T> |
---|
151 | void transfer(map<int, CArray<T,1>>& dataIn, CArray<T,1>& dataOut, T missingValue) |
---|
152 | { |
---|
153 | transfer(1, 1, dataIn, dataOut, missingValue); |
---|
154 | } |
---|
155 | |
---|
156 | template<typename T> |
---|
157 | void transfer(int sizeT, map<int, CArray<T,1>>& dataIn, CArray<T,1>& dataOut, T missingValue) |
---|
158 | { |
---|
159 | transfer(1, sizeT, dataIn, dataOut, missingValue) ; |
---|
160 | } |
---|
161 | |
---|
162 | template<typename T> |
---|
163 | void transfer(int repeat , int sizeT, map<int, CArray<T,1>>& dataIn, CArray<T,1>& dataOut, T missingValue) |
---|
164 | { |
---|
165 | dataOut.resize(repeat*dstSize_*sizeT) ; |
---|
166 | dataOut=missingValue ; |
---|
167 | transfer(repeat, sizeT, dataIn, dataOut) ; |
---|
168 | } |
---|
169 | |
---|
170 | template<typename T> |
---|
171 | void transfer(CEventServer& event, int sizeT, CArray<T,1>& dataOut) |
---|
172 | { |
---|
173 | map<int, CArray<T,1>> dataIn ; |
---|
174 | for (auto& subEvent : event.subEvents) |
---|
175 | { |
---|
176 | auto& data = dataIn[subEvent.rank]; |
---|
177 | (*subEvent.buffer) >> data ; |
---|
178 | } |
---|
179 | transfer(1, sizeT, dataIn, dataOut) ; |
---|
180 | } |
---|
181 | |
---|
182 | template<typename T> |
---|
183 | void transfer(CEventServer& event, CArray<T,1>& dataOut) |
---|
184 | { |
---|
185 | transfer(event, 1, dataOut) ; |
---|
186 | } |
---|
187 | |
---|
188 | template<typename T> |
---|
189 | void transfer(CEventServer& event, int sizeT, CArray<T,1>& dataOut, T missingValue) |
---|
190 | { |
---|
191 | map<int, CArray<T,1>> dataIn ; |
---|
192 | for (auto& subEvent : event.subEvents) |
---|
193 | { |
---|
194 | auto& data = dataIn[subEvent.rank]; |
---|
195 | (*subEvent.buffer) >> data ; |
---|
196 | } |
---|
197 | transfer(1, sizeT, dataIn, dataOut, missingValue) ; |
---|
198 | } |
---|
199 | |
---|
200 | template<typename T> |
---|
201 | void transfer(CEventServer& event, CArray<T,1>& dataOut, T missingValue) |
---|
202 | { |
---|
203 | map<int, CArray<T,1>> dataIn ; |
---|
204 | for (auto& subEvent : event.subEvents) |
---|
205 | { |
---|
206 | auto& data = dataIn[subEvent.rank]; |
---|
207 | (*subEvent.buffer) >> data ; |
---|
208 | } |
---|
209 | transfer(1, 1, dataIn, dataOut, missingValue) ; |
---|
210 | } |
---|
211 | |
---|
212 | int getSrcSliceSize(int rank, CGathererConnector** connectors, int nConnectors) |
---|
213 | { if (nConnectors==0) return srcSize_[rank] ; else return srcSize_[rank] * (*(connectors-1))->getSrcSliceSize(rank, connectors-1,nConnectors-1) ; } |
---|
214 | |
---|
215 | int getDstSliceSize(CGathererConnector** connectors, int nConnectors) |
---|
216 | { if (nConnectors==0) return dstSize_ ; else return dstSize_ * (*(connectors-1))->getDstSliceSize(connectors-1,nConnectors-1) ; } |
---|
217 | |
---|
218 | int getDstSize(void) {return dstSize_ ;} |
---|
219 | } ; |
---|
220 | |
---|
221 | } |
---|
222 | |
---|
223 | #endif |
---|