source: XIOS3/trunk/src/distribution/grid_remote_connector.cpp @ 2397

Last change on this file since 2397 was 2397, checked in by ymipsl, 22 months ago
  • Optimize remote connector computation in case of read (reverse way).
  • Don't compute clientFromServerConnector (and all intermediate computations) anymore in the non-reading case.

YM

  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 33.8 KB
#include "grid_remote_connector.hpp"
#include "client_client_dht_template.hpp"
#include "leader_process.hpp"
#include "mpi.hpp"
#include "element.hpp"



namespace xios
{
  /**
   * \brief Class constructor.
   * \param srcView List of source views.
   * \param dstView List of remote views.
   * \param localComm Local MPI communicator
   * \param remoteSize Size of the remote communicator
   */
  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, MPI_Comm localComm, int remoteSize)
                       : srcView_(srcView), dstView_(dstView), localComm_(localComm), remoteSize_(remoteSize)
  {}

  /**
   * \brief Class constructor.
   * \param srcView List of source views.
   * \param dstView List of remote views.
   * \param localComm Local MPI communicator
   * \param remoteSize Size of the remote communicator
   */
  CGridRemoteConnector::CGridRemoteConnector(vector<shared_ptr<CLocalView>>& srcView, vector< shared_ptr<CLocalView> >& dstView, MPI_Comm localComm, int remoteSize)
                       : srcView_(srcView), localComm_(localComm), remoteSize_(remoteSize)
  {
    for(auto& it : dstView) dstView_.push_back((shared_ptr<CDistributedView>) it) ;
  }
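  // Note: in this second constructor, destination views given as CLocalView are stored through their
  // CDistributedView base, so both constructors populate the same dstView_ container.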


  /**
   * \brief Compute whether each view composing the source grid and the remote grid is distributed or not.
   *        The result is stored in the internal attributes \b isSrcViewDistributed_ and \b isDstViewDistributed_.
   * \details To compute this, a hash is computed for each array of indices. The hash must be permutable, i.e.
   *          the order of the list of global indices must not influence the value of the hash. So a simple sum of
   *          the hashes of the individual indices is used for the whole array. The computed hashes are then compared
   *          across the ranks of the \b localComm_ MPI communicator using an MPI_Allreduce. If the hash is the same
   *          for every rank, the view is not distributed.
   */
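  // Illustrative example (not part of the algorithm): the per-array hash below is a plain sum of the
  // hashes of the individual indices, so it is invariant under reordering, e.g.
  //   hashGlobalIndex(4) + hashGlobalIndex(9) + hashGlobalIndex(7)
  // gives the same value for the index lists {4,9,7} and {7,4,9}; comparing this single value
  // (together with the number of indices) across ranks is therefore enough to detect a distributed view.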
  void CGridRemoteConnector::computeViewDistribution(void)
  {
    HashXIOS<size_t> hashGlobalIndex; // hash function-object

    int nDst = dstView_.size() ;
    vector<size_t> hashRank(remoteSize_) ;
    vector<size_t> sizeRank(remoteSize_) ;
    isDstViewDistributed_.resize(nDst) ;

    for(int i=0; i<nDst; i++)
    {
      map<int,CArray<size_t,1>> globalIndexView ;
      dstView_[i]->getGlobalIndexView(globalIndexView) ;
      hashRank.assign(remoteSize_,0) ; // 0 for every rank except the remote ranks whose view I hold,
                                       // which are assigned my local hash
      sizeRank.assign(remoteSize_,0) ;

      for(auto& it : globalIndexView)
      {
        int rank=it.first ;
        CArray<size_t,1>& globalIndex = it.second ;
        size_t globalIndexSize = globalIndex.numElements();
        size_t hashValue=0 ;
        for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
        hashRank[rank] += hashValue ;
        sizeRank[rank] += globalIndexSize ;
      }
      // Sum the hashes over every process of the local comm. The reduction is over the size of the remote view (remoteSize_);
      // afterwards we have, for each rank of the remote view, its global hash.
      MPI_Allreduce(MPI_IN_PLACE, hashRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
      MPI_Allreduce(MPI_IN_PLACE, sizeRank.data(), remoteSize_, MPI_SIZE_T, MPI_SUM, localComm_) ;
      size_t value = hashRank[0] ;
      size_t size = sizeRank[0] ;
      isDstViewDistributed_[i]=false ;
      for(int j=0 ; j<remoteSize_ ; j++)
        if (size!=sizeRank[j] || value != hashRank[j])
        {
          isDstViewDistributed_[i]=true ;
          break ;
        }
    }

    int nSrc = srcView_.size() ;
    int commSize,commRank ;
    MPI_Comm_size(localComm_,&commSize) ;
    MPI_Comm_rank(localComm_,&commRank) ;
    hashRank.resize(commSize,0) ;
    isSrcViewDistributed_.resize(nSrc) ;

    for(int i=0; i<nSrc; i++)
    {
      CArray<size_t,1> globalIndex ;
      srcView_[i]->getGlobalIndexView(globalIndex) ;
      hashRank.assign(commSize,0) ; // 0 for everybody except my rank
      size_t globalIndexSize = globalIndex.numElements() ;

      size_t allEqual ;
      MPI_Allreduce(&globalIndexSize, &allEqual, 1, MPI_SIZE_T, MPI_BXOR, localComm_) ;
      if (allEqual!=0)
      {
        isSrcViewDistributed_[i]=true ;
        break ;
      }

      // warning: the Jenkins hash maps 0 --> 0, so the number of elements must also be compared across ranks
      size_t hashValue=0 ;
      for(size_t ind=0;ind<globalIndexSize;ind++) hashValue += hashGlobalIndex(globalIndex(ind)) ;
      MPI_Allreduce(&hashValue, &allEqual, 1, MPI_SIZE_T, MPI_BXOR, localComm_) ;
      if (allEqual!=0) isSrcViewDistributed_[i]=true ;
      else isSrcViewDistributed_[i]=false ;
    }

  }

/**
  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
  * \details Depending on the view distributions computed by the computeViewDistribution() call, the connector is computed in computeConnectorMethods(), and to achieve a better optimisation
  *          some redundant ranks can be removed from the elements_ map.
  */
  void CGridRemoteConnector::computeConnector(bool eliminateRedundant)
  {
    if (eliminateRedundant)
    {
      computeViewDistribution() ;
      computeConnectorMethods() ;
      computeRedondantRanks() ;
      for(auto& rank : rankToRemove_)
        for(auto& element : elements_) element.erase(rank) ;
    }
    else
    {
       computeViewDistribution() ;
       computeConnectorRedundant() ;
    }
  }
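  // Usage sketch (illustrative only; in XIOS this class is normally driven by the surrounding grid
  // distribution machinery, and the variable names below are placeholders):
  //   CGridRemoteConnector connector(srcViews, dstViews, localComm, remoteSize) ;
  //   connector.computeConnector(/*eliminateRedundant=*/ true) ;
  // After the call, elements_ holds, for each element and each remote rank, the list of global
  // indices to exchange, with redundant ranks already erased.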

/**
  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
  * \details In this routine we don't eliminate redundant cells as is done in
  *          computeConnectorMethods. This can be useful to perform reduction operations over processes.
  *          In the future, some optimisation could be done considering full redundancy of the
  *          source view or the destination view.
  */
  void CGridRemoteConnector::computeConnectorRedundant(void)
  {
    vector<shared_ptr<CLocalView>> srcView ;
    vector<shared_ptr<CDistributedView>> dstView ;
    vector<int> indElements ;
    elements_.resize(srcView_.size()) ;

    bool srcViewsNonDistributed=true ; // not useful now, but kept for later optimization
    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;

    bool dstViewsNonDistributed=true ;  // not useful now, but kept for later optimization
    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;

    for(int i=0;i<srcView_.size();i++)
    {
      srcView.push_back(srcView_[i]) ;
      dstView.push_back(dstView_[i]) ;
      indElements.push_back(i) ;
    }

    computeGenericMethod(srcView, dstView, indElements) ;

    map<int,bool> ranks ;
    for(auto& it : elements_[indElements[0]])
    {
      if (it.second.numElements()==0) ranks[it.first] = false ;
      else  ranks[it.first] = true ;
    }

  }


/**
  * \brief Compute the connector, i.e. compute the \b elements_ attribute.
  * \details In order to achieve a better optimisation,
  *          we distinguish the case where the grid is not distributed on the source side (\b computeSrcNonDistributed),
  *          on the remote side (\b computeDstNonDistributed), or on both (\b computeSrcDstNonDistributed).
  *          Otherwise the generic method computeGenericMethod is called. Note that if one element view
  *          is distributed neither on the source nor on the remote grid, then we can use the tensorial product
  *          property to compute it independently using the \b computeSrcDstNonDistributed method.
  *          After that, the \b removeRedondantRanks method is called to suppress blocks of data that would be sent
  *          redundantly to the remote servers.
  */
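  // Dispatch summary for the branches below (marked //*****):
  //   - all src and dst views non-distributed : computeSrcDstNonDistributed() for every element
  //   - only src views non-distributed        : per element, computeSrcNonDistributed() when the dst view is
  //                                             distributed (generic method + broadcast in reverse mode),
  //                                             computeSrcDstNonDistributed() otherwise
  //   - only dst views non-distributed        : per element, computeDstNonDistributed() when the src view is
  //                                             distributed, computeSrcDstNonDistributed() otherwise
  //   - otherwise                             : computeGenericMethod() on the distributed elements, then
  //                                             computeSrcDstNonDistributed() on the fully non-distributed ones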
  void CGridRemoteConnector::computeConnectorMethods(bool reverse)
  {
    vector<shared_ptr<CLocalView>> srcView ;
    vector<shared_ptr<CDistributedView>> dstView ;
    vector<int> indElements ;
    elements_.resize(srcView_.size()) ;

    bool srcViewsNonDistributed=true ;
    for(int i=0;i<srcView_.size();i++) srcViewsNonDistributed = srcViewsNonDistributed && !isSrcViewDistributed_[i]  ;

    bool dstViewsNonDistributed=true ;
    for(int i=0;i<dstView_.size();i++) dstViewsNonDistributed = dstViewsNonDistributed && !isDstViewDistributed_[i] ;

  //*****************************************************
    if (srcViewsNonDistributed && dstViewsNonDistributed)
    {
      int commRank, commSize ;
      MPI_Comm_rank(localComm_,&commRank) ;
      MPI_Comm_size(localComm_,&commSize) ;

      map<int,bool> ranks ;
      if (reverse)
      {
        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
        ranks[leaderRank] = true ;
      }
      else
      {
        list<int> remoteRanks;
        list<int> notUsed ;
        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
        for(int rank : remoteRanks) ranks[rank]=true ;
      }
      for(int i=0; i<srcView_.size(); i++) computeSrcDstNonDistributed(i,ranks) ;
    }

  //*****************************************************
    else if (srcViewsNonDistributed)
    {
      int commRank, commSize ;
      MPI_Comm_rank(localComm_,&commRank) ;
      MPI_Comm_size(localComm_,&commSize) ;
      list<int> remoteRanks;
      list<int> notUsed ;
      map<int,bool> ranks ;

      if (reverse)
      {
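        // Reverse (read) case: only local rank 0 contributes the real source indices; the other local
        // processes contribute void views so that computeGenericMethod() can still be called collectively.
        // The per-remote-rank index lists computed on rank 0 are then broadcast to every local process below.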
        shared_ptr<CLocalElement> voidElement = make_shared<CLocalElement>(commRank, 0, CArray<size_t,1>()) ;
        shared_ptr<CLocalView> voidView = make_shared<CLocalView>(voidElement, CElementView::FULL, CArray<int,1>()) ;

        for(int i=0;i<srcView_.size();i++)
          if (isDstViewDistributed_[i])
          {
            if (commRank==0) srcView.push_back(srcView_[i]) ;
            else srcView.push_back(make_shared<CLocalView>(make_shared<CLocalElement>(commRank, srcView_[i]->getGlobalSize(), CArray<size_t,1>()),
                                                           CElementView::FULL, CArray<int,1>())) ; // void view
            dstView.push_back(dstView_[i]) ;
            indElements.push_back(i) ;
          }

        computeGenericMethod(srcView, dstView, indElements) ;

        for(int i=0;i<srcView_.size();i++)
          if (isDstViewDistributed_[i])
          {
            size_t sizeElement ;
            int nRank ;
            if (commRank==0) nRank = elements_[i].size() ;
            MPI_Bcast(&nRank, 1, MPI_INT, 0, localComm_) ;

            auto it=elements_[i].begin() ;
            for(int j=0;j<nRank;j++)
            {
              int rank ;
              size_t sizeElement ;
              if (commRank==0) { rank = it->first ; sizeElement=it->second.numElements(); }
              MPI_Bcast(&rank, 1, MPI_INT, 0, localComm_) ;
              MPI_Bcast(&sizeElement, 1, MPI_SIZE_T, 0, localComm_) ;
              if (commRank!=0) elements_[i][rank].resize(sizeElement) ;
              MPI_Bcast(elements_[i][rank].dataFirst(), sizeElement, MPI_SIZE_T, 0, localComm_) ;
              if (commRank==0) ++it ;
            }
          }

        for(auto& it : elements_[indElements[0]])
        {
          if (it.second.numElements()==0) ranks[it.first] = false ;
          else  ranks[it.first] = true ;
        }

        for(int i=0;i<srcView_.size();i++)
          if (!isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;

      }
      else
      {
        computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;
        for(int rank : remoteRanks) ranks[rank]=true ;

        for(int i=0; i<srcView_.size(); i++)
        {
          if (isDstViewDistributed_[i]) computeSrcNonDistributed(i) ;
          else computeSrcDstNonDistributed(i, ranks) ;
        }
      }

    }
  //*****************************************************
    else if (dstViewsNonDistributed)
    {
      int commRank, commSize ;
      MPI_Comm_rank(localComm_,&commRank) ;
      MPI_Comm_size(localComm_,&commSize) ;

      map<int,bool> ranks ;
      if (reverse)
      {
        int leaderRank=getLeaderRank(remoteSize_, commSize, commRank) ;
        ranks[leaderRank] = true ;
      }
      else for(int i=0;i<remoteSize_;i++) ranks[i]=true ;

      for(int i=0; i<srcView_.size(); i++)
      {
        if (isSrcViewDistributed_[i]) computeDstNonDistributed(i,ranks) ;
        else computeSrcDstNonDistributed(i,ranks) ;
      }
    }
  //*****************************************************
    else
    {
      for(int i=0;i<srcView_.size();i++)
        if (isSrcViewDistributed_[i] || isDstViewDistributed_[i])
        {
          srcView.push_back(srcView_[i]) ;
          dstView.push_back(dstView_[i]) ;
          indElements.push_back(i) ;
        }

      computeGenericMethod(srcView, dstView, indElements) ;

      map<int,bool> ranks ;
      for(auto& it : elements_[indElements[0]])
      {
        if (it.second.numElements()==0) ranks[it.first] = false ;
        else  ranks[it.first] = true ;
      }

      for(int i=0;i<srcView_.size();i++)
        if (!isSrcViewDistributed_[i] && !isDstViewDistributed_[i]) computeSrcDstNonDistributed(i, ranks) ;
    }

  }


/**
  * \brief Compute the connector for the element \b i when the source view is not distributed.
  *        After the call, element_[i] is defined.
  *  \param i Index of the element composing the source grid.
  */

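  // Sketch of the protocol implemented below: (1) every local process feeds a DHT with the remote global
  // indices it holds in the destination view, keyed to the owning remote rank; (2) local rank 0 queries the
  // DHT with the (non-distributed) source indices and thus learns, for each remote rank, the indices to send;
  // (3) rank 0 forwards each remote rank's list to the local "server leader" in charge of that rank, which
  // stores it in elements_[i].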
  void CGridRemoteConnector::computeSrcNonDistributed(int i)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;

    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;

    for(auto& it : globalIndexView)
    {
      auto& globalIndex=it.second ;
      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
    }

    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank
    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    CArray<size_t,1> srcIndex ;
    // since the source view is not distributed, only rank 0 needs to feed the DHT
    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;

    // compute the mapping
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    // returnInfo now maps each global index to the list of remote ranks it must be sent to,
    // but only on rank 0 because it is the one that fed the DHT;
    // so rank 0 must send each list to the corresponding server leader, i.e. the local process
    // that specifically handles one or more servers

    // rankIndGlo : rankIndGlo[rank][indGlo] : list of indices to send to the remote server of rank "rank"
    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
    if (commRank==0)
      for(auto& it1 : returnInfo)
        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;


    vector<MPI_Request> requests ;

    if (commRank==0)
    {
      requests.resize(remoteSize_) ;
      for(int i=0 ; i<remoteSize_;i++)
      {
        // send the global indices destined for a given server only to its "server leader"
        int rank = getLeaderRank(commSize, remoteSize_, i) ;
        MPI_Isend(rankIndGlo[i].data(), rankIndGlo[i].size(), MPI_SIZE_T, rank, i ,localComm_, &requests[i]) ;
      }
    }

    list<int> remoteRanks;
    list<int> notUsed ;
    // for which remote ranks am I the server leader?
    computeLeaderProcess(commRank, commSize, remoteSize_, remoteRanks, notUsed) ;

    for(auto remoteRank : remoteRanks)
    {
      MPI_Status status ;
      int size ;
      MPI_Probe(0,remoteRank,localComm_, &status);
      MPI_Get_count(&status, MPI_SIZE_T, &size) ;
      elements_[i][remoteRank].resize(size) ;
      // for each remote rank, receive the global indices from proc 0
      MPI_Recv(elements_[i][remoteRank].dataFirst(),size, MPI_SIZE_T,0,remoteRank, localComm_,&status) ;
    }

    if (commRank==0)
    {
      vector<MPI_Status> status(remoteSize_) ;
      // the sends are asynchronous, wait for their completion
      MPI_Waitall(remoteSize_, requests.data(), status.data()) ;
    }
  }

/**
  * \brief Compute the connector for the element \b i when the source view is not distributed (reverse case, used when reading).
  *        After the call, element_[i] is defined.
  *  \param i Index of the element composing the source grid.
  */

  void CGridRemoteConnector::computeSrcNonDistributedReverse(int i)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;

    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;

    for(auto& it : globalIndexView)
    {
      auto& globalIndex=it.second ;
      for(size_t ind : globalIndex) dataInfo[ind]=it.first ;
    }

    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank
    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    CArray<size_t,1> srcIndex ;
    // since the source view is not distributed, only rank 0 needs to feed the DHT
    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;

    // compute the mapping
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    // returnInfo now maps each global index to the list of remote ranks it must be sent to,
    // but only on rank 0 because it is the one that fed the DHT;
    // here the lists are broadcast so that every local process gets the full mapping

    // rankIndGlo : rankIndGlo[rank][indGlo] : list of indices to send to the remote server of rank "rank"
    vector<vector<size_t>> rankIndGlo(remoteSize_) ;
    if (commRank==0)
      for(auto& it1 : returnInfo)
        for(auto& it2 : it1.second) rankIndGlo[it2].push_back(it1.first) ;

   // broadcast the same information to every client
   for(int remoteRank=0 ; remoteRank<remoteSize_ ; remoteRank++)
   {
      int remoteDataSize ;
      if (commRank==0) remoteDataSize = rankIndGlo[remoteRank].size() ;
      MPI_Bcast(&remoteDataSize, 1, MPI_INT, 0, localComm_) ;

      auto& element = elements_[i][remoteRank] ;
      element.resize(remoteDataSize) ;
      if (commRank==0) for(int j=0 ; j<remoteDataSize; j++) element(j)=rankIndGlo[remoteRank][j] ;
      MPI_Bcast(element.dataFirst(), remoteDataSize, MPI_SIZE_T, 0, localComm_) ;
   }
  }



  /**
   * \brief Compute the remote connector for the element \b i when the remote view is not distributed.
   *        After the call, element_[i] is defined.
   * \param i Index of the element composing the remote grid.
   * \param ranks The list of ranks for which the local process is in charge of computing the connector
   *              (if it is a server leader, for example). If ranks[rank] == false, the corresponding elements_
   *              entry is set to an empty array (no data to send), just to notify the corresponding remote server
   *              that the call is collective with the other ones.
   */
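  // In short: since every remote rank holds the same (non-distributed) remote view, the DHT is fed with the
  // indices of remote rank 0 only; each local process then looks up its own source indices in it, and the
  // resulting list is stored for every remote rank it is in charge of (or as an empty array when ranks[rank] is false).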
  void CGridRemoteConnector::computeDstNonDistributed(int i, map<int,bool>& ranks)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;


    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;

    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank
    for(auto& it : globalIndexView)
      if (it.first==0) // since the remote view is not distributed, insert only the remote rank 0
      {
        auto& globalIndex=it.second ;
        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associate it with rank 0
      }

    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    CArray<size_t,1> srcIndex ;
    srcView_[i]->getGlobalIndexView(srcIndex) ;
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    // returnInfo now maps each local global index to the list of remote ranks it must be sent to;
    // now construct the element_ list of global indices for each rank in my list, unless the array must stay empty
    for (auto& rank : ranks)
    {
      if (rank.second) // non-empty array => this rank has data to receive
      {
        int size=0 ;
        for(auto& it : returnInfo) if (!it.second.empty()) size++ ;
        auto& array = element[rank.first] ;
        array.resize(size) ;
        size=0 ;
        for(auto& it : returnInfo)
          if (!it.second.empty())
          {
            array(size)=it.first ;
            size++ ;
          }
      }
      else element[rank.first] = CArray<size_t,1>(0) ;  // empty array => this rank has no data to receive
    }
  }

 /**
  * \brief Compute the remote connector for the element \b i when neither the source nor the remote view is distributed.
  *        After the call, element_[i] is defined.
  * \param i Index of the element composing the remote grid.
  * \param ranks The list of ranks for which the local process is in charge of computing the connector
  *              (if it is a server leader, for example). If ranks[rank] == false, the corresponding elements_
  *              entry is set to an empty array (no data to send), just to notify the corresponding remote server
  *              that the call is collective with the other ones.
  */

  void CGridRemoteConnector::computeSrcDstNonDistributed(int i, map<int,bool>& ranks)
  {
    auto& element = elements_[i] ;
    map<int,CArray<size_t,1>> globalIndexView ;
    dstView_[i]->getGlobalIndexView(globalIndexView) ;


    CClientClientDHTTemplate<int>::Index2InfoTypeMap dataInfo;
    // First we feed the distributed hash map with key (remote global index)
    // associated with the value of the remote rank

    for(auto& it : globalIndexView)
      if (it.first==0) // insert only the remote rank 0 since the remote view is not distributed
      {
        auto& globalIndex=it.second ;
        for(size_t ind : globalIndex) dataInfo[ind]=0 ; // associate it with rank 0
      }

    CClientClientDHTTemplate<int> DHT(dataInfo, localComm_) ;
    // afterwards we feed the DHT with the local global indices of the source view

    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    CArray<size_t,1> srcIndex ;

    // since the source view is not distributed, only rank 0 needs to feed the DHT
    if (commRank==0) srcView_[i]->getGlobalIndexView(srcIndex) ;
    DHT.computeIndexInfoMapping(srcIndex) ;
    auto& returnInfo = DHT.getInfoIndexMap() ;

    vector<size_t> indGlo ;
    if (commRank==0)
      for(auto& it1 : returnInfo)
        for(auto& it2 : it1.second) indGlo.push_back(it1.first) ;

    // now local rank 0 knows which indices to send to remote rank 0, but all the servers
    // must receive the same information, and only the leader ranks will send it.
    // So local rank 0 must broadcast the information to all leaders.
    // For this we create a new communicator composed of the local processes that must send data
    // to a remote rank; the data are broadcast, and element_[i] is constructed for each remote
    // rank in charge
    int color=0 ;
    if (ranks.empty()) color=0 ;
    else color=1 ;
    if (commRank==0) color=1 ;
    MPI_Comm newComm ;
    MPI_Comm_split(localComm_, color, commRank, &newComm) ;
    if (color==1)
    {
      // I am one of the processes that must send something to one or more remote servers,
      // so I get the list of global indices from rank 0
      int dataSize ;
      if (commRank==0) dataSize=indGlo.size() ;
      MPI_Bcast(&dataSize,1,MPI_INT, 0, newComm) ;
      indGlo.resize(dataSize) ;
      MPI_Bcast(indGlo.data(),dataSize,MPI_SIZE_T,0,newComm) ;
    }
    MPI_Comm_free(&newComm) ;

    // construct element_[i] from indGlo
    for(auto& rank : ranks)
    {
      if (rank.second)
      {
        int dataSize=indGlo.size();
        auto& element = elements_[i][rank.first] ;
        element.resize(dataSize) ;
        for(int i=0;i<dataSize; i++) element(i)=indGlo[i] ;
      }
      else element[rank.first] = CArray<size_t,1>(0) ;
    }

  }


 /**
  * \brief Generic method to compute the grid remote connector. Only distributed elements are specified in the source view and remote view.
  *        Connectors for non-distributed elements are computed separately to improve performance and memory consumption. After the call,
  *        \b elements_ is defined.
  *  \param srcView List of the source views composing the grid, without the non-distributed views
  *  \param dstView List of the remote views composing the grid, without the non-distributed views
  *  \param indElements Index of each view, making the correspondence between all views and the distributed views given in input
  */
  void CGridRemoteConnector::computeGenericMethod(vector<shared_ptr<CLocalView>>& srcView, vector<shared_ptr<CDistributedView>>& dstView, vector<int>& indElements)
  {
    // generic method, every element can be distributed
    int nDst = dstView.size() ;
    vector<size_t> dstSliceSize(nDst) ;
    dstSliceSize[0] = 1 ;
    for(int i=1; i<nDst; i++)  dstSliceSize[i] = dstView[i-1]->getGlobalSize()*dstSliceSize[i-1] ;
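    // Worked example (illustrative): for a grid made of two destination elements with global sizes {ni, nj},
    // dstSliceSize = {1, ni} and a grid global index g factorizes as
    //   g = jGlo*ni + iGlo,  i.e.  iGlo = g % ni  and  jGlo = g / ni ;
    // this tensorial-product property is what allows the per-element index lists to be combined below
    // (and decomposed again on the source side further down).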

    CClientClientDHTTemplate<int>::Index2VectorInfoTypeMap dataInfo ;
    CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap info ; // info map

    // first, we need to feed the DHT with the global indices of the remote server
    // for that:
    // First, the first element is inserted in a DHT with the rank as key and the associated list of global indices as value.
    // Then, the previously stored indices associated with the remote ranks I am in charge of are retrieved, and the global indices
    // corresponding to the position of the element in the remote view are reinserted using the tensorial product.
    // Finally we get only the list of remote global indices I am in charge of for the whole remote grid.

    for(int pos=0; pos<nDst; pos++)
    {
      size_t sliceSize=dstSliceSize[pos] ;
      map<int,CArray<size_t,1>> globalIndexView ;
      dstView[pos]->getGlobalIndexView(globalIndexView) ;

      CClientClientDHTTemplate<size_t>::Index2VectorInfoTypeMap lastInfo(info) ;

      if (pos>0)
      {
        CArray<size_t,1> ranks(globalIndexView.size()) ;
        auto it=globalIndexView.begin() ;
        for(int i=0 ; i<ranks.numElements();i++,it++) ranks(i)=it->first ;
        CClientClientDHTTemplate<size_t> dataRanks(info, localComm_) ;
        dataRanks.computeIndexInfoMapping(ranks) ;
        lastInfo = dataRanks.getInfoIndexMap() ;
      }

      info.clear() ;
      for(auto& it : globalIndexView)
      {
        int rank = it.first ;
        auto& globalIndex = it.second ;
        auto& inf = info[rank] ;
        if (pos==0) for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)) ;
        else
        {
          auto& lastGlobalIndex = lastInfo[rank] ;
          for(size_t lastGlobalInd : lastGlobalIndex)
          {
            for(int i=0;i<globalIndex.numElements();i++) inf.push_back(globalIndex(i)*sliceSize+lastGlobalInd) ;
          }
        }
      }

      if (pos==nDst-1)
      {
         for(auto& it : info)
         {
           int rank=it.first ;
           auto& globalIndex = it.second ;
           for(auto globalInd : globalIndex) dataInfo[globalInd].push_back(rank) ;
         }
      }
    }

    // we feed the DHT with the remote global indices
    CClientClientDHTTemplate<int> dataRanks(dataInfo, localComm_) ;

    // generate the list of global indices for the src view
    int nSrc = srcView.size() ;
    vector<size_t> srcSliceSize(nSrc) ;

    srcSliceSize[0] = 1 ;
    for(int i=1; i<nSrc; i++)  srcSliceSize[i] = srcView[i-1]->getGlobalSize()*srcSliceSize[i-1] ;

    vector<size_t> srcGlobalIndex ;
    size_t sliceIndex=0 ;
    srcView[nSrc-1]->getGlobalIndex(srcGlobalIndex, sliceIndex, srcSliceSize.data(), srcView.data(), nSrc-1) ;
    // now we have the global indices of the source grid in srcGlobalIndex
    // we feed the DHT with the src global indices (if we have any)
    if (srcGlobalIndex.size()>0)
    {
      CArray<size_t,1> srcGlobalIndexArray(srcGlobalIndex.data(), shape(srcGlobalIndex.size()),neverDeleteData) ;
      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
    }
    else
    {
      CArray<size_t,1> srcGlobalIndexArray ;
      dataRanks.computeIndexInfoMapping(srcGlobalIndexArray) ;
    }
    const auto& returnInfo = dataRanks.getInfoIndexMap() ;
    // returnInfo now maps each grid global index to the list of remote ranks it must be sent to,
    // but we want to use the tensorial product property to get the same information using only the global
    // indices of the element views. So the idea is to reverse the information: for a global index of the grid
    // to send to the remote server, what is the global index of each element composing the grid?

    vector<map<int, set<size_t>>> elements(nSrc) ; // internal representation of the elements composing the grid

    for(auto& indRanks : returnInfo)
    {
      size_t gridIndexGlo=indRanks.first ;
      auto& ranks = indRanks.second ;
      for(int i=nSrc-1; i>=0; i--)
      {
        auto& element = elements[i] ;
        size_t localIndGlo = gridIndexGlo / srcSliceSize[i] ;
        gridIndexGlo = gridIndexGlo % srcSliceSize[i] ;
        for(int rank : ranks) element[rank].insert(localIndGlo) ;
      }
    }

//    elements_.resize(nSrc) ;
    for(int i=0 ; i<nSrc; i++)
    {
      auto& element=elements[i] ;
      for(auto& rankInd : element)
      {
        int rank=rankInd.first ;
        set<size_t>& indGlo = rankInd.second ;
        CArray<size_t,1>& indGloArray = elements_[indElements[i]][rank] ;
        indGloArray.resize(indGlo.size()) ;
        int j=0 ;
        for (auto index : indGlo) { indGloArray(j) = index ; j++; }
      }
    }

    // What about servers that have no data to receive?
    // They must still be informed, by receiving an event with no data.
    // So find the remote servers with no data; one client takes charge of making sure
    // each of them receives a global index list with no data (0-size)
    vector<int> ranks(remoteSize_,0) ;
    for(auto& it : elements_[indElements[0]]) ranks[it.first] = 1 ;
    MPI_Allreduce(MPI_IN_PLACE, ranks.data(), remoteSize_, MPI_INT, MPI_SUM, localComm_) ;
    int commRank, commSize ;
    MPI_Comm_rank(localComm_,&commRank) ;
    MPI_Comm_size(localComm_,&commSize) ;
    int pos=0 ;
    for(int i=0; i<remoteSize_ ; i++)
      if (ranks[i]==0)
      {
        if (pos%commSize==commRank)
          for(int j=0 ; j<nSrc; j++) elements_[indElements[j]][i] = CArray<size_t,1>(0) ;
        pos++ ;
      }
  }

 /**
  * \brief Once the connector is computed (i.e. \b elements_ is filled), redundant data can be avoided being sent to the server.
  *        This call computes the redundant ranks and stores them in the \b rankToRemove_ attribute.
  *        The idea is to hash each block of indices that determines the data to send to a specific server rank,
  *        so the data to send to a rank is associated with a hash.
  *        Then the hashes are compared between local ranks, and the redundant data corresponding to the same hash are removed.
  */
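  // Sketch of the scheme implemented below: each local process combines the hashes of the index blocks it
  // would send to a given remote rank into one hash per rank. In reverse mode the comparison is purely local;
  // otherwise a DHT keyed by the (rank, hash) combination is used to find local processes holding identical
  // blocks, and only the lowest local rank keeps the block (the others add the rank to rankToRemove_).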
  void CGridRemoteConnector::computeRedondantRanks(bool reverse)
  {
    int commRank ;
    MPI_Comm_rank(localComm_,&commRank) ;

    set<int> ranks;
    for(auto& element : elements_)
      for(auto& it : element) ranks.insert(it.first) ;

    for(auto& element : elements_)
      for(auto& it : element)
        if (ranks.count(it.first)==0) ERROR("void CGridRemoteConnector::removeRedondantRanks(void)",<<"number of ranks in elements is not coherent between each element") ;

    HashXIOS<size_t> hashGlobalIndex;

    map<int,size_t> hashRanks ;
    for(auto& element : elements_)
      for(auto& it : element)
      {
        auto& globalIndex=it.second ;
        int rank=it.first ;
        size_t hash ;
        hash=0 ;
        for(int i=0; i<globalIndex.numElements(); i++) hash+=hashGlobalIndex(globalIndex(i)) ;
        if (globalIndex.numElements()>0)
        {
          if (hashRanks.count(rank)==0) hashRanks[rank]=hash ;
          else hashRanks[rank]=hashGlobalIndex.hashCombine(hashRanks[rank],hash) ;
        }
      }

    if (reverse)
    {
      set<size_t> hashs ;
      // easy because everything is local
      for(auto& hashRank : hashRanks)
      {
        if (hashs.count(hashRank.second)==0) hashs.insert(hashRank.second) ;
        else rankToRemove_.insert(hashRank.first) ;
      }

    }
    else
    {
      // a hash has now been computed for each data block I will send to the server.

      CClientClientDHTTemplate<int>::Index2InfoTypeMap info ;

      map<size_t,int> hashRank ;
      HashXIOS<int> hashGlobalIndexRank;
      for(auto& it : hashRanks)
      {
        it.second = hashGlobalIndexRank.hashCombine(it.first,it.second) ;
        info[it.second]=commRank ;
        hashRank[it.second]=it.first ;
      }

      // we feed a DHT map with key: hash, value: my rank
      CClientClientDHTTemplate<int> dataHash(info, localComm_) ;
      CArray<size_t,1> hashList(hashRank.size()) ;

      int i=0 ;
      for(auto& it : hashRank) { hashList(i)=it.first ; i++; }

      // now, which ranks share the same hash? feed the DHT with my list of hashes
      dataHash.computeIndexInfoMapping(hashList) ;
      auto& hashRankList = dataHash.getInfoIndexMap() ;


      for(auto& it : hashRankList)
      {
        size_t hash = it.first ;
        auto& ranks = it.second ;

        bool first=true ;
        // only the process with the lowest rank takes charge of sending the data to the remote server
        for(int rank : ranks) if (commRank>rank) first=false ;
        if (!first) rankToRemove_.insert(hashRank[hash]) ;
      }
    }
  }

}