#ifndef __GATHERER_CONNECTOR_HPP__ #define __GATHERER_CONNECTOR_HPP__ #include "xios_spl.hpp" #include "array_new.hpp" #include "distributed_view.hpp" #include "mpi.hpp" #include "local_view.hpp" #include "distributed_view.hpp" #include "context_client.hpp" namespace xios { class CGathererConnector { private: shared_ptr srcView_; shared_ptr dstView_; map> connector_ ; map> mask_ ; // mask is on src view int dstSize_ ; map srcSize_ ; public: CGathererConnector(shared_ptr srcView, shared_ptr dstView) : srcView_(srcView), dstView_(dstView) {} ; void computeConnector(void) ; template void transfer(int repeat, int sizeT, map>& dataIn, CArray& dataOut) { // for future, make a specific transfer function for sizeT=1 to avoid multiplication (increasing performance) size_t dstSlice = dstSize_*sizeT ; dataOut.resize(repeat* dstSlice) ; for(auto& data : dataIn) { T* output = dataOut.dataFirst() ; int rank=data.first ; auto input = data.second.dataFirst() ; auto& connector=connector_[rank] ; auto& mask=mask_[rank] ; int size=mask.size() ; size_t srcSlice = size * sizeT ; for(int l=0; l void transfer(int sizeT, map>& dataIn, CArray& dataOut) { transfer(1, sizeT, dataIn, dataOut) ; } template void transfer(map>& dataIn, CArray& dataOut) { transfer(1,dataIn,dataOut) ; } template void transfer(int rank, shared_ptr* connectors, int nConnectors, const T* input, T* output) { auto& connector = connector_[rank] ; // probably costly, find a better way to avoid the map auto& mask = mask_[rank] ; int srcSize = mask.size() ; if (nConnectors==0) { for(int i=0, j=0; igetSrcSliceSize(rank, connectors-1, nConnectors-1) ; int dstSliceSize = (*(connectors-1))->getDstSliceSize(connectors-1, nConnectors-1) ; const T* in = input ; for(int i=0,j=0;itransfer(rank, connectors-1, nConnectors-1, in, output+connector[j]*dstSliceSize) ; // the multiplication must be avoid in further optimization j++ ; } in += srcSliceSize ; } } } // hook for transfering mask in grid connector, maybe find an other way to doing that... void transfer_or(int rank, shared_ptr* connectors, int nConnectors, const bool* input, bool* output) { auto& connector = connector_[rank] ; // probably costly, find a better way to avoid the map auto& mask = mask_[rank] ; int srcSize = mask.size() ; if (nConnectors==0) { for(int i=0, j=0; igetSrcSliceSize(rank, connectors-1, nConnectors-1) ; int dstSliceSize = (*(connectors-1))->getDstSliceSize(connectors-1, nConnectors-1) ; const bool* in = input ; for(int i=0,j=0;itransfer_or(rank, connectors-1, nConnectors-1, in, output+connector[j]*dstSliceSize) ; // the multiplication must be avoid in further optimization j++ ; } in += srcSliceSize ; } } } template void transfer(map>& dataIn, CArray& dataOut, T missingValue) { transfer(1, 1, dataIn, dataOut, missingValue); } template void transfer(int sizeT, map>& dataIn, CArray& dataOut, T missingValue) { transfer(1, sizeT, dataIn, dataOut, missingValue) ; } template void transfer(int repeat , int sizeT, map>& dataIn, CArray& dataOut, T missingValue) { dataOut.resize(repeat*dstSize_*sizeT) ; dataOut=missingValue ; transfer(repeat, sizeT, dataIn, dataOut) ; } template void transfer(CEventServer& event, int sizeT, CArray& dataOut) { map> dataIn ; for (auto& subEvent : event.subEvents) { auto& data = dataIn[subEvent.rank]; (*subEvent.buffer) >> data ; } transfer(1, sizeT, dataIn, dataOut) ; } template void transfer(CEventServer& event, CArray& dataOut) { transfer(event, 1, dataOut) ; } template void transfer(CEventServer& event, int sizeT, CArray& dataOut, T missingValue) { map> dataIn ; for (auto& subEvent : event.subEvents) { auto& data = dataIn[subEvent.rank]; (*subEvent.buffer) >> data ; } transfer(1, sizeT, dataIn, dataOut, missingValue) ; } template void transfer(CEventServer& event, CArray& dataOut, T missingValue) { map> dataIn ; for (auto& subEvent : event.subEvents) { auto& data = dataIn[subEvent.rank]; (*subEvent.buffer) >> data ; } transfer(1, 1, dataIn, dataOut, missingValue) ; } int getSrcSliceSize(int rank, shared_ptr* connectors, int nConnectors) { if (nConnectors==0) return srcSize_[rank] ; else return srcSize_[rank] * (*(connectors-1))->getSrcSliceSize(rank, connectors-1,nConnectors-1) ; } int getDstSliceSize(shared_ptr* connectors, int nConnectors) { if (nConnectors==0) return dstSize_ ; else return dstSize_ * (*(connectors-1))->getDstSliceSize(connectors-1,nConnectors-1) ; } int getDstSize(void) {return dstSize_ ;} } ; } #endif