// Declares xios::CGathererConnector.
//
// NOTE(review): this copy of the header looks damaged and should be restored
// from version control before it is compiled or edited further:
//   - the whole file is collapsed onto three physical lines, so every `//`
//     comment swallows the code that follows it on the same physical line,
//     and all preprocessor directives share one line;
//   - text between '<' and '>' appears to have been stripped: template
//     parameter lists are missing (`shared_ptr srcView_`, `map> connector_`,
//     `template void transfer`) and several loops break off mid-condition
//     (`for(int l=0; l vcount(...)`, `for(int i=0, j=0; igetSrcSliceSize(...)`).
// The summary below is limited to what the surviving text shows.
//
// State
//   srcView_, dstView_ : the two views handed to the constructor.
//   connector_, mask_  : maps indexed by rank (`connector_[rank]`,
//                        `mask_[rank]`); the mask is on the source view.
//   dstSize_           : destination size, returned by getDstSize().
//   srcSize_           : per-rank source size (`srcSize_[rank]`).
//   computeConnector() is only declared here; presumably it fills the members
//   above -- confirm in the implementation file.
//
// transfer(repeat, sizeT, dataIn, dataOut, op)
//   Resizes dataOut to repeat * dstSize_ * sizeT, then iterates over the
//   per-rank entries of dataIn, using connector_[rank] and mask_[rank]. The
//   element copy loops themselves are not present in this copy. In the
//   surviving reduction code a counter per output element is allocated
//   (initialised to 0), the default value is quiet_NaN(), output elements whose
//   counter is 0 receive that default value, and one branch (its condition is
//   not visible) divides each element by its counter.
//   The shorter overloads forward with repeat = 1 and/or sizeT = 1.
//
// transfer(rank, connectors, nConnectors, input, output, op, count)
// transfer_or(rank, connectors, nConnectors, input, output)
//   nConnectors == 0 is the base case (body truncated in this copy). Otherwise
//   the call is forwarded to `*(connectors-1)` with nConnectors-1, the output
//   pointer is offset by connector[j] * dstSliceSize, and the input pointer
//   advances by srcSliceSize on each loop iteration.
//   NOTE(review): `count` defaults to nullptr but is offset unconditionally in
//   the recursive call -- confirm callers always supply a buffer on that path.
//
// transfer(..., missingValue, op)
//   Resizes dataOut to repeat * dstSize_ * sizeT, assigns missingValue to it,
//   then calls the corresponding transfer without missingValue.
//
// transfer(CEventServer& event, ...)
//   For each entry of event.subEvents, streams `*subEvent.buffer` into
//   dataIn[subEvent.rank], then calls the map-based transfer with repeat = 1.
//
// getSrcSliceSize(rank, connectors, nConnectors)
// getDstSliceSize(connectors, nConnectors)
//   Return srcSize_[rank] (resp. dstSize_) when nConnectors == 0; otherwise
//   multiply it by the result of the same call on `*(connectors-1)` with
//   nConnectors-1.
#ifndef __GATHERER_CONNECTOR_HPP__ #define __GATHERER_CONNECTOR_HPP__ #include "xios_spl.hpp" #include "array_new.hpp" #include "distributed_view.hpp" #include "mpi.hpp" #include "local_view.hpp" #include "distributed_view.hpp" #include "context_client.hpp" #include "reduction_types.hpp" namespace xios { class CGathererConnector { private: shared_ptr srcView_; shared_ptr dstView_; map> connector_ ; map> mask_ ; // mask is on src view int dstSize_ ; map srcSize_ ; public: CGathererConnector(shared_ptr srcView, shared_ptr dstView) : srcView_(srcView), dstView_(dstView) {} ; void computeConnector(void) ; template void transfer(int repeat, int sizeT, map>& dataIn, CArray& dataOut, EReduction op = EReduction::none) { // for future, make a specific transfer function for sizeT=1 to avoid multiplication (increasing performance) size_t dstSlice = dstSize_*sizeT ; dataOut.resize(repeat* dstSlice) ; if (op == EReduction::none) // tranfer without reduction { for(auto& data : dataIn) { T* output = dataOut.dataFirst() ; int rank=data.first ; auto input = data.second.dataFirst() ; auto& connector=connector_[rank] ; auto& mask=mask_[rank] ; int size=mask.size() ; size_t srcSlice = size * sizeT ; for(int l=0; l vcount(dataOut.size(),0) ; int* count = vcount.data() ; T defaultValue = std::numeric_limits::quiet_NaN(); for(auto& data : dataIn) { T* output = dataOut.dataFirst() ; int rank=data.first ; auto input = data.second.dataFirst() ; auto& connector=connector_[rank] ; auto& mask=mask_[rank] ; int size=mask.size() ; size_t srcSlice = size * sizeT ; for(int l=0; l0) dataOut[i]/=count[i] ; else dataOut[i] = defaultValue ; } else for(int i=0; i < dataOut.size() ; i++) if (count[i]==0) dataOut[i] = defaultValue ; } } template void transfer(int sizeT, map>& dataIn, CArray& dataOut, EReduction op = EReduction::none) { transfer(1, sizeT, dataIn, dataOut, op) ; } template void transfer(map>& dataIn, CArray& dataOut, EReduction op = EReduction::none) { transfer(1,dataIn,dataOut, op) ; } 
template void transfer(int rank, shared_ptr* connectors, int nConnectors, const T* input, T* output, EReduction op = EReduction::none, int* count=nullptr) { auto& connector = connector_[rank] ; // probably costly, find a better way to avoid the map auto& mask = mask_[rank] ; int srcSize = mask.size() ; if (nConnectors==0) { if (op == EReduction::none) { for(int i=0, j=0; igetSrcSliceSize(rank, connectors-1, nConnectors-1) ; int dstSliceSize = (*(connectors-1))->getDstSliceSize(connectors-1, nConnectors-1) ; const T* in = input ; for(int i=0,j=0;itransfer(rank, connectors-1, nConnectors-1, in, output+connector[j]*dstSliceSize, op, count+connector[j]*dstSliceSize) ; // the multiplication must be avoid in further optimization j++ ; } in += srcSliceSize ; } } } // hook for transfering mask in grid connector, maybe find an other way to doing that... void transfer_or(int rank, shared_ptr* connectors, int nConnectors, const bool* input, bool* output) { auto& connector = connector_[rank] ; // probably costly, find a better way to avoid the map auto& mask = mask_[rank] ; int srcSize = mask.size() ; if (nConnectors==0) { for(int i=0, j=0; igetSrcSliceSize(rank, connectors-1, nConnectors-1) ; int dstSliceSize = (*(connectors-1))->getDstSliceSize(connectors-1, nConnectors-1) ; const bool* in = input ; for(int i=0,j=0;itransfer_or(rank, connectors-1, nConnectors-1, in, output+connector[j]*dstSliceSize) ; // the multiplication must be avoid in further optimization j++ ; } in += srcSliceSize ; } } } template void transfer(map>& dataIn, CArray& dataOut, T missingValue, EReduction op = EReduction::none) { transfer(1, 1, dataIn, dataOut, missingValue, op); } template void transfer(int sizeT, map>& dataIn, CArray& dataOut, T missingValue, EReduction op = EReduction::none) { transfer(1, sizeT, dataIn, dataOut, missingValue, op) ; } template void transfer(int repeat , int sizeT, map>& dataIn, CArray& dataOut, T missingValue, EReduction op = EReduction::none) { 
dataOut.resize(repeat*dstSize_*sizeT) ; dataOut=missingValue ; transfer(repeat, sizeT, dataIn, dataOut, op) ; } template void transfer(CEventServer& event, int sizeT, CArray& dataOut, EReduction op = EReduction::none) { map> dataIn ; for (auto& subEvent : event.subEvents) { auto& data = dataIn[subEvent.rank]; (*subEvent.buffer) >> data ; } transfer(1, sizeT, dataIn, dataOut, op) ; } template void transfer(CEventServer& event, CArray& dataOut, EReduction op = EReduction::none) { transfer(event, 1, dataOut, op) ; } template void transfer(CEventServer& event, int sizeT, CArray& dataOut, T missingValue, EReduction op = EReduction::none) { map> dataIn ; for (auto& subEvent : event.subEvents) { auto& data = dataIn[subEvent.rank]; (*subEvent.buffer) >> data ; } transfer(1, sizeT, dataIn, dataOut, missingValue, op) ; } template void transfer(CEventServer& event, CArray& dataOut, T missingValue, EReduction op = EReduction::none) { map> dataIn ; for (auto& subEvent : event.subEvents) { auto& data = dataIn[subEvent.rank]; (*subEvent.buffer) >> data ; } transfer(1, 1, dataIn, dataOut, missingValue, op) ; } int getSrcSliceSize(int rank, shared_ptr* connectors, int nConnectors) { if (nConnectors==0) return srcSize_[rank] ; else return srcSize_[rank] * (*(connectors-1))->getSrcSliceSize(rank, connectors-1,nConnectors-1) ; } int getDstSliceSize(shared_ptr* connectors, int nConnectors) { if (nConnectors==0) return dstSize_ ; else return dstSize_ * (*(connectors-1))->getDstSliceSize(connectors-1,nConnectors-1) ; } int getDstSize(void) {return dstSize_ ;} } ; } #endif