MODULE transfert_mpi_legacy_mod USE genmod USE field_mod IMPLICIT NONE TYPE array INTEGER,POINTER :: value(:)=>null() INTEGER,POINTER :: sign(:)=>null() INTEGER :: domain INTEGER :: rank INTEGER :: tag INTEGER :: size INTEGER :: offset INTEGER :: ireq INTEGER,POINTER :: buffer(:)=>null() REAL,POINTER :: buffer_r(:)=>null() INTEGER,POINTER :: src_value(:)=>null() END TYPE array TYPE t_buffer REAL,POINTER :: r(:) INTEGER :: size INTEGER :: rank END TYPE t_buffer TYPE t_request INTEGER :: type_field INTEGER :: max_size INTEGER :: size LOGICAL :: vector INTEGER,POINTER :: src_domain(:) INTEGER,POINTER :: src_i(:) INTEGER,POINTER :: src_j(:) INTEGER,POINTER :: src_ind(:) INTEGER,POINTER :: target_ind(:) INTEGER,POINTER :: target_i(:) INTEGER,POINTER :: target_j(:) INTEGER,POINTER :: target_sign(:) INTEGER :: nrecv TYPE(ARRAY),POINTER :: recv(:) INTEGER :: nsend INTEGER :: nreq_mpi INTEGER :: nreq_send INTEGER :: nreq_recv TYPE(ARRAY),POINTER :: send(:) END TYPE t_request TYPE(t_request),SAVE,POINTER :: req_i1(:) TYPE(t_request),SAVE,POINTER :: req_e1_scal(:) TYPE(t_request),SAVE,POINTER :: req_e1_vect(:) TYPE(t_request),SAVE,POINTER :: req_z1_scal(:) TYPE(t_request),SAVE,POINTER :: req_i0(:) TYPE(t_request),SAVE,POINTER :: req_e0_scal(:) TYPE(t_request),SAVE,POINTER :: req_e0_vect(:) TYPE t_reorder INTEGER :: ind INTEGER :: rank INTEGER :: tag INTEGER :: isend END TYPE t_reorder TYPE t_message CHARACTER(LEN=100) :: name ! for debug TYPE(t_request), POINTER :: request(:) INTEGER :: nreq INTEGER :: nreq_send INTEGER :: nreq_recv TYPE(t_reorder), POINTER :: reorder(:) INTEGER, POINTER :: mpi_req(:) INTEGER, POINTER :: status(:,:) TYPE(t_buffer),POINTER :: buffers(:) TYPE(t_field),POINTER :: field(:) LOGICAL :: completed LOGICAL :: pending LOGICAL :: open ! for debug INTEGER :: number LOGICAL :: ondevice=.false. !request(ind) d=>domain(ind) IF (type_field==field_t) THEN Max_size=2*(d%iim+2)+2*(d%jjm+2) ELSE IF (type_field==field_u) THEN Max_size=3*(2*(d%iim+2)+2*(d%jjm+2)) ELSE IF (type_field==field_z) THEN Max_size=2*(2*(d%iim+2)+2*(d%jjm+2)) ENDIF req%type_field=type_field req%max_size=max_size*2 req%size=0 req%vector=.FALSE. IF (PRESENT(vector)) req%vector=vector ALLOCATE(req%src_domain(req%max_size)) ALLOCATE(req%src_ind(req%max_size)) ALLOCATE(req%target_ind(req%max_size)) ALLOCATE(req%src_i(req%max_size)) ALLOCATE(req%src_j(req%max_size)) ALLOCATE(req%target_i(req%max_size)) ALLOCATE(req%target_j(req%max_size)) ALLOCATE(req%target_sign(req%max_size)) ENDDO END SUBROUTINE create_request SUBROUTINE reallocate_request(req) IMPLICIT NONE TYPE(t_request),POINTER :: req INTEGER,POINTER :: src_domain(:) INTEGER,POINTER :: src_ind(:) INTEGER,POINTER :: target_ind(:) INTEGER,POINTER :: src_i(:) INTEGER,POINTER :: src_j(:) INTEGER,POINTER :: target_i(:) INTEGER,POINTER :: target_j(:) INTEGER,POINTER :: target_sign(:) PRINT *,"REALLOCATE_REQUEST" src_domain=>req%src_domain src_ind=>req%src_ind target_ind=>req%target_ind src_i=>req%src_i src_j=>req%src_j target_i=>req%target_i target_j=>req%target_j target_sign=>req%target_sign ALLOCATE(req%src_domain(req%max_size*2)) ALLOCATE(req%src_ind(req%max_size*2)) ALLOCATE(req%target_ind(req%max_size*2)) ALLOCATE(req%src_i(req%max_size*2)) ALLOCATE(req%src_j(req%max_size*2)) ALLOCATE(req%target_i(req%max_size*2)) ALLOCATE(req%target_j(req%max_size*2)) ALLOCATE(req%target_sign(req%max_size*2)) req%src_domain(1:req%max_size)=src_domain(:) req%src_ind(1:req%max_size)=src_ind(:) req%target_ind(1:req%max_size)=target_ind(:) req%src_i(1:req%max_size)=src_i(:) req%src_j(1:req%max_size)=src_j(:) req%target_i(1:req%max_size)=target_i(:) req%target_j(1:req%max_size)=target_j(:) req%target_sign(1:req%max_size)=target_sign(:) req%max_size=req%max_size*2 DEALLOCATE(src_domain) DEALLOCATE(src_ind) DEALLOCATE(target_ind) DEALLOCATE(src_i) DEALLOCATE(src_j) DEALLOCATE(target_i) DEALLOCATE(target_j) DEALLOCATE(target_sign) END SUBROUTINE reallocate_request SUBROUTINE request_add_point(ind,i,j,request,pos) USE domain_mod USE field_mod IMPLICIT NONE INTEGER,INTENT(IN) :: ind INTEGER,INTENT(IN) :: i INTEGER,INTENT(IN) :: j TYPE(t_request),POINTER :: request(:) INTEGER,INTENT(IN),OPTIONAL :: pos INTEGER :: src_domain INTEGER :: src_iim,src_i,src_j,src_n,src_pos,src_delta TYPE(t_request),POINTER :: req TYPE(t_domain),POINTER :: d req=>request(ind) d=>domain(ind) IF (req%max_size==req%size) CALL reallocate_request(req) req%size=req%size+1 IF (req%type_field==field_t) THEN src_domain=domain(ind)%assign_domain(i,j) src_iim=domain_glo(src_domain)%iim src_i=domain(ind)%assign_i(i,j) src_j=domain(ind)%assign_j(i,j) req%target_ind(req%size)=(j-1)*d%iim+i req%target_sign(req%size)=1 req%src_domain(req%size)=src_domain req%src_ind(req%size)=(src_j-1)*src_iim+src_i ELSE IF (req%type_field==field_u) THEN IF (.NOT. PRESENT(pos)) STOP 'argument request_add_point non conforme' src_domain=domain(ind)%edge_assign_domain(pos-1,i,j) src_iim=domain_glo(src_domain)%iim src_i=domain(ind)%edge_assign_i(pos-1,i,j) src_j=domain(ind)%edge_assign_j(pos-1,i,j) src_n=(src_j-1)*src_iim+src_i src_delta=domain(ind)%delta(i,j) src_pos=domain(ind)%edge_assign_pos(pos-1,i,j)+1 req%target_ind(req%size)=(j-1)*d%iim+i+d%u_pos(pos) req%target_sign(req%size)= 1 IF (req%vector) req%target_sign(req%size)= domain(ind)%edge_assign_sign(pos-1,i,j) req%src_domain(req%size)=src_domain req%src_ind(req%size)=src_n+domain_glo(src_domain)%u_pos(src_pos) ELSE IF (req%type_field==field_z) THEN IF (.NOT. PRESENT(pos)) STOP 'argument request_add_point non conforme' src_domain=domain(ind)%vertex_assign_domain(pos-1,i,j) src_iim=domain_glo(src_domain)%iim src_i=domain(ind)%vertex_assign_i(pos-1,i,j) src_j=domain(ind)%vertex_assign_j(pos-1,i,j) src_n=(src_j-1)*src_iim+src_i src_delta=domain(ind)%delta(i,j) src_pos=domain(ind)%vertex_assign_pos(pos-1,i,j)+1 req%target_ind(req%size)=(j-1)*d%iim+i+d%z_pos(pos) req%target_sign(req%size)=1 req%src_domain(req%size)=src_domain req%src_ind(req%size)=src_n+domain_glo(src_domain)%z_pos(src_pos) ENDIF END SUBROUTINE request_add_point SUBROUTINE Finalize_request(request) USE mpipara USE domain_mod USE mpi_mod IMPLICIT NONE TYPE(t_request),POINTER :: request(:) TYPE(t_request),POINTER :: req, req_src INTEGER :: nb_domain_recv(0:mpi_size-1) INTEGER :: nb_domain_send(0:mpi_size-1) INTEGER :: tag_rank(0:mpi_size-1) INTEGER :: nb_data_domain_recv(ndomain_glo) INTEGER :: list_domain_recv(ndomain_glo) INTEGER,ALLOCATABLE :: list_domain_send(:) INTEGER :: list_domain(ndomain) INTEGER :: rank,i,j,pos INTEGER :: size_,ind_glo,ind_loc INTEGER :: isend, irecv, ireq, nreq, nsend, nrecv INTEGER, ALLOCATABLE :: mpi_req(:) INTEGER, ALLOCATABLE :: status(:,:) INTEGER, ALLOCATABLE :: rank_list(:) INTEGER, ALLOCATABLE :: offset(:) LOGICAL,PARAMETER :: debug = .FALSE. IF (.NOT. using_mpi) RETURN DO ind_loc=1,ndomain req=>request(ind_loc) nb_data_domain_recv(:) = 0 nb_domain_recv(:) = 0 tag_rank(:)=0 DO i=1,req%size ind_glo=req%src_domain(i) nb_data_domain_recv(ind_glo)=nb_data_domain_recv(ind_glo)+1 ENDDO DO ind_glo=1,ndomain_glo IF ( nb_data_domain_recv(ind_glo) > 0 ) nb_domain_recv(domglo_rank(ind_glo))=nb_domain_recv(domglo_rank(ind_glo))+1 ENDDO req%nrecv=sum(nb_domain_recv(:)) ALLOCATE(req%recv(req%nrecv)) irecv=0 DO ind_glo=1,ndomain_glo IF (nb_data_domain_recv(ind_glo)>0) THEN irecv=irecv+1 list_domain_recv(ind_glo)=irecv req%recv(irecv)%rank=domglo_rank(ind_glo) req%recv(irecv)%size=nb_data_domain_recv(ind_glo) req%recv(irecv)%domain=domglo_loc_ind(ind_glo) ALLOCATE(req%recv(irecv)%value(req%recv(irecv)%size)) ALLOCATE(req%recv(irecv)%sign(req%recv(irecv)%size)) ALLOCATE(req%recv(irecv)%buffer(req%recv(irecv)%size)) ENDIF ENDDO req%recv(:)%size=0 irecv=0 DO i=1,req%size irecv=list_domain_recv(req%src_domain(i)) req%recv(irecv)%size=req%recv(irecv)%size+1 size_=req%recv(irecv)%size req%recv(irecv)%value(size_)=req%src_ind(i) req%recv(irecv)%buffer(size_)=req%target_ind(i) req%recv(irecv)%sign(size_)=req%target_sign(i) ENDDO ENDDO nb_domain_recv(:) = 0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv rank=req%recv(irecv)%rank nb_domain_recv(rank)=nb_domain_recv(rank)+1 ENDDO ENDDO CALL MPI_Alltoall(nb_domain_recv,1,MPI_INTEGER,nb_domain_send,1,MPI_INTEGER,comm_icosa,ierr) ALLOCATE(list_domain_send(sum(nb_domain_send))) nreq=sum(nb_domain_recv(:))+sum(nb_domain_send(:)) ALLOCATE(mpi_req(nreq)) ALLOCATE(status(MPI_STATUS_SIZE,nreq)) ireq=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv ireq=ireq+1 CALL MPI_ISEND(req%recv(irecv)%domain,1,MPI_INTEGER,req%recv(irecv)%rank,0,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"Isend ",req%recv(irecv)%domain, "from ", mpi_rank, "to ",req%recv(irecv)%rank, "tag ",0 ENDDO ENDDO IF (debug) PRINT *,"------------" j=0 DO rank=0,mpi_size-1 DO i=1,nb_domain_send(rank) j=j+1 ireq=ireq+1 CALL MPI_IRECV(list_domain_send(j),1,MPI_INTEGER,rank,0,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"IRecv ",list_domain_send(j), "from ", rank, "to ",mpi_rank, "tag ",0 ENDDO ENDDO IF (debug) PRINT *,"------------" CALL MPI_WAITALL(nreq,mpi_req,status,ierr) list_domain(:)=0 DO i=1,sum(nb_domain_send) ind_loc=list_domain_send(i) list_domain(ind_loc)=list_domain(ind_loc)+1 ENDDO DO ind_loc=1,ndomain req=>request(ind_loc) req%nsend=list_domain(ind_loc) ALLOCATE(req%send(req%nsend)) ENDDO IF (debug) PRINT *,"------------" ireq=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv ireq=ireq+1 CALL MPI_ISEND(mpi_rank,1,MPI_INTEGER,req%recv(irecv)%rank,req%recv(irecv)%domain,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"Isend ",mpi_rank, "from ", mpi_rank, "to ",req%recv(irecv)%rank,"tag ",req%recv(irecv)%domain ENDDO IF (debug) PRINT *,"------------" DO isend=1,req%nsend ireq=ireq+1 CALL MPI_IRECV(req%send(isend)%rank,1,MPI_INTEGER,MPI_ANY_SOURCE,ind_loc,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"IRecv ",req%send(isend)%rank, "from ", "xxx", "to ",mpi_rank, "tag ",ind_loc ENDDO ENDDO IF (debug) PRINT *,"------------" CALL MPI_WAITALL(nreq,mpi_req,status,ierr) CALL MPI_BARRIER(comm_icosa,ierr) IF (debug) PRINT *,"------------" ireq=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv ireq=ireq+1 CALL MPI_ISEND(ind_loc,1,MPI_INTEGER,req%recv(irecv)%rank,req%recv(irecv)%domain,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"Isend ",ind_loc, "from ", mpi_rank, "to ",req%recv(irecv)%rank,"tag ",req%recv(irecv)%domain ENDDO IF (debug) PRINT *,"------------" DO isend=1,req%nsend ireq=ireq+1 CALL MPI_IRECV(req%send(isend)%domain,1,MPI_INTEGER,req%send(isend)%rank,ind_loc,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"IRecv ",req%send(isend)%domain, "from ", req%send(isend)%rank, "to ",mpi_rank, "tag ",ind_loc ENDDO ENDDO IF (debug) PRINT *,"------------" CALL MPI_WAITALL(nreq,mpi_req,status,ierr) CALL MPI_BARRIER(comm_icosa,ierr) IF (debug) PRINT *,"------------" ireq=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv ireq=ireq+1 req%recv(irecv)%tag=tag_rank(req%recv(irecv)%rank) tag_rank(req%recv(irecv)%rank)=tag_rank(req%recv(irecv)%rank)+1 CALL MPI_ISEND(req%recv(irecv)%tag,1,MPI_INTEGER,req%recv(irecv)%rank,req%recv(irecv)%domain,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"Isend ",req%recv(irecv)%tag, "from ", mpi_rank, "to ",req%recv(irecv)%rank,"tag ",req%recv(irecv)%domain ENDDO IF (debug) PRINT *,"------------" DO isend=1,req%nsend ireq=ireq+1 CALL MPI_IRECV(req%send(isend)%tag,1,MPI_INTEGER,req%send(isend)%rank,ind_loc,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"IRecv ",req%send(isend)%tag, "from ", req%send(isend)%rank, "to ",mpi_rank, "tag ",ind_loc ENDDO ENDDO IF (debug) PRINT *,"------------" CALL MPI_WAITALL(nreq,mpi_req,status,ierr) CALL MPI_BARRIER(comm_icosa,ierr) IF (debug) PRINT *,"------------" ireq=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv ireq=ireq+1 CALL MPI_ISEND(req%recv(irecv)%size,1,MPI_INTEGER,req%recv(irecv)%rank,req%recv(irecv)%tag,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"Isend ",req%recv(irecv)%size, "from ", mpi_rank, "to ",req%recv(irecv)%rank,"tag ",req%recv(irecv)%domain ENDDO IF (debug) PRINT *,"------------" DO isend=1,req%nsend ireq=ireq+1 CALL MPI_IRECV(req%send(isend)%size,1,MPI_INTEGER,req%send(isend)%rank,req%send(isend)%tag,comm_icosa, mpi_req(ireq),ierr) IF (debug) PRINT *,"IRecv ",req%send(isend)%size, "from ", req%send(isend)%rank, "to ",mpi_rank, "tag ",ind_loc ENDDO ENDDO CALL MPI_WAITALL(nreq,mpi_req,status,ierr) ireq=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv ireq=ireq+1 CALL MPI_ISEND(req%recv(irecv)%value,req%recv(irecv)%size,MPI_INTEGER,& req%recv(irecv)%rank,req%recv(irecv)%tag,comm_icosa, mpi_req(ireq),ierr) ENDDO DO isend=1,req%nsend ireq=ireq+1 ALLOCATE(req%send(isend)%value(req%send(isend)%size)) CALL MPI_IRECV(req%send(isend)%value,req%send(isend)%size,MPI_INTEGER,& req%send(isend)%rank,req%send(isend)%tag,comm_icosa, mpi_req(ireq),ierr) ENDDO ENDDO CALL MPI_WAITALL(nreq,mpi_req,status,ierr) DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv req%recv(irecv)%value(:)=req%recv(irecv)%buffer(:) req%recv(irecv)%sign(:) =req%recv(irecv)%sign(:) DEALLOCATE(req%recv(irecv)%buffer) ENDDO ENDDO ! domain is on the same mpi process => copie memory to memory DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv IF (req%recv(irecv)%rank==mpi_rank) THEN req_src=>request(req%recv(irecv)%domain) DO isend=1,req_src%nsend IF (req_src%send(isend)%rank==mpi_rank .AND. req_src%send(isend)%tag==req%recv(irecv)%tag) THEN req%recv(irecv)%src_value => req_src%send(isend)%value IF ( size(req%recv(irecv)%value) /= size(req_src%send(isend)%value)) THEN PRINT *,ind_loc, irecv, isend, size(req%recv(irecv)%value), size(req_src%send(isend)%value) STOP "size(req%recv(irecv)%value) /= size(req_src%send(isend)%value" ENDIF ENDIF ENDDO ENDIF ENDDO ENDDO ! true number of mpi request request(:)%nreq_mpi=0 request(:)%nreq_send=0 request(:)%nreq_recv=0 ALLOCATE(rank_list(sum(request(:)%nsend))) ALLOCATE(offset(sum(request(:)%nsend))) offset(:)=0 nsend=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO isend=1,req%nsend IF (req%send(isend)%rank/=mpi_rank) THEN pos=0 DO i=1,nsend IF (req%send(isend)%rank==rank_list(i)) EXIT pos=pos+1 ENDDO IF (pos==nsend) THEN nsend=nsend+1 req%nreq_mpi=req%nreq_mpi+1 req%nreq_send=req%nreq_send+1 IF (mpi_threading_mode==MPI_THREAD_FUNNELED .OR. mpi_threading_mode==MPI_THREAD_SINGLE) THEN rank_list(nsend)=req%send(isend)%rank ELSE rank_list(nsend)=-1 ENDIF ENDIF pos=pos+1 req%send(isend)%ireq=pos req%send(isend)%offset=offset(pos) offset(pos)=offset(pos)+req%send(isend)%size ENDIF ENDDO ENDDO DEALLOCATE(rank_list) DEALLOCATE(offset) ALLOCATE(rank_list(sum(request(:)%nrecv))) ALLOCATE(offset(sum(request(:)%nrecv))) offset(:)=0 nrecv=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv IF (req%recv(irecv)%rank/=mpi_rank) THEN pos=0 DO i=1,nrecv IF (req%recv(irecv)%rank==rank_list(i)) EXIT pos=pos+1 ENDDO IF (pos==nrecv) THEN nrecv=nrecv+1 req%nreq_mpi=req%nreq_mpi+1 req%nreq_recv=req%nreq_recv+1 IF (mpi_threading_mode==MPI_THREAD_FUNNELED .OR. mpi_threading_mode==MPI_THREAD_SINGLE) THEN rank_list(nrecv)=req%recv(irecv)%rank ELSE rank_list(nrecv)=-1 ENDIF ENDIF pos=pos+1 req%recv(irecv)%ireq=nsend+pos req%recv(irecv)%offset=offset(pos) offset(pos)=offset(pos)+req%recv(irecv)%size ENDIF ENDDO ENDDO ! get the offsets ireq=0 DO ind_loc=1,ndomain req=>request(ind_loc) DO irecv=1,req%nrecv ireq=ireq+1 CALL MPI_ISEND(req%recv(irecv)%offset,1,MPI_INTEGER,& req%recv(irecv)%rank,req%recv(irecv)%tag,comm_icosa, mpi_req(ireq),ierr) ENDDO DO isend=1,req%nsend ireq=ireq+1 CALL MPI_IRECV(req%send(isend)%offset,1,MPI_INTEGER,& req%send(isend)%rank,req%send(isend)%tag,comm_icosa, mpi_req(ireq),ierr) ENDDO ENDDO CALL MPI_WAITALL(nreq,mpi_req,status,ierr) END SUBROUTINE Finalize_request SUBROUTINE init_message_seq(field, request, message, name) USE field_mod USE domain_mod USE mpi_mod USE mpipara USE mpi_mod IMPLICIT NONE TYPE(t_field),POINTER :: field(:) TYPE(t_request),POINTER :: request(:) TYPE(t_message) :: message CHARACTER(LEN=*), INTENT(IN), OPTIONAL :: name !$OMP MASTER message%request=>request IF(PRESENT(name)) THEN message%name = TRIM(name) ELSE message%name = 'unknown' END IF !$OMP END MASTER !$OMP BARRIER END SUBROUTINE init_message_seq SUBROUTINE send_message_seq(field,message) USE field_mod USE domain_mod USE mpi_mod USE mpipara USE omp_para USE trace IMPLICIT NONE TYPE(t_field),POINTER :: field(:) TYPE(t_message) :: message CALL transfert_request_seq(field,message%request) END SUBROUTINE send_message_seq SUBROUTINE test_message_seq(message) IMPLICIT NONE TYPE(t_message) :: message END SUBROUTINE test_message_seq SUBROUTINE wait_message_seq(message) IMPLICIT NONE TYPE(t_message) :: message END SUBROUTINE wait_message_seq SUBROUTINE init_message_mpi(field,request, message, name) USE field_mod USE domain_mod USE mpi_mod USE mpipara USE mpi_mod IMPLICIT NONE TYPE(t_field),POINTER :: field(:) TYPE(t_request),POINTER :: request(:) TYPE(t_message) :: message CHARACTER(LEN=*), INTENT(IN),OPTIONAL :: name TYPE(t_request),POINTER :: req INTEGER :: irecv,isend INTEGER :: ireq,nreq INTEGER :: ind INTEGER :: dim3,dim4 INTEGER,SAVE :: message_number=0 ! TYPE(t_reorder),POINTER :: reorder(:) ! TYPE(t_reorder) :: reorder_swap !$OMP BARRIER !$OMP MASTER !init off-device message%ondevice=.false. IF(PRESENT(name)) THEN message%name = TRIM(name) ELSE message%name = 'unknown' END IF message%number=message_number message_number=message_number+1 IF (message_number==100) message_number=0 message%request=>request message%nreq=sum(message%request(:)%nreq_mpi) message%nreq_send=sum(message%request(:)%nreq_send) message%nreq_recv=sum(message%request(:)%nreq_recv) nreq=message%nreq ALLOCATE(message%mpi_req(nreq)) ALLOCATE(message%buffers(nreq)) ALLOCATE(message%status(MPI_STATUS_SIZE,nreq)) message%buffers(:)%size=0 message%pending=.FALSE. message%completed=.FALSE. message%open=.FALSE. DO ind=1,ndomain req=>request(ind) DO isend=1,req%nsend IF (req%send(isend)%rank/=mpi_rank) THEN ireq=req%send(isend)%ireq message%buffers(ireq)%size=message%buffers(ireq)%size+req%send(isend)%size message%buffers(ireq)%rank=req%send(isend)%rank ENDIF ENDDO DO irecv=1,req%nrecv IF (req%recv(irecv)%rank/=mpi_rank) THEN ireq=req%recv(irecv)%ireq message%buffers(ireq)%size=message%buffers(ireq)%size+req%recv(irecv)%size message%buffers(ireq)%rank=req%recv(irecv)%rank ENDIF ENDDO ENDDO IF (field(1)%data_type==type_real) THEN IF (field(1)%ndim==2) THEN DO ireq=1,message%nreq CALL allocate_mpi_buffer(message%buffers(ireq)%r,message%buffers(ireq)%size) ENDDO ELSE IF (field(1)%ndim==3) THEN dim3=size(field(1)%rval3d,2) DO ireq=1,message%nreq message%buffers(ireq)%size=message%buffers(ireq)%size*dim3 CALL allocate_mpi_buffer(message%buffers(ireq)%r,message%buffers(ireq)%size) ENDDO ELSE IF (field(1)%ndim==4) THEN dim3=size(field(1)%rval4d,2) dim4=size(field(1)%rval4d,3) DO ireq=1,message%nreq message%buffers(ireq)%size=message%buffers(ireq)%size*dim3*dim4 CALL allocate_mpi_buffer(message%buffers(ireq)%r,message%buffers(ireq)%size) ENDDO ENDIF ENDIF ! ! Reorder the request, so recv request are done in the same order than send request ! nreq_send=sum(request(:)%nsend) ! message%nreq_send=nreq_send ! ALLOCATE(message%reorder(nreq_send)) ! reorder=>message%reorder ! ireq=0 ! DO ind=1,ndomain ! req=>request(ind) ! DO isend=1,req%nsend ! ireq=ireq+1 ! reorder(ireq)%ind=ind ! reorder(ireq)%isend=isend ! reorder(ireq)%tag=req%send(isend)%tag ! ENDDO ! ENDDO ! ! do a very very bad sort ! DO i=1,nreq_send-1 ! DO j=i+1,nreq_send ! IF (reorder(i)%tag > reorder(j)%tag) THEN ! reorder_swap=reorder(i) ! reorder(i)=reorder(j) ! reorder(j)=reorder_swap ! ENDIF ! ENDDO ! ENDDO ! PRINT *,"reorder ",reorder(:)%tag !$OMP END MASTER !$OMP BARRIER END SUBROUTINE init_message_mpi SUBROUTINE Finalize_message_mpi(message) USE field_mod USE domain_mod USE mpi_mod USE mpipara USE mpi_mod IMPLICIT NONE TYPE(t_message) :: message INTEGER :: ireq, ibuff !$OMP BARRIER !$OMP MASTER IF (message%field(1)%data_type==type_real) THEN DO ireq=1,message%nreq CALL free_mpi_buffer(message%buffers(ireq)%r) ENDDO ENDIF !deallocate device data if ondevice if(message%ondevice) then do ireq=1, ndomain do ibuff=1,message%request(ireq)%nsend !$acc exit data delete(message%request(ireq)%send(ibuff)%buffer(:)) !$acc exit data delete(message%request(ireq)%send(ibuff)%buffer_r(:)) !$acc exit data delete(message%request(ireq)%send(ibuff)%sign(:)) !$acc exit data delete(message%request(ireq)%send(ibuff)%src_value(:)) !$acc exit data delete(message%request(ireq)%send(ibuff)%value(:)) end do do ibuff=1,message%request(ireq)%nrecv !$acc exit data delete(message%request(ireq)%recv(ibuff)%buffer(:)) !$acc exit data delete(message%request(ireq)%recv(ibuff)%buffer_r(:)) !$acc exit data delete(message%request(ireq)%recv(ibuff)%sign(:)) !$acc exit data delete(message%request(ireq)%recv(ibuff)%src_value(:)) !$acc exit data delete(message%request(ireq)%recv(ibuff)%value(:)) end do end do DO ireq=1,message%nreq !$acc exit data delete(message%buffers(ireq)%r) ENDDO message%ondevice=.false. end if DEALLOCATE(message%mpi_req) DEALLOCATE(message%buffers) DEALLOCATE(message%status) !$OMP END MASTER !$OMP BARRIER END SUBROUTINE Finalize_message_mpi !!!Update buffers on device for 'message' !!! does create_device_message when not already ondevice SUBROUTINE update_device_message_mpi(message) USE domain_mod IMPLICIT NONE TYPE(t_message), intent(inout) :: message INTEGER :: ireq, ibuff !if(.not. message%ondevice) call create_device_message_mpi(message) do ireq=1, ndomain do ibuff=1,message%request(ireq)%nsend !device buffers updated even if pointers not attached : !non allocated buffers in 'message' must be set to NULL() !$acc enter data copyin(message%request(ireq)%send(ibuff)%buffer(:)) async !$acc enter data copyin(message%request(ireq)%send(ibuff)%buffer_r(:)) async !$acc enter data copyin(message%request(ireq)%send(ibuff)%sign(:)) async !$acc enter data copyin(message%request(ireq)%send(ibuff)%src_value(:)) async !$acc enter data copyin(message%request(ireq)%send(ibuff)%value(:)) async end do do ibuff=1,message%request(ireq)%nrecv !$acc enter data copyin(message%request(ireq)%recv(ibuff)%buffer(:)) async !$acc enter data copyin(message%request(ireq)%recv(ibuff)%buffer_r(:)) async !$acc enter data copyin(message%request(ireq)%recv(ibuff)%sign(:)) async !$acc enter data copyin(message%request(ireq)%recv(ibuff)%src_value(:)) async !$acc enter data copyin(message%request(ireq)%recv(ibuff)%value(:)) async end do end do DO ireq=1,message%nreq !$acc enter data copyin(message%buffers(ireq)%r) async ENDDO message%ondevice=.true. END SUBROUTINE !TODO : add openacc with multiple process SUBROUTINE send_message_mpi(field,message) USE abort_mod USE profiling_mod USE field_mod USE domain_mod USE mpi_mod USE mpipara USE omp_para USE trace USE abort_mod IMPLICIT NONE TYPE(t_field),POINTER :: field(:) TYPE(t_message) :: message REAL(rstd),POINTER :: rval2d(:), src_rval2d(:) REAL(rstd),POINTER :: rval3d(:,:), src_rval3d(:,:) REAL(rstd),POINTER :: rval4d(:,:,:), src_rval4d(:,:,:) REAL(rstd),POINTER :: buffer_r(:) INTEGER,POINTER :: value(:) INTEGER,POINTER :: sgn(:) TYPE(ARRAY),POINTER :: recv,send TYPE(t_request),POINTER :: req INTEGER :: irecv,isend INTEGER :: ireq INTEGER :: ind,n INTEGER :: dim3,dim4,d3,d4 INTEGER,POINTER :: src_value(:) INTEGER :: offset,msize,moffset,rank INTEGER :: lbegin, lend INTEGER :: max_req ! CALL trace_start("send_message_mpi") CALL enter_profile(id_mpi) !Prepare 'message' for on-device copies if field is on device if( field(1)%ondevice .AND. .NOT. message%ondevice ) call update_device_message_mpi(message) CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) !$OMP MASTER IF(message%open) THEN PRINT *, 'send_message_mpi : message ' // TRIM(message%name) // & ' is still open, no call to wait_message_mpi after last send_message_mpi' CALL dynamico_abort( "send_message_mpi : message still open" ) END IF message%open=.TRUE. ! will be set to .FALSE. by wait_message_mpi message%field=>field IF (message%nreq>0) THEN message%completed=.FALSE. message%pending=.TRUE. ELSE message%completed=.TRUE. message%pending=.FALSE. ENDIF !$OMP END MASTER CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) IF (field(1)%data_type==type_real) THEN IF (field(1)%ndim==2) THEN DO ind=1,ndomain IF (.NOT. assigned_domain(ind) .OR. .NOT. is_omp_level_master) CYCLE rval2d=>field(ind)%rval2d req=>message%request(ind) DO isend=1,req%nsend send=>req%send(isend) value=>send%value IF (send%rank/=mpi_rank) THEN ireq=send%ireq buffer_r=>message%buffers(ireq)%r offset=send%offset msize=send%size call enter_profile(profile_mpi_copies) !$acc parallel loop default(present) async if (field(ind)%ondevice) DO n=1,msize buffer_r(offset+n)=rval2d(value(n)) ENDDO call exit_profile(profile_mpi_copies) IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") !$OMP CRITICAL CALL MPI_ISEND(buffer_r,send%size,MPI_REAL8,send%rank, & send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) !$OMP END CRITICAL ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") CALL MPI_ISEND(buffer_r,send%size,MPI_REAL8,send%rank, & send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDIF ENDDO ENDDO DO ind=1,ndomain IF (.NOT. assigned_domain(ind) .OR. .NOT. is_omp_level_master) CYCLE rval2d=>field(ind)%rval2d req=>message%request(ind) DO irecv=1,req%nrecv recv=>req%recv(irecv) IF (recv%rank==mpi_rank) THEN value=>recv%value src_value => recv%src_value src_rval2d=>field(recv%domain)%rval2d sgn=>recv%sign msize=recv%size call enter_profile(profile_mpi_copies) !$acc parallel loop default(present) async if (field(ind)%ondevice) DO n=1,msize rval2d(value(n))=src_rval2d(src_value(n))*sgn(n) ENDDO call exit_profile(profile_mpi_copies) ELSE ireq=recv%ireq buffer_r=>message%buffers(ireq)%r IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") !$OMP CRITICAL CALL MPI_IRECV(buffer_r,recv%size,MPI_REAL8,recv%rank, & recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) !$OMP END CRITICAL ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") CALL MPI_IRECV(buffer_r,recv%size,MPI_REAL8,recv%rank, & recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDIF ENDDO ENDDO ELSE IF (field(1)%ndim==3) THEN max_req=0 DO ind=1,ndomain req=>message%request(ind) IF (req%nsend>max_req) max_req=req%nsend ENDDO DO ind=1,ndomain IF (.NOT. assigned_domain(ind) ) CYCLE dim3=size(field(ind)%rval3d,2) CALL distrib_level(1,dim3, lbegin,lend) rval3d=>field(ind)%rval3d req=>message%request(ind) DO isend=1,req%nsend send=>req%send(isend) value=>send%value IF (send%rank/=mpi_rank) THEN ireq=send%ireq buffer_r=>message%buffers(ireq)%r msize=send%size moffset=send%offset call enter_profile(profile_mpi_copies) !$acc parallel loop default(present) async if (field(ind)%ondevice) DO d3=lbegin,lend offset=moffset*dim3 + (d3-1)*msize !$acc loop DO n=1,msize buffer_r(n+offset)=rval3d(value(n),d3) ENDDO ENDDO call exit_profile(profile_mpi_copies) IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) ENDIF IF (is_omp_level_master) THEN IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") !$OMP CRITICAL CALL MPI_ISEND(buffer_r,send%size*dim3,MPI_REAL8,send%rank, & send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) !$OMP END CRITICAL ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") CALL MPI_ISEND(buffer_r,send%size*dim3,MPI_REAL8,send%rank, & send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDIF ELSE IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) ENDIF ENDIF ENDDO IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN DO isend=req%nsend+1,max_req CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) ENDDO ENDIF ENDDO DO ind=1,ndomain IF (.NOT. assigned_domain(ind) ) CYCLE dim3=size(field(ind)%rval3d,2) CALL distrib_level(1,dim3, lbegin,lend) rval3d=>field(ind)%rval3d req=>message%request(ind) DO irecv=1,req%nrecv recv=>req%recv(irecv) IF (recv%rank==mpi_rank) THEN value=>recv%value src_value => recv%src_value src_rval3d=>field(recv%domain)%rval3d sgn=>recv%sign msize=recv%size call enter_profile(profile_mpi_copies) CALL trace_start("copy_data") !$acc parallel loop collapse(2) default(present) async if (field(ind)%ondevice) DO d3=lbegin,lend DO n=1,msize rval3d(value(n),d3)=src_rval3d(src_value(n),d3)*sgn(n) ENDDO ENDDO call exit_profile(profile_mpi_copies) CALL trace_end("copy_data") ELSE ireq=recv%ireq buffer_r=>message%buffers(ireq)%r IF (is_omp_level_master) THEN IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") !$OMP CRITICAL CALL MPI_IRECV(buffer_r,recv%size*dim3,MPI_REAL8,recv%rank, & recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) !$OMP END CRITICAL ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") CALL MPI_IRECV(buffer_r,recv%size*dim3,MPI_REAL8,recv%rank, & recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDIF ENDIF ENDDO ENDDO ELSE IF (field(1)%ndim==4) THEN max_req=0 DO ind=1,ndomain req=>message%request(ind) IF (req%nsend>max_req) max_req=req%nsend ENDDO DO ind=1,ndomain IF (.NOT. assigned_domain(ind) ) CYCLE dim3=size(field(ind)%rval4d,2) CALL distrib_level(1,dim3, lbegin,lend) dim4=size(field(ind)%rval4d,3) rval4d=>field(ind)%rval4d req=>message%request(ind) DO isend=1,req%nsend send=>req%send(isend) value=>send%value IF (send%rank/=mpi_rank) THEN ireq=send%ireq buffer_r=>message%buffers(ireq)%r msize=send%size moffset=send%offset call enter_profile(profile_mpi_copies) CALL trace_start("copy_to_buffer") !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) DO d4=1,dim4 DO d3=lbegin,lend offset=moffset*dim3*dim4 + dim3*msize*(d4-1) + & (d3-1)*msize !$acc loop DO n=1,msize buffer_r(n+offset)=rval4d(value(n),d3,d4) ENDDO ENDDO ENDDO CALL trace_end("copy_to_buffer") call exit_profile(profile_mpi_copies) IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) ENDIF IF (is_omp_level_master) THEN IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") !$OMP CRITICAL CALL MPI_ISEND(buffer_r,send%size*dim3*dim4,MPI_REAL8,send%rank, & send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) !$OMP END CRITICAL ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") CALL MPI_ISEND(buffer_r,send%size*dim3*dim4,MPI_REAL8,send%rank, & send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDIF ELSE IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) ENDIF ENDIF ENDDO IF (mpi_threading_mode==MPI_THREAD_SERIALIZED.OR.mpi_threading_mode==MPI_THREAD_MULTIPLE .AND. omp_level_size>1) THEN DO isend=req%nsend+1,max_req CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) ENDDO ENDIF ENDDO DO ind=1,ndomain IF (.NOT. assigned_domain(ind) ) CYCLE dim3=size(field(ind)%rval4d,2) CALL distrib_level(1,dim3, lbegin,lend) dim4=size(field(ind)%rval4d,3) rval4d=>field(ind)%rval4d req=>message%request(ind) DO irecv=1,req%nrecv recv=>req%recv(irecv) IF (recv%rank==mpi_rank) THEN value=>recv%value src_value => recv%src_value src_rval4d=>field(recv%domain)%rval4d sgn=>recv%sign msize=recv%size call enter_profile(profile_mpi_copies) CALL trace_start("copy_data") !$acc parallel loop collapse(3) default(present) async if (field(ind)%ondevice) DO d4=1,dim4 DO d3=lbegin,lend DO n=1,msize rval4d(value(n),d3,d4)=src_rval4d(src_value(n),d3,d4)*sgn(n) ENDDO ENDDO ENDDO call exit_profile(profile_mpi_copies) CALL trace_end("copy_data") ELSE ireq=recv%ireq buffer_r=>message%buffers(ireq)%r IF (is_omp_level_master) THEN IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") !$OMP CRITICAL CALL MPI_IRECV(buffer_r,recv%size*dim3*dim4,MPI_REAL8,recv%rank, & recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) !$OMP END CRITICAL ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") CALL MPI_IRECV(buffer_r,recv%size*dim3*dim4,MPI_REAL8,recv%rank, & recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDIF ENDIF ENDDO ENDDO ENDIF IF (mpi_threading_mode==MPI_THREAD_FUNNELED .OR. mpi_threading_mode==MPI_THREAD_SINGLE) THEN CALL enter_profile(profile_mpi_omp_barrier) !$acc wait !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) !$OMP MASTER DO ireq=1,message%nreq_send buffer_r=>message%buffers(ireq)%r msize=message%buffers(ireq)%size rank=message%buffers(ireq)%rank ! Using the "if" clause on the "host_data" directive seems to cause a crash at compilation IF (message%ondevice) THEN !$acc host_data use_device(buffer_r) ! if (message%ondevice) CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & comm_icosa, message%mpi_req(ireq),ierr) !$acc end host_data ELSE CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDDO DO ireq=message%nreq_send+1,message%nreq_send+message%nreq_recv buffer_r=>message%buffers(ireq)%r msize=message%buffers(ireq)%size rank=message%buffers(ireq)%rank ! Using the "if" clause on the "host_data" directive seems to cause a crash at compilation IF (message%ondevice) THEN !$acc host_data use_device(buffer_r) ! if (message%ondevice) CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & comm_icosa, message%mpi_req(ireq),ierr) !$acc end host_data ELSE CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & comm_icosa, message%mpi_req(ireq),ierr) ENDIF ENDDO !$OMP END MASTER ENDIF ENDIF CALL enter_profile(profile_mpi_omp_barrier) !$OMP BARRIER CALL exit_profile(profile_mpi_omp_barrier) ! CALL trace_end("send_message_mpi") CALL exit_profile(id_mpi) END SUBROUTINE send_message_mpi SUBROUTINE test_message_mpi(message) IMPLICIT NONE TYPE(t_message) :: message INTEGER :: ierr !$OMP MASTER IF (message%pending .AND. .NOT. message%completed) CALL MPI_TESTALL(message%nreq,& message%mpi_req,message%completed,message%status,ierr) !$OMP END MASTER END SUBROUTINE test_message_mpi SUBROUTINE wait_message_mpi(message) USE profiling_mod USE field_mod USE domain_mod USE mpi_mod USE mpipara USE omp_para USE trace IMPLICIT NONE TYPE(t_message) :: message TYPE(t_field),POINTER :: field(:) REAL(rstd),POINTER :: rval2d(:) REAL(rstd),POINTER :: rval3d(:,:) REAL(rstd),POINTER :: rval4d(:,:,:) REAL(rstd),POINTER :: buffer_r(:) INTEGER,POINTER :: value(:) INTEGER,POINTER :: sgn(:) TYPE(ARRAY),POINTER :: recv TYPE(t_request),POINTER :: req INTEGER :: irecv INTEGER :: ireq,nreq INTEGER :: ind,n INTEGER :: dim3,dim4,d3,d4,lbegin,lend INTEGER :: offset, msize, moffset message%open=.FALSE. IF (.NOT. message%pending) RETURN CALL enter_profile(id_mpi) ! CALL trace_start("wait_message_mpi") field=>message%field nreq=message%nreq IF (field(1)%data_type==type_real) THEN IF (field(1)%ndim==2) THEN call enter_profile(profile_mpi_waitall) !$OMP MASTER IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, & message%status,ierr) !$OMP END MASTER !$OMP BARRIER call exit_profile(profile_mpi_waitall) call enter_profile(profile_mpi_copies) DO ind=1,ndomain IF (.NOT. assigned_domain(ind) .OR. .NOT. is_omp_level_master) CYCLE rval2d=>field(ind)%rval2d req=>message%request(ind) DO irecv=1,req%nrecv recv=>req%recv(irecv) IF (recv%rank/=mpi_rank) THEN ireq=recv%ireq buffer_r=>message%buffers(ireq)%r value=>recv%value sgn=>recv%sign offset=recv%offset msize=recv%size !$acc parallel loop default(present) async if (field(ind)%ondevice) DO n=1,msize rval2d(value(n))=buffer_r(n+offset)*sgn(n) ENDDO ENDIF ENDDO ENDDO call exit_profile(profile_mpi_copies) ELSE IF (field(1)%ndim==3) THEN call enter_profile(profile_mpi_waitall) !$OMP MASTER IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, & message%status,ierr) !$OMP END MASTER !$OMP BARRIER call exit_profile(profile_mpi_waitall) DO ind=1,ndomain IF (.NOT. assigned_domain(ind) ) CYCLE rval3d=>field(ind)%rval3d req=>message%request(ind) DO irecv=1,req%nrecv recv=>req%recv(irecv) IF (recv%rank/=mpi_rank) THEN ireq=recv%ireq buffer_r=>message%buffers(ireq)%r value=>recv%value sgn=>recv%sign dim3=size(rval3d,2) CALL distrib_level(1,dim3, lbegin,lend) msize=recv%size moffset=recv%offset call enter_profile(profile_mpi_copies) CALL trace_start("copy_from_buffer") IF (req%vector) THEN !$acc parallel loop default(present) async if (field(ind)%ondevice) DO d3=lbegin,lend offset=moffset*dim3 + (d3-1)*msize !$acc loop DO n=1,msize rval3d(value(n),d3)=buffer_r(n+offset)*sgn(n) ENDDO ENDDO ELSE !$acc parallel loop default(present) async if (field(ind)%ondevice) DO d3=lbegin,lend offset=moffset*dim3 + (d3-1)*msize !$acc loop DO n=1,msize rval3d(value(n),d3)=buffer_r(n+offset) ENDDO ENDDO ENDIF CALL trace_end("copy_from_buffer") call exit_profile(profile_mpi_copies) ENDIF ENDDO ENDDO ELSE IF (field(1)%ndim==4) THEN call enter_profile(profile_mpi_waitall) !$OMP MASTER IF (.NOT. message%completed) CALL MPI_WAITALL(nreq,message%mpi_req, & message%status,ierr) !$OMP END MASTER !$OMP BARRIER call exit_profile(profile_mpi_waitall) DO ind=1,ndomain IF (.NOT. assigned_domain(ind) ) CYCLE rval4d=>field(ind)%rval4d req=>message%request(ind) DO irecv=1,req%nrecv recv=>req%recv(irecv) IF (recv%rank/=mpi_rank) THEN ireq=recv%ireq buffer_r=>message%buffers(ireq)%r value=>recv%value sgn=>recv%sign dim3=size(rval4d,2) CALL distrib_level(1,dim3, lbegin,lend) dim4=size(rval4d,3) msize=recv%size moffset=recv%offset call enter_profile(profile_mpi_copies) CALL trace_start("copy_from_buffer") !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) DO d4=1,dim4 DO d3=lbegin,lend offset=moffset*dim3*dim4 + dim3*msize*(d4-1) + & (d3-1)*msize !$acc loop DO n=1,msize rval4d(value(n),d3,d4)=buffer_r(n+offset)*sgn(n) ENDDO ENDDO ENDDO CALL trace_end("copy_from_buffer") call exit_profile(profile_mpi_copies) ENDIF ENDDO ENDDO ENDIF ENDIF !$OMP MASTER message%pending=.FALSE. !$OMP END MASTER ! CALL trace_end("wait_message_mpi") !$OMP BARRIER CALL exit_profile(id_mpi) END SUBROUTINE wait_message_mpi SUBROUTINE transfert_request_seq(field,request) USE field_mod USE domain_mod IMPLICIT NONE TYPE(t_field),POINTER :: field(:) TYPE(t_request),POINTER :: request(:) REAL(rstd),POINTER :: rval2d(:) REAL(rstd),POINTER :: rval3d(:,:) REAL(rstd),POINTER :: rval4d(:,:,:) INTEGER :: ind TYPE(t_request),POINTER :: req INTEGER :: n DO ind=1,ndomain req=>request(ind) rval2d=>field(ind)%rval2d rval3d=>field(ind)%rval3d rval4d=>field(ind)%rval4d IF (field(ind)%data_type==type_real) THEN IF (field(ind)%ndim==2) THEN DO n=1,req%size rval2d(req%target_ind(n))=field(req%src_domain(n))%rval2d(req%src_ind(n))*& req%target_sign(n) ENDDO ELSE IF (field(ind)%ndim==3) THEN DO n=1,req%size rval3d(req%target_ind(n),:)=field(req%src_domain(n))%rval3d(req%src_ind(n),:)*& req%target_sign(n) ENDDO ELSE IF (field(ind)%ndim==4) THEN DO n=1,req%size rval4d(req%target_ind(n),:,:)=field(req%src_domain(n))%rval4d(req%src_ind(n),:,:)*& req%target_sign(n) ENDDO ENDIF ENDIF ENDDO END SUBROUTINE transfert_request_seq END MODULE transfert_mpi_legacy_mod