Changeset 953 for codes/icosagcm/trunk/src/parallel
- Timestamp:
- 07/15/19 12:29:31 (5 years ago)
- Location:
- codes/icosagcm/trunk/src/parallel
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
codes/icosagcm/trunk/src/parallel/mpipara.F90
r892 r953 14 14 INTEGER,SAVE :: id_mpi ! id for profiling 15 15 16 INTEGER, SAVE :: device_id 17 16 18 INTERFACE allocate_mpi_buffer 17 19 MODULE PROCEDURE allocate_mpi_buffer_r2, allocate_mpi_buffer_r3,allocate_mpi_buffer_r4 … … 46 48 USE xios 47 49 #endif 50 USE abort_mod 48 51 IMPLICIT NONE 49 52 CHARACTER(LEN=256) :: required_mode_str … … 75 78 END SELECT 76 79 80 IF (required_mode==MPI_THREAD_SERIALIZED .OR. required_mode==MPI_THREAD_MULTIPLE) THEN 81 CALL abort_acc("mpi_threading_mode /= 'single' .AND. mpi_threading_mode /= 'funneled'") 82 ENDIF 77 83 78 84 IF (required_mode==MPI_THREAD_SINGLE) PRINT*,'MPI_INIT_THREAD : MPI_SINGLE_THREAD required' … … 120 126 ENDIF 121 127 128 129 #ifdef _OPENACC 130 device_id = setDevice(mpi_size, mpi_rank) 131 PRINT *, 'GPU device ', device_id 132 #else 133 device_id = -1 134 #endif 135 122 136 END SUBROUTINE init_mpipara 123 137 … … 233 247 END SUBROUTINE free_mpi_buffer_r4 234 248 249 #ifdef _OPENACC 250 FUNCTION setDevice(nprocs, myrank) 251 use iso_c_binding 252 use openacc 253 USE mpi_mod 254 implicit none 255 256 interface 257 function gethostid() bind(C) 258 use iso_c_binding 259 integer(C_INT) :: gethostid 260 end function gethostid 261 end interface 262 263 integer, intent(in) :: nprocs, myrank 264 integer :: hostids(nprocs), localprocs(nprocs) 265 integer :: hostid, ierr, numdev, mydev, i, numlocal 266 integer :: setDevice 267 268 ! get the hostids so we can determine what other processes are on this node 269 hostid = gethostid() 270 call mpi_allgather(hostid,1,MPI_INTEGER,hostids,1,MPI_INTEGER, MPI_COMM_WORLD, ierr) 271 272 ! determine which processors are on this node 273 numlocal = 0 274 localprocs(:) = 0 275 do i = 1, nprocs 276 if (hostid == hostids(i)) then 277 localprocs(i) = numlocal 278 numlocal = numlocal + 1 279 end if 280 end do 281 282 ! get the number of device on this node 283 numdev = acc_get_num_devices(ACC_DEVICE_NVIDIA) 284 285 if (numdev < 1) then 286 print *, "Error: there are no devices available on this host. ABORTING", myrank 287 stop 288 end if 289 290 ! print a warning if the number of devices is less than the number of processes on this node. Having multiple processes share a devices is not recommended 291 if (numdev < numlocal) then 292 if (localprocs(myrank+1) == 1) then 293 ! print warning message only once per node 294 print *, "WARNING: the number of process is greater than the number of GPUs.", myrank 295 end if 296 mydev = mod(localprocs(myrank+1), numdev) 297 else 298 mydev = localprocs(myrank+1) 299 end if 300 301 call acc_set_device_num(mydev,ACC_DEVICE_NVIDIA) 302 call acc_init(ACC_DEVICE_NVIDIA) 303 setDevice = acc_get_device_num(ACC_DEVICE_NVIDIA) 304 END FUNCTION setDevice 305 306 #endif 307 308 309 235 310 END MODULE mpipara -
codes/icosagcm/trunk/src/parallel/transfert_mpi.f90
r901 r953 5 5 6 6 TYPE array 7 INTEGER,POINTER :: value(:) 8 INTEGER,POINTER :: sign(:) 7 INTEGER,POINTER :: value(:)=>null() 8 INTEGER,POINTER :: sign(:)=>null() 9 9 INTEGER :: domain 10 10 INTEGER :: rank … … 13 13 INTEGER :: offset 14 14 INTEGER :: ireq 15 INTEGER,POINTER :: buffer(:) 16 REAL,POINTER :: buffer_r(:) 17 INTEGER,POINTER :: src_value(:) 15 INTEGER,POINTER :: buffer(:)=>null() 16 REAL,POINTER :: buffer_r(:)=>null() 17 INTEGER,POINTER :: src_value(:)=>null() 18 18 END TYPE array 19 19 … … 77 77 LOGICAL :: open ! for debug 78 78 INTEGER :: number 79 LOGICAL :: ondevice=.false. !<Ready to transfer ondevice field 79 80 END TYPE t_message 80 81 … … 942 943 !$OMP BARRIER 943 944 !$OMP MASTER 945 !init off-device 946 message%ondevice=.false. 947 944 948 IF(PRESENT(name)) THEN 945 949 message%name = TRIM(name) … … 1058 1062 TYPE(t_message) :: message 1059 1063 1060 INTEGER :: ireq 1064 INTEGER :: ireq, ibuff 1061 1065 1062 1066 !$OMP BARRIER … … 1084 1088 ENDDO 1085 1089 1086 ENDIF 1090 ENDIF 1087 1091 ENDIF 1088 1092 1093 !deallocate device data if ondevice 1094 if(message%ondevice) then 1095 do ireq=1, ndomain 1096 do ibuff=1,message%request(ireq)%nsend 1097 !$acc exit data delete(message%request(ireq)%send(ibuff)%buffer(:)) 1098 !$acc exit data delete(message%request(ireq)%send(ibuff)%buffer_r(:)) 1099 !$acc exit data delete(message%request(ireq)%send(ibuff)%sign(:)) 1100 !$acc exit data delete(message%request(ireq)%send(ibuff)%src_value(:)) 1101 !$acc exit data delete(message%request(ireq)%send(ibuff)%value(:)) 1102 end do 1103 do ibuff=1,message%request(ireq)%nrecv 1104 !$acc exit data delete(message%request(ireq)%recv(ibuff)%buffer(:)) 1105 !$acc exit data delete(message%request(ireq)%recv(ibuff)%buffer_r(:)) 1106 !$acc exit data delete(message%request(ireq)%recv(ibuff)%sign(:)) 1107 !$acc exit data delete(message%request(ireq)%recv(ibuff)%src_value(:)) 1108 !$acc exit data delete(message%request(ireq)%recv(ibuff)%value(:)) 1109 end do 1110 end do 1111 DO ireq=1,message%nreq 1112 !$acc exit data delete(message%buffers(ireq)%r) 1113 ENDDO 1114 message%ondevice=.false. 1115 end if 1116 1089 1117 DEALLOCATE(message%mpi_req) 1090 1118 DEALLOCATE(message%buffers) … … 1120 1148 1121 1149 1150 !!!Update buffers on device for 'message' 1151 !!! does create_device_message when not already ondevice 1152 SUBROUTINE update_device_message_mpi(message) 1153 USE domain_mod 1154 IMPLICIT NONE 1155 TYPE(t_message), intent(inout) :: message 1156 INTEGER :: ireq, ibuff 1157 1158 !if(.not. message%ondevice) call create_device_message_mpi(message) 1159 1160 do ireq=1, ndomain 1161 do ibuff=1,message%request(ireq)%nsend 1162 !device buffers updated even if pointers not attached : 1163 !non allocated buffers in 'message' must be set to NULL() 1164 !$acc enter data copyin(message%request(ireq)%send(ibuff)%buffer(:)) async 1165 !$acc enter data copyin(message%request(ireq)%send(ibuff)%buffer_r(:)) async 1166 !$acc enter data copyin(message%request(ireq)%send(ibuff)%sign(:)) async 1167 !$acc enter data copyin(message%request(ireq)%send(ibuff)%src_value(:)) async 1168 !$acc enter data copyin(message%request(ireq)%send(ibuff)%value(:)) async 1169 end do 1170 do ibuff=1,message%request(ireq)%nrecv 1171 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%buffer(:)) async 1172 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%buffer_r(:)) async 1173 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%sign(:)) async 1174 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%src_value(:)) async 1175 !$acc enter data copyin(message%request(ireq)%recv(ibuff)%value(:)) async 1176 end do 1177 end do 1178 DO ireq=1,message%nreq 1179 !$acc enter data copyin(message%buffers(ireq)%r) async 1180 ENDDO 1181 message%ondevice=.true. 1182 END SUBROUTINE 1183 1184 !TODO : add openacc with multiple process 1122 1185 SUBROUTINE send_message_mpi(field,message) 1123 1186 USE abort_mod … … 1129 1192 USE omp_para 1130 1193 USE trace 1194 USE abort_mod 1131 1195 IMPLICIT NONE 1132 1196 TYPE(t_field),POINTER :: field(:) … … 1145 1209 INTEGER :: dim3,dim4,d3,d4 1146 1210 INTEGER,POINTER :: src_value(:) 1147 INTEGER :: offset,msize, rank1211 INTEGER :: offset,msize,moffset,rank 1148 1212 INTEGER :: lbegin, lend 1149 1213 INTEGER :: max_req … … 1152 1216 1153 1217 CALL enter_profile(id_mpi) 1218 1219 !Prepare 'message' for on-device copies if field is on device 1220 if( field(1)%ondevice .AND. .NOT. message%ondevice ) call update_device_message_mpi(message) 1154 1221 1155 1222 !$OMP BARRIER … … 1195 1262 buffer_r=>message%buffers(ireq)%r 1196 1263 offset=send%offset 1197 1198 DO n=1,send%size 1264 msize=send%size 1265 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1266 DO n=1,msize 1199 1267 buffer_r(offset+n)=rval2d(value(n)) 1200 1268 ENDDO 1201 1269 1202 1270 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1271 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1203 1272 !$OMP CRITICAL 1204 1273 CALL MPI_ISEND(buffer_r,send%size,MPI_REAL8,send%rank, & … … 1206 1275 !$OMP END CRITICAL 1207 1276 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1277 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1208 1278 CALL MPI_ISEND(buffer_r,send%size,MPI_REAL8,send%rank, & 1209 1279 send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1228 1298 src_rval2d=>field(recv%domain)%rval2d 1229 1299 sgn=>recv%sign 1230 1231 DO n=1,recv%size 1300 msize=recv%size 1301 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1302 DO n=1,msize 1232 1303 rval2d(value(n))=src_rval2d(src_value(n))*sgn(n) 1233 1304 ENDDO … … 1239 1310 buffer_r=>message%buffers(ireq)%r 1240 1311 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1312 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1241 1313 !$OMP CRITICAL 1242 1314 CALL MPI_IRECV(buffer_r,recv%size,MPI_REAL8,recv%rank, & … … 1244 1316 !$OMP END CRITICAL 1245 1317 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1318 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1246 1319 CALL MPI_IRECV(buffer_r,recv%size,MPI_REAL8,recv%rank, & 1247 1320 recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1278 1351 buffer_r=>message%buffers(ireq)%r 1279 1352 1280 offset=send%offset*dim3 + (lbegin-1)*send%size1281 1353 msize=send%size 1354 moffset=send%offset 1282 1355 CALL trace_start("copy_to_buffer") 1283 1356 1357 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1284 1358 DO d3=lbegin,lend 1285 DO n=1,send%size 1359 offset=moffset*dim3 + (d3-1)*msize 1360 !$acc loop 1361 DO n=1,msize 1286 1362 buffer_r(n+offset)=rval3d(value(n),d3) 1287 1363 ENDDO 1288 offset=offset+send%size1289 1364 ENDDO 1290 1365 CALL trace_end("copy_to_buffer") … … 1296 1371 IF (is_omp_level_master) THEN 1297 1372 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1373 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1298 1374 !$OMP CRITICAL 1299 1375 CALL MPI_ISEND(buffer_r,send%size*dim3,MPI_REAL8,send%rank, & … … 1301 1377 !$OMP END CRITICAL 1302 1378 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1379 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1303 1380 CALL MPI_ISEND(buffer_r,send%size*dim3,MPI_REAL8,send%rank, & 1304 1381 send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1335 1412 src_rval3d=>field(recv%domain)%rval3d 1336 1413 sgn=>recv%sign 1414 msize=recv%size 1337 1415 1338 1416 CALL trace_start("copy_data") 1339 1417 !$acc parallel loop collapse(2) default(present) async if (field(ind)%ondevice) 1340 1418 DO d3=lbegin,lend 1341 DO n=1, recv%size1419 DO n=1,msize 1342 1420 rval3d(value(n),d3)=src_rval3d(src_value(n),d3)*sgn(n) 1343 1421 ENDDO … … 1351 1429 IF (is_omp_level_master) THEN 1352 1430 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1431 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1353 1432 !$OMP CRITICAL 1354 1433 CALL MPI_IRECV(buffer_r,recv%size*dim3,MPI_REAL8,recv%rank, & … … 1356 1435 !$OMP END CRITICAL 1357 1436 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1437 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1358 1438 CALL MPI_IRECV(buffer_r,recv%size*dim3,MPI_REAL8,recv%rank, & 1359 1439 recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1390 1470 ireq=send%ireq 1391 1471 buffer_r=>message%buffers(ireq)%r 1472 msize=send%size 1473 moffset=send%offset 1392 1474 1393 1475 CALL trace_start("copy_to_buffer") 1476 !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) 1394 1477 DO d4=1,dim4 1395 offset=send%offset*dim3*dim4 + dim3*send%size*(d4-1) + &1396 (lbegin-1)*send%size1397 1398 1478 DO d3=lbegin,lend 1399 DO n=1,send%size 1479 offset=moffset*dim3*dim4 + dim3*msize*(d4-1) + & 1480 (d3-1)*msize 1481 !$acc loop 1482 DO n=1,msize 1400 1483 buffer_r(n+offset)=rval4d(value(n),d3,d4) 1401 1484 ENDDO 1402 offset=offset+send%size1403 1485 ENDDO 1404 1486 ENDDO … … 1411 1493 IF (is_omp_level_master) THEN 1412 1494 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1495 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1413 1496 !$OMP CRITICAL 1414 1497 CALL MPI_ISEND(buffer_r,send%size*dim3*dim4,MPI_REAL8,send%rank, & … … 1416 1499 !$OMP END CRITICAL 1417 1500 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1501 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1418 1502 CALL MPI_ISEND(buffer_r,send%size*dim3*dim4,MPI_REAL8,send%rank, & 1419 1503 send%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1451 1535 src_rval4d=>field(recv%domain)%rval4d 1452 1536 sgn=>recv%sign 1453 1537 msize=recv%size 1454 1538 CALL trace_start("copy_data") 1539 1540 !$acc parallel loop collapse(3) default(present) async if (field(ind)%ondevice) 1455 1541 DO d4=1,dim4 1456 1542 DO d3=lbegin,lend 1457 DO n=1, recv%size1543 DO n=1,msize 1458 1544 rval4d(value(n),d3,d4)=src_rval4d(src_value(n),d3,d4)*sgn(n) 1459 1545 ENDDO … … 1469 1555 IF (is_omp_level_master) THEN 1470 1556 IF (mpi_threading_mode==MPI_THREAD_SERIALIZED) THEN 1557 CALL abort_acc("mpi_threading_mode==MPI_THREAD_SERIALIZED") 1471 1558 !$OMP CRITICAL 1472 1559 CALL MPI_IRECV(buffer_r,recv%size*dim3*dim4,MPI_REAL8,recv%rank, & … … 1474 1561 !$OMP END CRITICAL 1475 1562 ELSE IF (mpi_threading_mode==MPI_THREAD_MULTIPLE) THEN 1563 CALL abort_acc("mpi_threading_mode==MPI_THREAD_MULTIPLE") 1476 1564 CALL MPI_IRECV(buffer_r,recv%size*dim3*dim4,MPI_REAL8,recv%rank, & 1477 1565 recv%tag+1000*message%number,comm_icosa, message%mpi_req(ireq),ierr) … … 1485 1573 1486 1574 IF (mpi_threading_mode==MPI_THREAD_FUNNELED .OR. mpi_threading_mode==MPI_THREAD_SINGLE) THEN 1575 !$acc wait 1487 1576 !$OMP BARRIER 1488 1577 !$OMP MASTER … … 1492 1581 msize=message%buffers(ireq)%size 1493 1582 rank=message%buffers(ireq)%rank 1494 CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1495 comm_icosa, message%mpi_req(ireq),ierr) 1583 ! Using the "if" clause on the "host_data" directive seems to cause a crash at compilation 1584 IF (message%ondevice) THEN 1585 !$acc host_data use_device(buffer_r) ! if (message%ondevice) 1586 CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1587 comm_icosa, message%mpi_req(ireq),ierr) 1588 !$acc end host_data 1589 ELSE 1590 CALL MPI_ISEND(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1591 comm_icosa, message%mpi_req(ireq),ierr) 1592 ENDIF 1496 1593 ENDDO 1497 1594 … … 1500 1597 msize=message%buffers(ireq)%size 1501 1598 rank=message%buffers(ireq)%rank 1502 CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1503 comm_icosa, message%mpi_req(ireq),ierr) 1599 ! Using the "if" clause on the "host_data" directive seems to cause a crash at compilation 1600 IF (message%ondevice) THEN 1601 !$acc host_data use_device(buffer_r) ! if (message%ondevice) 1602 CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1603 comm_icosa, message%mpi_req(ireq),ierr) 1604 !$acc end host_data 1605 ELSE 1606 CALL MPI_IRECV(buffer_r,msize,MPI_REAL8,rank,1000*message%number, & 1607 comm_icosa, message%mpi_req(ireq),ierr) 1608 ENDIF 1504 1609 ENDDO 1505 1610 … … 1552 1657 INTEGER :: ind,n 1553 1658 INTEGER :: dim3,dim4,d3,d4,lbegin,lend 1554 INTEGER :: offset 1659 INTEGER :: offset, msize, moffset 1555 1660 1556 1661 message%open=.FALSE. … … 1586 1691 sgn=>recv%sign 1587 1692 offset=recv%offset 1588 DO n=1,recv%size 1693 msize=recv%size 1694 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1695 DO n=1,msize 1589 1696 rval2d(value(n))=buffer_r(n+offset)*sgn(n) 1590 1697 ENDDO … … 1621 1728 1622 1729 CALL distrib_level(1,dim3, lbegin,lend) 1623 offset=recv%offset*dim3 + (lbegin-1)*recv%size 1730 msize=recv%size 1731 moffset=recv%offset 1624 1732 CALL trace_start("copy_from_buffer") 1625 1733 1626 1734 IF (req%vector) THEN 1735 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1627 1736 DO d3=lbegin,lend 1628 DO n=1,recv%size 1737 offset=moffset*dim3 + (d3-1)*msize 1738 !$acc loop 1739 DO n=1,msize 1629 1740 rval3d(value(n),d3)=buffer_r(n+offset)*sgn(n) 1630 1741 ENDDO 1631 offset=offset+recv%size1632 1742 ENDDO 1633 1743 ELSE 1744 !$acc parallel loop default(present) async if (field(ind)%ondevice) 1634 1745 DO d3=lbegin,lend 1635 DO n=1,recv%size 1746 offset=moffset*dim3 + (d3-1)*msize 1747 !$acc loop 1748 DO n=1,msize 1636 1749 rval3d(value(n),d3)=buffer_r(n+offset) 1637 1750 ENDDO 1638 offset=offset+recv%size1639 1751 ENDDO 1640 1752 ENDIF … … 1670 1782 CALL distrib_level(1,dim3, lbegin,lend) 1671 1783 dim4=size(rval4d,3) 1784 msize=recv%size 1785 moffset=recv%offset 1672 1786 CALL trace_start("copy_from_buffer") 1787 !$acc parallel loop default(present) collapse(2) async if (field(ind)%ondevice) 1673 1788 DO d4=1,dim4 1674 offset=recv%offset*dim3*dim4 + dim3*recv%size*(d4-1) + &1675 (lbegin-1)*recv%size1676 1789 DO d3=lbegin,lend 1677 DO n=1,recv%size 1790 offset=moffset*dim3*dim4 + dim3*msize*(d4-1) + & 1791 (d3-1)*msize 1792 !$acc loop 1793 DO n=1,msize 1678 1794 rval4d(value(n),d3,d4)=buffer_r(n+offset)*sgn(n) 1679 1795 ENDDO 1680 offset=offset+recv%size1681 1796 ENDDO 1682 1797 ENDDO … … 1697 1812 ! CALL trace_end("wait_message_mpi") 1698 1813 !$OMP BARRIER 1699 1814 1700 1815 CALL exit_profile(id_mpi) 1701 1816
Note: See TracChangeset
for help on using the changeset viewer.