| program main |
| use iso_c_binding |
| use omp_lib |
| implicit none (type, external) |
| |
| integer(c_size_t), parameter :: sizeof_int = 4 |
| integer, parameter :: sk = c_size_t |
| logical, allocatable :: isshared(:) |
| integer, allocatable :: maxdim(:,:) |
| integer :: ndev |
| |
| ndev = omp_get_num_devices() |
| call init_isshared |
| call init_maxdim |
| |
| call one |
| call two |
| call three |
| call four |
| |
| deallocate(isshared, maxdim) |
| contains |
| |
| subroutine init_maxdim |
| integer :: dev, dev2, r |
| integer(c_size_t), parameter :: nl = 0 |
| |
| allocate(maxdim(0:ndev,0:ndev)) |
| do dev = 0, ndev |
| do dev2 = 0, ndev |
| r = omp_target_memcpy_rect (c_null_ptr, c_null_ptr, nl, & |
| num_dims=1_c_int, volume=[nl], & |
| dst_offsets=[nl], src_offsets=[nl], & |
| dst_dimensions=[nl], src_dimensions=[nl], & |
| dst_device_num=dev, src_device_num=omp_initial_device) |
| if (r < 3) stop 1 ! OpenMP requirement |
| if (r < huge(0_c_int)) stop 2 ! GCC implementation |
| maxdim(dev2,dev) = r |
| end do |
| end do |
| end subroutine |
| |
| subroutine init_isshared |
| integer :: dev |
| logical :: dev_isshared |
| |
| allocate(isshared(0:ndev)) |
| do dev = 0, ndev |
| dev_isshared = .false. |
| !$omp target device(dev) map(to: dev_isshared) |
| dev_isshared = .true. |
| !$omp end target |
| isshared(dev) = dev_isshared |
| end do |
| end subroutine |
| |
| |
| subroutine one |
| integer(c_size_t), parameter :: N1 = 30 |
| integer, target :: host_data(N1) |
| type(c_ptr) :: dev_cptr(0:ndev), cptr, tmp_cptr |
| integer :: dev, dev2, i, r |
| |
| do dev = 0, ndev |
| dev_cptr(dev) = omp_target_alloc (N1*sizeof_int, dev) |
| if (.not. c_associated (dev_cptr(dev))) stop 11 |
| end do |
| |
| do i = 1, N1 |
| host_data(i) = i |
| end do |
| |
| ! copy full array host -> all devices + check value + set per-device value |
| do dev = 0, ndev |
| r = omp_target_memcpy_rect (dev_cptr(dev), c_loc(host_data), sizeof_int, & |
| num_dims=1_c_int, volume=[N1], & |
| dst_offsets=[0_sk], src_offsets=[0_sk], & |
| dst_dimensions=[N1], src_dimensions=[N1], & |
| dst_device_num=dev, src_device_num=omp_initial_device) |
| if (r /= 0) stop 12 |
| cptr = dev_cptr(dev) |
| !$omp target device(dev) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:) |
| call c_f_pointer(cptr, fptr, [N1]) |
| do i = 1, N1 |
| if (fptr(i) /= i) stop 13 |
| fptr(i) = i*100 + 10000 * (dev+3) |
| end do |
| end block |
| end do |
| |
| ! Test strided data - forth and back - same array sizes |
| do dev = 0, ndev |
| do dev2 = 0, ndev |
| tmp_cptr = omp_target_alloc (N1*sizeof_int, dev) |
| if (.not. c_associated (tmp_cptr)) stop 14 |
| |
| !$omp target device(dev) is_device_ptr(tmp_cptr) |
| block |
| integer, pointer, contiguous :: fptr(:) |
| call c_f_pointer(tmp_cptr, fptr, [N1]) |
| do i = 1, N1 |
| fptr(i) = i*100 + 10000*(dev+1) |
| end do |
| end block |
| |
| if (N1-17 > N1 - max(12,13)) stop 18 |
| r = omp_target_memcpy_rect (dev_cptr(dev2), tmp_cptr, sizeof_int, & |
| num_dims=1_c_int, volume=[N1-17], & |
| dst_offsets=[12_sk], src_offsets=[13_sk], & |
| dst_dimensions=[N1], src_dimensions=[N1], & |
| dst_device_num=dev2, src_device_num=dev) |
| if (r /= 0) stop 15 |
| |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| logical :: checked(N1) |
| integer, pointer, contiguous :: fptr(:) |
| call c_f_pointer(cptr, fptr, [N1]) |
| checked = .false. |
| do i = 1, N1-17 |
| if (fptr(i+12) /= (i+13)*100 + 10000 * (dev+1)) stop 16 |
| checked(i+12) = .true. |
| end do |
| ! original device value |
| do i = 1, N1 |
| if (.not. checked(i)) then |
| if (fptr(i) /= i*100 + 10000 * (dev2+3)) stop 17 |
| end if |
| end do |
| end block |
| call omp_target_free (tmp_cptr, dev) |
| end do |
| |
| ! reset to original value |
| do dev2 = 0, ndev |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:) |
| call c_f_pointer(cptr, fptr, [N1]) |
| do i = 1, N1 |
| fptr(i) = i*100 + 10000 * (dev2+3) |
| end do |
| end block |
| end do |
| end do |
| |
| do dev = 0, ndev |
| call omp_target_free (dev_cptr(dev), dev) |
| end do |
| end subroutine |
| |
| |
| subroutine two |
| integer(c_size_t), parameter :: N = 10, M = 30 |
| integer, target :: host_data(N,M) |
| type(c_ptr) :: dev_cptr(0:ndev), cptr, tmp_cptr |
| integer :: dev, dev2, i, j, r |
| |
| do dev = 0, ndev |
| dev_cptr(dev) = omp_target_alloc (N*M*sizeof_int, dev) |
| if (.not. c_associated (dev_cptr(dev))) stop 21 |
| end do |
| |
| do i = 1, M |
| do j = 1, N |
| host_data(j,i) = i*100 + j |
| end do |
| end do |
| |
| ! copy full array host -> all devices + check value + set per-device value |
| do dev = 0, ndev |
| r = omp_target_memcpy_rect (dev_cptr(dev), c_loc(host_data), sizeof_int, & |
| num_dims=2_c_int, volume=[M, N], & |
| dst_offsets=[0_sk, 0_sk], src_offsets=[0_sk, 0_sk], & |
| dst_dimensions=[M, N], src_dimensions=[M,N], & |
| dst_device_num=dev, src_device_num=omp_initial_device) |
| if (r /= 0) stop 22 |
| cptr = dev_cptr(dev) |
| !$omp target device(dev) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:) |
| call c_f_pointer(cptr, fptr, [N,M]) |
| do i = 1, M |
| do j = 1, N |
| if (fptr(j,i) /= i*100 + j) stop 23 |
| fptr(j,i) = i*100 + j + 1000 * dev |
| end do |
| end do |
| end block |
| end do |
| |
| ! Test strided data - forth and back - same array sizes |
| do dev = 0, ndev |
| do dev2 = 0, ndev |
| tmp_cptr = omp_target_alloc (N*M*sizeof_int, dev) |
| if (.not. c_associated (tmp_cptr)) stop 24 |
| |
| !$omp target device(dev) is_device_ptr(tmp_cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:) |
| call c_f_pointer(tmp_cptr, fptr, [N,M]) |
| do i = 1, M |
| do j = 1, N |
| fptr(j,i) = i*100 + j + 100000 * (dev+1) |
| end do |
| end do |
| end block |
| |
| if (M-14 > M - max(5,2) & |
| .or. N-3 > N - max(2,1)) stop 28 |
| r = omp_target_memcpy_rect (dev_cptr(dev2), tmp_cptr, sizeof_int, & |
| num_dims=2_c_int, volume=[M-14, N-3], & |
| dst_offsets=[5_sk, 3_sk], src_offsets=[2_sk, 1_sk], & |
| dst_dimensions=[M, N], src_dimensions=[M,N], & |
| dst_device_num=dev2, src_device_num=dev) |
| if (r /= 0) stop 25 |
| |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| logical :: checked(N,M) |
| integer, pointer, contiguous :: fptr(:,:) |
| call c_f_pointer(cptr, fptr, [N,M]) |
| checked = .false. |
| do i = 1, M-14 |
| do j = 1, N-3 |
| if (fptr(j+3, i+5) /= (i+2)*100 + (j+1) + 100000 * (dev+1)) stop 26 |
| checked(j+3, i+5) = .true. |
| end do |
| end do |
| ! original device value |
| do i = 1, M |
| do j = 1, N |
| if (.not. checked(j,i)) then |
| if (fptr(j,i) /= i*100 + j + 1000 * dev2) stop 27 |
| end if |
| end do |
| end do |
| end block |
| call omp_target_free (tmp_cptr, dev) |
| end do |
| |
| ! reset to original value |
| do dev2 = 0, ndev |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:) |
| call c_f_pointer(cptr, fptr, [N,M]) |
| do i = 1, M |
| do j = 1, N |
| fptr(j,i) = i*100 + j + 1000 * dev2 |
| end do |
| end do |
| end block |
| end do |
| end do |
| |
| do dev = 0, ndev |
| call omp_target_free (dev_cptr(dev), dev) |
| end do |
| end subroutine |
| |
| |
| subroutine three |
| integer(c_size_t), parameter :: N1 = 10, N2 = 30, N3 = 15 |
| integer, target :: host_data(N3,N2,N1) |
| type(c_ptr) :: dev_cptr(0:ndev), cptr, tmp_cptr |
| integer :: dev, dev2, i, j, k, r |
| |
| do dev = 0, ndev |
| dev_cptr(dev) = omp_target_alloc (N1*N2*N3*sizeof_int, dev) |
| if (.not. c_associated (dev_cptr(dev))) stop 31 |
| end do |
| |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| host_data(k, j,i) = i*1000 + 100*j + k |
| end do |
| end do |
| end do |
| |
| ! copy full array host -> all devices + check value + set per-device value |
| do dev = 0, ndev |
| r = omp_target_memcpy_rect (dev_cptr(dev), c_loc(host_data), sizeof_int, & |
| num_dims=3_c_int, volume=[N1, N2, N3], & |
| dst_offsets=[0_sk, 0_sk, 0_sk], src_offsets=[0_sk, 0_sk, 0_sk], & |
| dst_dimensions=[N1, N2, N3], src_dimensions=[N1, N2, N3], & |
| dst_device_num=dev, src_device_num=omp_initial_device) |
| if (r /= 0) stop 32 |
| cptr = dev_cptr(dev) |
| !$omp target device(dev) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:,:) |
| call c_f_pointer(cptr, fptr, [N3,N2,N1]) |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| if (fptr(k, j,i) /= i*1000 + 100*j + k) stop 33 |
| fptr(k,j,i) = i*1000 + 100*j + k + 1000 * dev |
| end do |
| end do |
| end do |
| end block |
| end do |
| |
| ! Test strided data - forth and back - same array sizes |
| do dev = 0, ndev |
| do dev2 = 0, ndev |
| tmp_cptr = omp_target_alloc (N1*N2*N3*sizeof_int, dev) |
| if (.not. c_associated (tmp_cptr)) stop 34 |
| |
| !$omp target device(dev) is_device_ptr(tmp_cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:,:) |
| call c_f_pointer(tmp_cptr, fptr, [N3,N2,N1]) |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| fptr(k,j,i) = i*1000 + 100*j + k + 100000 * (dev+1) |
| end do |
| end do |
| end do |
| end block |
| |
| if (N1-5 > N1 - max(5,2) & |
| .or. N2-13 > N2 - max(3,1) & |
| .or. N3-5 > N3 - max(2,4)) stop 38 |
| r = omp_target_memcpy_rect (dev_cptr(dev2), tmp_cptr, sizeof_int, & |
| num_dims=3_c_int, volume=[N1-5, N2-13,N3-5], & |
| dst_offsets=[5_sk, 3_sk,2_sk], src_offsets=[2_sk, 1_sk,4_sk], & |
| dst_dimensions=[N1,N2,N3], src_dimensions=[N1,N2,N3], & |
| dst_device_num=dev2, src_device_num=dev) |
| if (r /= 0) stop 35 |
| |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| logical :: checked(N3,N2,N1) |
| integer, pointer, contiguous :: fptr(:,:,:) |
| call c_f_pointer(cptr, fptr, [N3,N2,N1]) |
| checked = .false. |
| do i = 1, N1-5 |
| do j = 1, N2-13 |
| do k = 1, N3-5 |
| if (fptr(k+2, j+3, i+5) /= (i+2)*1000 + 100*(j+1) + (k+4) + 100000 * (dev+1)) stop 36 |
| checked(k+2, j+3, i+5) = .true. |
| end do |
| end do |
| end do |
| ! original device value |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| if (.not. checked(k,j,i)) then |
| if (fptr(k,j,i) /= i*1000 + 100*j + k + 1000 * dev2) stop 37 |
| end if |
| end do |
| end do |
| end do |
| end block |
| call omp_target_free (tmp_cptr, dev) |
| end do |
| |
| ! reset to original value |
| do dev2 = 0, ndev |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:,:) |
| call c_f_pointer(cptr, fptr, [N3,N2,N1]) |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| fptr(k,j,i) = i*1000 + 100*j + k + 1000 * dev2 |
| end do |
| end do |
| end do |
| end block |
| end do |
| end do |
| |
| do dev = 0, ndev |
| call omp_target_free (dev_cptr(dev), dev) |
| end do |
| end subroutine |
| |
| |
| subroutine four |
| integer(c_size_t), parameter :: N1 = 10, N2 = 30, N3 = 15, N4 = 25 |
| integer, target :: host_data(N4, N3,N2,N1) |
| type(c_ptr) :: dev_cptr(0:ndev), cptr, tmp_cptr |
| integer :: dev, dev2, i, j, k, ll, r |
| |
| do dev = 0, ndev |
| dev_cptr(dev) = omp_target_alloc (N1*N2*N3*N4*sizeof_int, dev) |
| if (.not. c_associated (dev_cptr(dev))) stop 41 |
| end do |
| |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| do ll = 1, N4 |
| host_data(ll, k, j,i) = i*1000 + 100*j + k*10 + ll |
| end do |
| end do |
| end do |
| end do |
| |
| ! copy full array host -> all devices + check value + set per-device value |
| do dev = 0, ndev |
| r = omp_target_memcpy_rect (dev_cptr(dev), c_loc(host_data), sizeof_int, & |
| num_dims=4_c_int, volume=[N1, N2, N3, N4], & |
| dst_offsets=[0_sk, 0_sk, 0_sk, 0_sk], src_offsets=[0_sk, 0_sk, 0_sk, 0_sk], & |
| dst_dimensions=[N1, N2, N3, N4], src_dimensions=[N1, N2, N3, N4], & |
| dst_device_num=dev, src_device_num=omp_initial_device) |
| if (r /= 0) stop 42 |
| cptr = dev_cptr(dev) |
| !$omp target device(dev) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:,:,:) |
| call c_f_pointer(cptr, fptr, [N4,N3,N2,N1]) |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| do ll = 1, N4 |
| if (fptr(ll, k, j,i) /= i*1000 + 100*j + k*10 + ll) stop 43 |
| fptr(ll,k,j,i) = i*1000 + 100*j + k*10 + ll + 1000 * dev |
| end do |
| end do |
| end do |
| end do |
| end block |
| end do |
| |
| ! Test strided data - forth and back - same array sizes |
| do dev = 0, ndev |
| do dev2 = 0, ndev |
| tmp_cptr = omp_target_alloc (N1*N2*N3*N4*sizeof_int, dev) |
| if (.not. c_associated (tmp_cptr)) stop 44 |
| |
| !$omp target device(dev) is_device_ptr(tmp_cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:,:,:) |
| call c_f_pointer(tmp_cptr, fptr, [N4,N3,N2,N1]) |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| do ll = 1, N4 |
| fptr(ll,k,j,i) = i*1000 + 100*j + k*10 + ll + 100000 * (dev+1) |
| end do |
| end do |
| end do |
| end do |
| end block |
| |
| if (N1-5 > N1 - max(5,2) & |
| .or. N2-13 > N2 - max(3,1) & |
| .or. N3-5 > N3 - max(2,4) & |
| .or. N4-11 > N4 - max(7,5)) stop 48 |
| r = omp_target_memcpy_rect (dev_cptr(dev2), tmp_cptr, sizeof_int, & |
| num_dims=4_c_int, volume=[N1-5, N2-13,N3-5,N4-11], & |
| dst_offsets=[5_sk, 3_sk,2_sk,7_sk], src_offsets=[2_sk, 1_sk,4_sk,5_sk], & |
| dst_dimensions=[N1,N2,N3,N4], src_dimensions=[N1,N2,N3,N4], & |
| dst_device_num=dev2, src_device_num=dev) |
| if (r /= 0) stop 45 |
| |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| logical, allocatable :: checked(:,:,:,:) ! allocatble to reduce stack size |
| integer, pointer, contiguous :: fptr(:,:,:,:) |
| call c_f_pointer(cptr, fptr, [N4,N3,N2,N1]) |
| allocate (checked(N4,N3,N2,N1), source=.false.) |
| do i = 1, N1-5 |
| do j = 1, N2-13 |
| do k = 1, N3-5 |
| do ll = 1, N4-11 |
| if (fptr(ll+7, k+2, j+3, i+5) /= (i+2)*1000 + 100*(j+1) + (k+4)*10 + ll+5 + 100000 * (dev+1)) stop 46 |
| checked(ll+7, k+2, j+3, i+5) = .true. |
| end do |
| end do |
| end do |
| end do |
| ! original device value |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| do ll = 1, N4 |
| if (.not. checked(ll,k,j,i)) then |
| if (fptr(ll,k,j,i) /= i*1000 + 100*j + k*10 + ll + 1000 * dev2) stop 47 |
| end if |
| end do |
| end do |
| end do |
| end do |
| deallocate (checked) |
| end block |
| call omp_target_free (tmp_cptr, dev) |
| end do |
| |
| ! reset to original value |
| do dev2 = 0, ndev |
| cptr = dev_cptr(dev2) |
| !$omp target device(dev2) is_device_ptr(cptr) |
| block |
| integer, pointer, contiguous :: fptr(:,:,:,:) |
| call c_f_pointer(cptr, fptr, [N4,N3,N2,N1]) |
| do i = 1, N1 |
| do j = 1, N2 |
| do k = 1, N3 |
| do ll = 1, N4 |
| fptr(ll,k,j,i) = i*1000 + 100*j + k*10 + ll + 1000 * dev2 |
| end do |
| end do |
| end do |
| end do |
| end block |
| end do |
| end do |
| |
| do dev = 0, ndev |
| call omp_target_free (dev_cptr(dev), dev) |
| end do |
| end subroutine |
| end program |