Skip to content

Commit 3d4f748

Browse files
authored
Add timing information
Transform to be similar to C++ USM demonstration code
1 parent b86bc90 commit 3d4f748

File tree

1 file changed

+41
-15
lines changed
  • DirectProgramming/Fortran/Jupyter/OpenMP-offload-training/USM

1 file changed

+41
-15
lines changed

DirectProgramming/Fortran/Jupyter/OpenMP-offload-training/USM/main.f90

+41-15
Original file line numberDiff line numberDiff line change
@@ -5,35 +5,61 @@
55
! =============================================================
66
program main
  ! Unified Shared Memory (USM) demonstration with timing.
  !
  ! Two integer arrays are allocated through the OpenMP allocator
  ! omp_target_shared_mem_alloc, updated by two offloaded kernels, and the
  ! result is verified on the host:
  !   x = 1, y = 1  ->  kernel 1: x = x + y  (x == 2)
  !          y = 2  ->  kernel 2: x = x + y  (x == 4)
  ! The program prints the number of OpenMP devices, the elapsed wall-clock
  ! time, and 'Test: PASSED' when every element of x equals 4.
  use omp_lib
  implicit none
  integer, parameter :: N=16
  integer :: correct_count          ! number of elements of x holding the expected value
  integer :: i
  integer, allocatable :: x(:), y(:)
  double precision :: te, tb        ! end / begin timestamps from omp_get_wtime()

  ! The directive applies to the allocate statement that immediately follows.
  ! NOTE(review): omp_target_shared_mem_alloc is presumably the vendor
  ! allocator that returns memory accessible from both host and device --
  ! confirm against the compiler documentation.
  !$omp allocate allocator(omp_target_shared_mem_alloc)
  allocate(x(N),y(N))

  print *,'Number of OpenMP Devices ',omp_get_num_devices()

  ! The timed region covers host-side initialisation as well as both kernels.
  tb = omp_get_wtime()

  ! Section assignment: fills the existing allocations in place.
  x(:) = 1
  y(:) = 1

  ! Kernel 1. Both arrays are referenced inside the region, so both are
  ! listed in has_device_addr. No map clause is needed: the only other
  ! variable used is the loop index.
  !$omp target has_device_addr(x, y)
  !$omp teams distribute parallel do
  do i=1,N
    x(i) = x(i) + y(i)
  end do
  !$omp end target

  ! Host updates y between the kernels.
  y(:) = 2

  ! Kernel 2: same update with the new y values.
  !$omp target has_device_addr(x, y)
  !$omp teams distribute parallel do
  do i=1,N
    x(i) = x(i) + y(i)
  end do
  !$omp end target

  te = omp_get_wtime()
  print *,'Time of kernel ',te-tb,' seconds'

  ! 1 + 1 + 2 = 4 is the expected value of every element.
  correct_count = count(x == 4)

  if (correct_count==N) then
    print *, 'Test: PASSED'
  else
    print *, 'Test: Failed'
  end if

  deallocate(x,y)
end program main

0 commit comments

Comments
 (0)