Parallel Programming Services logo


Sample Code & Cluster User's Guide

Below is the code for a Sample OpenMP Program.

Sample OpenMP Program


         program stress_test
!=======================================================
!  Simple contrived program to see how systems perform
!  repeated OpenMP multi-threaded tests.
!
!  This test repeatedly "smooths" or "filters" a 3-d
!  (gridded) array, starting either from a random or
!  deterministic data array.
! 
!  This makes a good system "stress test", since the
!  arrays can be as larger or small as required, and
!  the job can be run for as long or as short as
!  required (by varying the number of filtering passes).
!  The 7-point filter around the centre-point in a 3-d
!  lattice structure poses a performance challenge for
!  scalar processors!
!
!  This version does direct "brute force" treatment of
!  domain faces, edges and corners.
!
!  The program reads 8 input variables, from 6 lines
!  of standard input, or redirected from a file. 
!  See "program parameters" below.
!=======================================================
       implicit none
       integer nx,ny,nz, i,j,k, iseed, icount, maxcount
       integer iofreq
!
       real xran1, twopi,xdist,ydist,zdist
       real, allocatable :: arr_in(:,:,:), arr_out(:,:,:)
       real valmax, wght, rwght3,rwght4,rwght5,rwght6
!
!
!==========================================================
!      Some program parameters:
!   (These may be read from a file, redirected from stdin).
!==========================================================
      read(*,*) maxcount   ! Max. number of filter iterations (problem length)
      read(*,*) nx,ny,nz   ! 3-d grid dimensions (determines problem size)
      read(*,*) iofreq     ! No. of iterations between writing output
      read(*,*) wght       ! Filter weights (coefficients)
      read(*,*) valmax     ! Max. value of 3-d field (for scaling only)
      read(*,*) iseed      ! Zero for deterministic array; otherwise random seed
!
!  Some suggested values:
!      100                 ! maxcount, number of filter iterations
!      400, 400, 800       ! nx,ny,nz values, to use 1 GB memory altogether.
!      10                  ! iofreq, i.e, output every 10 iterations
!      0.05                ! wght, for relatively "light" filtering
!      100.                ! valmax, just for order-1 output values
!      0                   ! iseed, zero for deterministic array, best for 
						    checking correctness
!
        rwght3 = 1.0 - 3.0*wght  ! Weight factors derived from fundamental weight
        rwght4 = 1.0 - 4.0*wght
        rwght5 = 1.0 - 5.0*wght
        rwght6 = 1.0 - 6.0*wght
!
      allocate (arr_in(nx,ny,nz))
      allocate (arr_out(nx,ny,nz))
!
      print *, 'Maxcount:        ',maxcount
      print *, 'nx,ny,nz:        ',nx,ny,nz
      print *, 'Output interval: ',iofreq
      print *, 'Weight (wght):   ',wght
      print *, 'Value range:     ',valmax
      print *, 'Random seed:     ',iseed
      if(iseed.eq.0) then
         print *, 'Using deterministic initialization'
      else
         print *, 'Using random initialization'
      endif
      print *, ' '
!
!=====================================================
!  Initialize the main data array,
!  either deterministically, or with random numbers:
!=====================================================
      if(iseed.eq.0) then
        twopi = 8.0*atan(1.0)
        do k=1,nz
          zdist=twopi*float(k-1)/float(nz-1)
          do j=1,ny
            ydist=twopi*float(j-1)/float(ny-1)
            do i=1,nx
              xdist=twopi*float(i-1)/float(nx-1)
        arr_in(i,j,k) = valmax*Cos(7.*xdist)*cos(13.*ydist)*
     &              cos(11.*zdist)
            enddo
          enddo
        enddo
!
      else
!
       do k=1,nz
         do j=1,ny
           do i=1,nx
             arr_in(i,j,k) = valmax*(-1. + 2.0*xran1(iseed))
           enddo
         enddo
       enddo
      endif
!
      write(*,*) 'Initial random values are:'
      write(*,100) (arr_in(2,2,k),k=2,nz-1,nz/7)
      open (unit=12,file='stresstest.dat',form='formatted')
      write(12,*) 'The Parameters of this run are:'
      write(12,*) 'Maxcount:        ',maxcount
      write(12,*) 'nx,ny,nz:        ',nx,ny,nz
      write(12,*) 'Output interval: ',iofreq
      write(12,*) 'Weight (wght):   ',wght
      write(12,*) 'Value range:     ',valmax
      write(12,*) 'Random seed:     ',iseed
      if(iseed.eq.0) then
         write(12,*) 'Using deterministic initialization'
      else
         write(12,*) 'Using random initialization'
      endif
      write(12,*) ' '
      write(12,*) 'Initial random values are (i,j,k=1,10):'
!     write (12,100) (arr_in(i,j,k),i=1,10),j=1,10),k=1,10)
      write (12,100) (arr_in(10,10,k),k=1,10)
      write (12,*) ' '
!
!     Start of loop over smoothing iterations:
      do icount=0,maxcount
!
!
!
!=====================================================
! --- Main smoothing loop:
!=====================================================
! Main body of data:
!$OMP PARALLEL DO PRIVATE(i,j,k)
      do k=2,nz-1
        do j=2,ny-1
          do i=2,nx-1
            arr_out(i,j,k) = rwght6*arr_in(i,j,k) + wght*(
     &         arr_in(i-1,j,k) + arr_in(i+1,j,k) +
     &         arr_in(i,j-1,k) + arr_in(i,j+1,k) +
     &         arr_in(i,j,k-1) + arr_in(i,j,k+1) )
          enddo
        enddo
      enddo
!$OMP END PARALLEL DO
!
! Now for the 6 faces:
!$OMP PARALLEL DO PRIVATE(i,j)
      do j=2,ny-1
        do i=2,nx-1
          arr_out(i,j,1) = rwght5*arr_in(i,j,1) + wght*(
     &         arr_in(i-1,j,1) + arr_in(i+1,j,1) +
     &         arr_in(i,j-1,1) + arr_in(i,j+1,1) +
     &         arr_in(i,j,2) )
          arr_out(i,j,nz) = rwght5*arr_in(i,j,nz) + wght*(
     &         arr_in(i-1,j,nz) + arr_in(i+1,j,nz) +
     &         arr_in(i,j-1,nz) + arr_in(i,j+1,nz) +
     &         arr_in(i,j,nz-1) )
        enddo
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(i,k)
      do k=2,nz-1
        do i=2,nx-1
          arr_out(i,1,k) = rwght5*arr_in(i,1,k) + wght*(
     &         arr_in(i-1,1,k) + arr_in(i+1,1,k) +
     &         arr_in(i,1,k-1) + arr_in(i,1,k+1) +
     &         arr_in(i,2,k) )
          arr_out(i,ny,k) = rwght5*arr_in(i,ny,k) + wght*(
     &         arr_in(i-1,ny,k) + arr_in(i+1,ny,k) +
     &         arr_in(i,ny,k-1) + arr_in(i,ny,k+1) +
     &         arr_in(i,ny-1,k) )
        enddo
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(j,k)
      do k=2,nz-1
        do j=2,ny-1
          arr_out(1,j,k) = rwght5*arr_in(1,j,k) + wght*(
     &         arr_in(1,j-1,k) + arr_in(1,j+1,k) +
     &         arr_in(1,j,k-1) + arr_in(1,j,k+1) +
     &         arr_in(2,j,k) )
          arr_out(nx,j,k) = rwght5*arr_in(nx,j,k) + wght*(
     &         arr_in(nx,j-1,k) + arr_in(nx,j+1,k) +
     &         arr_in(nx,j,k-1) + arr_in(nx,j,k+1) +
     &         arr_in(nx-1,j,k) )
        enddo
      enddo
!$OMP END PARALLEL DO
!
! - Now 12 edges:
!$OMP PARALLEL DO PRIVATE(i)
      do i=2,nx-1
         arr_out(i,1,1) = rwght4*arr_in(i,1,1) + wght*(
     &      arr_in(i-1,1,1) + arr_in(i+1,1,1) +
     &      arr_in(i,1,2)   + arr_in(i,2,1) )
         arr_out(i,ny,1) = rwght4*arr_in(nx,ny,1) + wght*(
     &      arr_in(i-1,ny,1) + arr_in(i+1,ny,1) +
     &      arr_in(i,ny-1,1) + arr_in(i,ny,2) )
         arr_out(i,1,nz) = rwght4*arr_in(i,1,nz) + wght*(
     &      arr_in(i-1,1,nz) + arr_in(i+1,1,nz) +
     &      arr_in(i,2,nz)   + arr_in(i,1,nz-1) )
         arr_out(i,ny,nz) = rwght4*arr_in(i,ny,nz) + wght*(
     &      arr_in(i-1,ny,nz) + arr_in(i+1,ny,nz) +
     &      arr_in(i,ny-1,nz) + arr_in(i,ny,nz-1) )
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(j)
      do j=2,ny-1
         arr_out(1,j,1) = rwght4*arr_in(1,j,1) + wght*(
     &      arr_in(1,j-1,1) + arr_in(1,j+1,1) +
     &      arr_in(1,j,2)   + arr_in(2,j,1) )
         arr_out(nx,j,1) = rwght4*arr_in(nx,j,1) + wght*(
     &      arr_in(nx,j-1,1) + arr_in(nx,j+1,1) +
     &      arr_in(nx-1,j,1) + arr_in(nx,j,2) )
         arr_out(1,j,nz) = rwght4*arr_in(1,j,nz) + wght*(
     &      arr_in(1,j-1,nz) + arr_in(1,j+1,nz) +
     &      arr_in(2,j,nz)   + arr_in(1,j,nz-1) )
         arr_out(nx,j,nz) = rwght4*arr_in(nx,j,nz) + wght*(
     &      arr_in(nx,j-1,nz) + arr_in(nx,j+1,nz) +
     &      arr_in(nx-1,j,nz) + arr_in(nx,j,nz-1) )
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(k)
      do k=2,nz-1
         arr_out(1,1,k) = rwght4*arr_in(1,1,k) + wght*(
     &      arr_in(1,1,k-1) + arr_in(1,1,k+1) +
     &      arr_in(1,2,k)   + arr_in(2,1,k) )
         arr_out(nx,1,k) = rwght4*arr_in(nx,1,k) + wght*(
     &      arr_in(nx,1,k-1) + arr_in(nx,1,k+1) +
     &      arr_in(nx-1,1,k) + arr_in(nx,2,k) )
         arr_out(1,ny,k) = rwght4*arr_in(1,ny,k) + wght*(
     &      arr_in(1,ny,k-1) + arr_in(1,ny,k+1) +
     &      arr_in(2,ny,k)   + arr_in(1,ny-1,k) )
         arr_out(nx,ny,k) = rwght4*arr_in(nx,ny,k) + wght*(
     &      arr_in(nx,ny,k-1) + arr_in(nx,ny,k+1) +
     &      arr_in(nx-1,ny,k) + arr_in(nx,ny-1,k) )
      enddo
!$OMP END PARALLEL DO
!
! Now 8 corner points:
      arr_out(1,1,1) = rwght3*arr_in(1,1,1) + wght*(
     &   arr_in(2,1,1)+arr_in(1,2,1)+arr_in(1,1,2))
      arr_out(nx,1,1) = rwght3*arr_in(nx,1,1) + wght*(
     &   arr_in(nx-1,1,1)+arr_in(nx,2,1)+arr_in(nx,1,2))
      arr_out(1,ny,1) = rwght3*arr_in(1,ny,1) + wght*(
     &   arr_in(2,ny,1)+arr_in(1,ny-1,1)+arr_in(1,ny,2))
      arr_out(1,1,nz) = rwght3*arr_in(1,1,nz) + wght*(
     &   arr_in(2,1,nz)+arr_in(1,2,nz)+arr_in(1,1,nz-1))
      arr_out(nx,ny,1) = rwght3*arr_in(nx,ny,1) + wght*(
     &   arr_in(nx-1,ny,1)+arr_in(nx,ny-1,1)+arr_in(nx,ny,2))
      arr_out(1,ny,nz) = rwght3*arr_in(1,ny,nz) + wght*(
     &   arr_in(2,ny,nz)+arr_in(1,ny-1,nz)+arr_in(1,ny,nz-1))
      arr_out(nx,1,nz) = rwght3*arr_in(nx,1,nz) + wght*(
     &   arr_in(nx-1,1,nz)+arr_in(nx,2,nz)+arr_in(nx,1,nz-1))
      arr_out(nx,ny,nz)=rwght3*arr_in(nx,ny,nz) + wght*(
     &   arr_in(nx-1,ny,nz)+arr_in(nx,ny-1,nz)+arr_in(nx,ny,nz-1))
!
!
!
!--- Need to update arr_in for next iteration:
!    Main body of data:
!$OMP PARALLEL DO PRIVATE(i,j,k)
      do k=2,nz-1
        do j=2,ny-1
          do i=2,nx-1
            arr_in(i,j,k) = rwght6*arr_out(i,j,k) + wght*(
     &         arr_out(i-1,j,k) + arr_out(i+1,j,k) +
     &         arr_out(i,j-1,k) + arr_out(i,j+1,k) +
     &         arr_out(i,j,k-1) + arr_out(i,j,k+1) )
          enddo
        enddo
      enddo
!$OMP END PARALLEL DO
!
!    Next the 6 faces
!$OMP PARALLEL DO PRIVATE(i,j)
      do j=2,ny-1
        do i=2,nx-1
          arr_in(i,j,1) = rwght5*arr_out(i,j,1) + wght*(
     &         arr_out(i-1,j,1) + arr_out(i+1,j,1) +
     &         arr_out(i,j-1,1) + arr_out(i,j+1,1) +
     &         arr_out(i,j,2) )
          arr_in(i,j,nz) = rwght5*arr_out(i,j,nz) + wght*(
     &         arr_out(i-1,j,nz) + arr_out(i+1,j,nz) +
     &         arr_out(i,j-1,nz) + arr_out(i,j+1,nz) +
     &         arr_out(i,j,nz-1) )
        enddo
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(i,k)
      do k=2,nz-1
        do i=2,nx-1
          arr_in(i,1,k) = rwght5*arr_out(i,1,k) + wght*(
     &         arr_out(i-1,1,k) + arr_out(i+1,1,k) +
     &         arr_out(i,1,k-1) + arr_in(i,1,k+1) +
     &         arr_out(i,2,k) )
          arr_in(i,ny,k) = rwght5*arr_out(i,ny,k) + wght*(
     &         arr_out(i-1,ny,k) + arr_out(i+1,ny,k) +
     &         arr_out(i,ny,k-1) + arr_in(i,ny,k+1) +
     &         arr_out(i,ny-1,k) )
        enddo
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(k,j)
      do k=2,nz-1
        do j=2,ny-1
          arr_in(1,j,k) = rwght5*arr_out(1,j,k) + wght*(
     &         arr_out(1,j-1,k) + arr_out(1,j+1,k) +
     &         arr_out(1,j,k-1) + arr_in(1,j,k+1) +
     &         arr_out(2,j,k) )
          arr_in(nx,j,k) = rwght5*arr_out(nx,j,k) + wght*(
     &         arr_out(nx,j-1,k) + arr_out(nx,j+1,k) +
     &         arr_out(nx,j,k-1) + arr_in(nx,j,k+1) +
     &         arr_out(nx-1,j,k) )
        enddo
      enddo
!$OMP END PARALLEL DO
!
! - Now 12 edges:
!$OMP PARALLEL DO PRIVATE(i)
      do i=2,nx-1
         arr_in(i,1,1) = rwght4*arr_out(i,1,1) + wght*(
     &      arr_out(i-1,1,1) + arr_out(i+1,1,1) +
     &      arr_out(i,1,2)   + arr_out(i,2,1) )
         arr_in(i,ny,1) = rwght4*arr_out(nx,ny,1) + wght*(
     &      arr_out(i-1,ny,1) + arr_out(i+1,ny,1) +
     &      arr_out(i,ny-1,1) + arr_out(i,ny,2) )
         arr_in(i,1,nz) = rwght4*arr_out(i,1,nz) + wght*(
     &      arr_out(i-1,1,nz) + arr_out(i+1,1,nz) +
     &      arr_out(i,2,nz)   + arr_out(i,1,nz-1) )
         arr_in(i,ny,nz) = rwght4*arr_out(i,ny,nz) + wght*(
     &      arr_out(i-1,ny,nz) + arr_out(i+1,ny,nz) +
     &      arr_out(i,ny-1,nz) + arr_out(i,ny,nz-1) )
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(j)
      do j=2,ny-1
         arr_in(1,j,1) = rwght4*arr_out(1,j,1) + wght*(
     &      arr_out(1,j-1,1) + arr_out(1,j+1,1) +
     &      arr_out(1,j,2)   + arr_out(2,j,1) )
         arr_in(nx,j,1) = rwght4*arr_out(nx,j,1) + wght*(
     &      arr_out(nx,j-1,1) + arr_out(nx,j+1,1) +
     &      arr_out(nx-1,j,1) + arr_out(nx,j,2) )
         arr_in(1,j,nz) = rwght4*arr_out(1,j,nz) + wght*(
     &      arr_out(1,j-1,nz) + arr_out(1,j+1,nz) +
     &      arr_out(2,j,nz)   + arr_out(1,j,nz-1) )
         arr_in(nx,j,nz) = rwght4*arr_out(nx,j,nz) + wght*(
     &      arr_out(nx,j-1,nz) + arr_out(nx,j+1,nz) +
     &      arr_out(nx-1,j,nz) + arr_out(nx,j,nz-1) )
      enddo
!$OMP END PARALLEL DO
!
!$OMP PARALLEL DO PRIVATE(k)
      do k=2,nz-1
         arr_in(1,1,k) = rwght4*arr_out(1,1,k) + wght*(
     &      arr_out(1,1,k-1) + arr_out(1,1,k+1) +
     &      arr_out(1,2,k)   + arr_out(2,1,k) )
         arr_in(nx,1,k) = rwght4*arr_out(nx,1,k) + wght*(
     &      arr_out(nx,1,k-1) + arr_out(nx,1,k+1) +
     &      arr_out(nx-1,1,k) + arr_out(nx,2,k) )
         arr_in(1,ny,k) = rwght4*arr_out(1,ny,k) + wght*(
     &      arr_out(1,ny,k-1) + arr_out(1,ny,k+1) +
     &      arr_out(2,ny,k)   + arr_out(1,ny-1,k) )
         arr_in(nx,ny,k) = rwght4*arr_out(nx,ny,k) + wght*(
     &      arr_out(nx,ny,k-1) + arr_out(nx,ny,k+1) +
     &      arr_out(nx-1,ny,k) + arr_out(nx,ny-1,k) )
      enddo
!$OMP END PARALLEL DO
!
! Now 8 corner points:
      arr_in(1,1,1) = rwght3*arr_out(1,1,1) + wght*(
     &   arr_out(2,1,1)+arr_out(1,2,1)+arr_out(1,1,2))
      arr_in(nx,1,1) = rwght3*arr_out(nx,1,1) + wght*(
     &   arr_out(nx-1,1,1)+arr_out(nx,2,1)+arr_out(nx,1,2))
      arr_in(1,ny,1) = rwght3*arr_out(1,ny,1) + wght*(
     &   arr_out(2,ny,1)+arr_out(1,ny-1,1)+arr_out(1,ny,2))
      arr_in(1,1,nz) = rwght3*arr_out(1,1,nz) + wght*(
     &   arr_out(2,1,nz)+arr_out(1,2,nz)+arr_out(1,1,nz-1))
      arr_in(nx,ny,1) = rwght3*arr_out(nx,ny,1) + wght*(
     &   arr_out(nx-1,ny,1)+arr_out(nx,ny-1,1)+arr_out(nx,ny,2))
      arr_in(1,ny,nz) = rwght3*arr_out(1,ny,nz) + wght*(
     &   arr_out(2,ny,nz)+arr_out(1,ny-1,nz)+arr_out(1,ny,nz-1))
      arr_in(nx,1,nz) = rwght3*arr_out(nx,1,nz) + wght*(
     &   arr_out(nx-1,1,nz)+arr_out(nx,2,nz)+arr_out(nx,1,nz-1))
      arr_in(nx,ny,nz)=rwght3*arr_out(nx,ny,nz) + wght*(
     &   arr_out(nx-1,ny,nz)+arr_out(nx,ny-1,nz)+arr_out(nx,ny,nz-1))
!
!
!
!
 
      if(icount.eq.0) write(*,*) 'Successively smoothed values are:'
      if(icount.lt.10 .or. mod(icount,iofreq).eq.0)
     &       write(*,100) (arr_out(2,2,k),k=2,nz-1,nz/7)
!
      enddo    ! (End of main smoothing loop over icount)
!
 999  write(12,*) 'Final (smoothed?) values are (i,j,k=1,10):'
!     write (12,100) (((arr_out(i,j,k),i=1,10),j=1,10),k=1,10)
      write (12,100) (arr_out(10,10,k),k=1,10)
!
      stop 'Normal end, max smoothing iterations completed.'
 100  format(6e12.4)
      end
 
 
 
 
 
      function xran1(idum)
c--------------------------------------------------------------------
c     Routine from Numerical Recipes to return a uniform random
c     deviate between 0.0 and 1.0.  Set idum to any negative number
c     to initialize or reinitialize the sequence.
c--------------------------------------------------------------------
      parameter (nn=97)
      parameter (m1=259200, ia1=7141, ic1=54773, rm1=1./m1)
      parameter (m2=134456, ia2=8121, ic2=28411, rm2=1./m2)
      parameter (m3=243000, ia3=4561, ic3=51349)
      real xran1, r(nn)
      save r, ix1,ix2,ix3
c
      data iff /0/
      if (idum.lt.0 .or. iff.eq.0) then
        iff = 1
        ix1 = mod(ic1-idum,m1)     ! seed the first routine
        ix1 = mod(ia1*ix1+ic1,m1)
        ix2 = mod(ix1,m2)          ! and use it to seed the second
        ix1 = mod(ia1*ix1+ic1,m1)
        ix3 = mod(ix1,m3)          ! and to seed the third
c
        do 11,j=1,nn                ! fill the table with sequential
          ix1 = mod(ia1*ix1+ic1,m1) ! random deviates generated by the
          ix2 = mod(ia2*ix2+ic2,m2) ! first two routines
          r(j) = (float(ix1)+float(ix2)*rm2)*rm1
 11     continue
c
        idum = 1
      endif
c
      ix1 = mod(ia1*ix1+ic1,m1)
      ix2 = mod(ia2*ix2+ic2,m2)
      ix3 = mod(ia3*ix3+ic3,m3)
c
      j = 1+(nn*ix3)/m3  ! use 3rd sequence for no. between 1 and 97
c
      if(j.gt.nn.or.j.lt.1) pause
      xran1 = r(j)        ! return that table entry
      r(j) = (float(ix1)+float(ix2)*rm2)*rm1    ! and refill it
      return
      end
 
      function xran2(idum)
      integer idum,ia,im,iq,ir,ntab,ndiv
      real xran2, am, eps, rnmx
      parameter (ia=16807,im=2147483647,am=1./im,iq=127773,ir=2836,
     &  ntab=32,ndiv=1+(im-1)/ntab,eps=1.1e-9,rnmx=1.-eps)
      integer j,k,iv(ntab),iy
      save iv,iy
!
      data iv/ntab*0/, iy/0/
!
      if (idum.le.0.or.iy.eq.0) then
          idum =max(-idum,1)
          do j=ntab+8,1,-1
            k=idum/iq
            idum=ia*(idum-k+iq)-ir*k
            if(idum.lt.0) idum=idum+im
            if(j.le.ntab) iv(j)=idum
          enddo
          iy=iv(1)
      endif
!
      k=idum/iq
      idum=ia*(idum-k*iq)-ir*k
      if (idum.lt.0) idum = idum + im
      j=1+iy/ndiv
      iy=iv(j)
      iv(j)=idum
      xran2=min(am*iy,rnmx)
      return
      end