#pragma -xO
c   date: fri, 24 jun 88 11:16:54 pdt
c   from: david bailey <dbailey@ew11.nas.nasa.gov>
      program nasker
c
c   nas kernel benchmark program
c   12/17/84      david h bailey
c
      character*8 pn(8)
      real er(8), fp(8), tm(8), rt(8)
      data pn/'mxm', 'cfft2d', 'cholsky', 'btrix', 'gmtry', 'emit',
     $ 'vpenta', 'total'/
c
      call cputim(-1.0d0)
c
      write (6, 1)
1     format (/16x, 'the nas kernel benchmark program'//)
c
      call mxmtst (er(1), fp(1), tm(1))
      call ffttst (er(2), fp(2), tm(2))
      call chotst (er(3), fp(3), tm(3))
      call btrtst (er(4), fp(4), tm(4))
      call gmttst (er(5), fp(5), tm(5))
      call emitst (er(6), fp(6), tm(6))
      call vpetst (er(7), fp(7), tm(7))
c
      te = 0.
      tf = 0.
      tt = 0.
      do 100 i = 1, 7
        te = te + er(i)
        tf = tf + fp(i)
        tt = tt + tm(i)
        rt(i) = 1e-6 * fp(i) / tm(i)
100   continue
      er(8) = te
      fp(8) = tf
      tm(8) = tt
      rt(8) = 1e-6 * tf / tt
c
      write (6, 2)  (pn(i), er(i), fp(i), tm(i), rt(i), i = 1, 8)
2     format (' program', 8x, 'error', 10x, 'fp ops', 7x, 'seconds',
     $  6x, 'mflops'// 7(1x, a8, 1p, 2e15.4, 0p, f12.4, f12.2/)/
     $  1x, a8, 1p, 2e15.4, 0p, f12.4, f12.2//)
c
      stop
      end
c
      function cptime ()
c
c   returns the cpu time since the last call to cptime.
c   this subprogram may be changed as needed for a particular computer
c   system without penalty, provided it performs this function.
c
c     data tx/0./
c     t = second ()
c     cptime = t - tx
c     tx = t
      real*8 a
      call cputim(a)
      cptime = a
      return
      end
c
      subroutine copy (n, a, b)
c
c   array copy routine
c
      real a(n), b(n)
      do 100 i = 1, n
        b(i) = a(i)
100   continue
      return
      end
c
      subroutine mxmtst (er, fp, tm)
c
c   floating-point matrix multiply test
c
      parameter (l=256, m=128, n=64, f7=78125., t30=1073741824.)
      common /arrays2/ a(l,m), s1, b(m,n), s2, c(l,n)
      data it/100/, ans/35.2026179738722/
c
c   initialization
c
c   the arrays a and b are filled with pseudo-random  (0., 1.)  data
c   using a random number generator based on the recursion
c           x(n+1) = 5**7 * x(n)  (mod 2**30)
c   this recursion will generate 2**28 (approx. 268 million) numbers
c   before repeating.  for this scheme to work properly, the hardware
c   multiply operation must be correct to 47 bits of precision.
c   this same scheme is used to initialize data arrays for all tests.
c
      t = f7 / t30
      do 100 j = 1, m
        do 100 i = 1, l
          t = mod (f7 * t, 1.)
          a(i,j) = t
100   continue
      do 110 j = 1, n
        do 110 i = 1, m
          t = mod (f7 * t, 1.)
          b(i,j) = t
110   continue
      tm = cptime ()
c
c   timing test
c
      do 120 ii = 1, it
        call mxm (a, b, c, l, m, n)
120   continue
c
      tm = cptime ()
      er = abs ((c(19,19) - ans) / ans)
      fp = 2. * it * l * m * n
c
      return
      end
c
      subroutine mxm (a, b, c, l, m, n)
      dimension a(l,m), b(m,n), c(l,n)
c
c   4-way unrolled matrix multiply routine for vector computers.
c   m must be a multiple of 4.  contiguous data assumed.
c   d h bailey  11/15/84
c
      do 100 k = 1, n
        do 100 i = 1, l
          c(i,k) = 0.
100     continue
      do 110 j = 1, m, 4
        do 110 k = 1, n
          do 110 i = 1, l
            c(i,k) = c(i,k) + a(i,j) * b(j,k)
     $             + a(i,j+1) * b(j+1,k) + a(i,j+2) * b(j+2,k)
     $             + a(i,j+3) * b(j+3,k)
110   continue
c
      return
      end
c
      subroutine ffttst (er, fp, tm)
c
c   2-d fft test program
c
      parameter (m=128, n=256, m1=128, f7=78125., t30=1073741824.)
      complex x, y, ct
      common /arrays3/ x(m1,n), w1(m), w2(n), ip(2*n)
      data it/100/, ans/0.894799941219277/
c
c   initialize
c
      amn = m * n
      rmn = 1. / amn
      t2 = f7 / t30
      do 100 j = 1, n
        do 100 i = 1, m
          t1 = mod (f7 * t2, 1.)
          t2 = mod (f7 * t1, 1.)
          x(i,j) = cmplx (t1, t2)
100   continue
      call cfft2d1 (0, m, m1, n, x, w1, ip)
      call cfft2d2 (0, m, m1, n, x, w2, ip)
      tm = cptime ()
c
c   test iterations
c
      do 120 k = 1, it
        do 110 j = 1, n
          do 110 i = 1, m
            x(i,j) = rmn * x(i,j)
110     continue
c
        call cfft2d1 (1, m, m1, n, x, w1, ip)
        call cfft2d2 (1, m, m1, n, x, w2, ip)
        call cfft2d2 (-1, m, m1, n, x, w2, ip)
        call cfft2d1 (-1, m, m1, n, x, w1, ip)
120   continue
c
      tm = cptime ()
      er = abs ((real(x(19,19)) - ans) / ans)
      fp = it * amn * (2.  +  10. * log (amn)/log (2.))
c
      return
      end
c
      subroutine cfft2d1 (is, m, m1, n, x, w, ip)
c
c   performs complex radix 2 ffts on the first dimension of the 2-d array x
c   d h bailey  11/15/84
c
      complex x(m1,n), w(m), ct, cx
      integer ip(2,m)
      data pi/3.141592653589793/
c
c   if  is = 0  then initialize only
c
      m2 = m / 2
      if (is .eq. 0)  then
        do 100 i = 1, m2
          t = 2. * pi * (i-1) / m
          w(i) = cmplx (cos (t), sin (t))
100     continue
        return
      endif
c
c   perform forward or backward ffts according to  is = 1 or -1
c
      do 110 i = 1, m
        ip(1,i) = i
110   continue
      l = 1
      i1 = 1
c
120   i2 = 3 - i1
      do 130 j = l, m2, l
        cx = w(j-l+1)
        if (is .lt. 0)  cx = conjg (cx)
        do 130 i = j-l+1, j
          ii = ip(i1,i)
          ip(i2,i+j-l) = ii
          im = ip(i1,i+m2)
          ip(i2,i+j) = im
          do 130 k = 1, n
            ct = x(ii,k) - x(im,k)
            x(ii,k) = x(ii,k) + x(im,k)
            x(im,k) = ct * cx
130   continue
      l = 2 * l
      i1 = i2
      if (l .le. m2)  goto 120
c
      do 150 i = 1, m
        ii = ip(i1,i)
        if (ii .gt. i)  then
          do 140 k = 1, n
            ct = x(i,k)
            x(i,k) = x(ii,k)
            x(ii,k) = ct
140       continue
        endif
150   continue
c
      return
      end
c
      subroutine cfft2d2 (is, m, m1, n, x, w, ip)
c
c   performs complex radix 2 ffts on the second dimension of the 2-d array x
c   d h bailey  11/15/84
c
      complex x(m1,n), w(n), ct, cx
      integer ip(2,n)
      data pi/3.141592653589793/
c
c   if  is = 0  then initialize only
c
      n2 = n / 2
      if (is .eq. 0)  then
        do 100 i = 1, n2
          t = 2. * pi * (i-1) / n
          w(i) = cmplx (cos (t), sin (t))
100     continue
        return
      endif
c
c   peform forward or backward ffts according to  is = 1 or -1
c
      do 110 i = 1, n
        ip(1,i) = i
110   continue
      l = 1
      i1 = 1
c
120   i2 = 3 - i1
      do 130 j = l, n2, l
        cx = w(j-l+1)
        if (is .lt. 0)  cx = conjg (cx)
        do 130 i = j-l+1, j
          ii = ip(i1,i)
          ip(i2,i+j-l) = ii
          im = ip(i1,i+n2)
          ip(i2,i+j) = im
          do 130 k = 1, m
            ct = x(k,ii) - x(k,im)
            x(k,ii) = x(k,ii) + x(k,im)
            x(k,im) = ct * cx
130   continue
      l = 2 * l
      i1 = i2
      if (l .le. n2)  goto 120
c
      do 150 i = 1, n
        ii = ip(i1,i)
        if (ii .gt. i)  then
          do 140 k = 1, m
            ct = x(k,i)
            x(k,i) = x(k,ii)
            x(k,ii) = ct
140       continue
        endif
150   continue
c
      return
      end
c
      subroutine chotst (er, fp, tm)
c
c   cholsky test program
c
      parameter (ida=250, nmat=250, m=4, n=40, nrhs=3, f7=78125.,
     $ t30=1073741824.)
      common /arrays4/ a(0:ida, -m:0, 0:n), b(0:nrhs, 0:nmat, 0:n),
     $   ax(0:ida, -m:0, 0:n), bx(0:nrhs, 0:nmat, 0:n)
      data  it/200/, ans/5177.88531774562/
c
c   initialize
c
      la = (ida+1) * (m+1) * (n+1)
      lb = (nrhs+1) * (nmat+1) * (n+1)
      t = f7 / t30
      do 100 k = 0, n
        do 100 j = -m, 0
          do 100 i = 0, ida
            t = mod (f7 * t, 1.)
            ax(i,j,k) = t
100     continue
      do 110 k = 0, n
        do 110 j = 0, nmat
          do 110 i = 0, nrhs
            t = mod (f7 * t, 1.)
            bx(i,j,k) = t
110   continue
      tm = cptime ()
c
c   begin timing test
c
      do 120 j = 1, it
        call copy (la, ax, a)
        call copy (lb, bx, b)
        call cholsky (ida, nmat, m, n, a, nrhs, ida, b)
120   continue
c
      tm = cptime ()
      er = abs ((b(1,19,19) - ans) / ans)
      fp = it * (nmat + 1) * 4403.
c
      return
      end
c
      subroutine cholsky (ida, nmat, m, n, a, nrhs, idb, b)
c
c   cholesky decomposition/substitution subroutine.
c
c   11/28/84  d h bailey  modified for nas kernel test
c
      real a(0:ida, -m:0, 0:n), b(0:nrhs, 0:idb, 0:n), epss(0:256)
      data eps/1e-13/
c
c  cholesky decomposition
c
      do 1 j = 0, n
        i0 = max ( -m, -j )
c
c  off diagonal elements
c
        do 2 i = i0, -1
          do 3 jj = i0 - i, -1
            do 3 l = 0, nmat
3             a(l,i,j) = a(l,i,j) - a(l,jj,i+j) * a(l,i+jj,j)
          do 2 l = 0, nmat
2           a(l,i,j) = a(l,i,j) * a(l,0,i+j)
c
c  store inverse of diagonal elements
c
        do 4 l = 0, nmat
4         epss(l) = eps * a(l,0,j)
        do 5 jj = i0, -1
          do 5 l = 0, nmat
5           a(l,0,j) = a(l,0,j) - a(l,jj,j) ** 2
        do 1 l = 0, nmat
1         a(l,0,j) = 1. / sqrt ( abs (epss(l) + a(l,0,j)) )
c
c  solution
c
      do 6 i = 0, nrhs
        do 7 k = 0, n
          do 8 l = 0, nmat
8           b(i,l,k) = b(i,l,k) * a(l,0,k)
          do 7 jj = 1, min (m, n-k)
            do 7 l = 0, nmat
7             b(i,l,k+jj) = b(i,l,k+jj) -  a(l,-jj,k+jj) * b(i,l,k)
c
        do 6 k = n, 0, -1
          do 9 l = 0, nmat
9           b(i,l,k) = b(i,l,k) * a(l,0,k)
          do 6 jj = 1, min (m, k)
            do 6 l = 0, nmat
6             b(i,l,k-jj) = b(i,l,k-jj) - a(l,-jj,k) * b(i,l,k)
c
      return
      end
c
      subroutine btrtst (er, fp, tm)
c
c   btrix test program
c
      parameter (jd=30, kd=30, ld=30, md=30, f7=78125., t30=1073741824.)
      common /arrays5/ s(jd,kd,ld,5), a(5,5,md,md), b(5,5,md,md),
     $     c(5,5,md,md), sx(jd,kd,ld,5), bx(5,5,md,md)
      data js/2/, je/29/, ls/2/, le/29/, it/20/, ans/-0.286282658663962/
c
c   initialization
c
      nb = 25 * md * md
      ns = jd * kd * ld * 5
      t = f7 / t30
      do 100 l = 1, md
        do 100 k = 1, md
          do 100 j = 1, 5
            do 100 i = 1, 5
              t = mod (f7 * t, 1.)
              a(i,j,k,l) = t
              t = mod (f7 * t, 1.)
              bx(i,j,k,l) = t
              t = mod (f7 * t, 1.)
              c(i,j,k,l) = t
100   continue
      do 110 l = 1, 5
        do 110 k = 1, ld
          do 110 j = 1, kd
            do 110 i = 1, jd
              t = mod (f7 * t, 1.)
              sx(i,j,k,l) = t
110   continue
      tm = cptime ()
c
c   timing test
c
      do 120 ii = 1, it
        call copy (ns, sx, s)
        do 120 k = 1, kd
          call copy (nb, bx, b)
          call btrix (js, je, ls, le, k)
120   continue
c
      tm = cptime ()
      er = abs ((s(19,19,19,1) - ans) / ans)
      fp = it * md * (le - 1) * 19165.
c
      return
      end
c
      subroutine btrix (js, je, ls, le, k)
c
c     vectorized block tri-diagonal solver in the j direction
c          for k = constant planes
c
c   11/15/84  d h bailey  modified for nas kernel test
c
      parameter (jd=30, kd=30, ld=30, md=30)
      common /arrays5/ s(jd,kd,ld,5), a(5,5,md,md), b(5,5,md,md),
     $     c(5,5,md,md)
c
      dimension    u12(md), u13(md), u14(md), u15(md), u23(md),
     $             u24(md), u25(md), u34(md), u35(md), u45(md)
c
      real         l11(md), l21(md), l31(md), l41(md), l51(md),
     $             l22(md), l32(md), l42(md), l52(md), l33(md),
     $             l43(md), l53(md), l44(md), l54(md), l55(md)
c
c     part 1.  forward block sweep
c
c
      do 100 j   = js,je
c
c**********   step 1.  construct l(i) in b    **************************
c
      if(j.eq.js) go to 4
      do 3 m = 1,5
        do 3 n = 1,5
          do 3 l = ls,le
            b(m,n,j,l) = b(m,n,j,l) - a(m,1,j,l)*b(1,n,j-1,l)
     $           - a(m,2,j,l)*b(2,n,j-1,l) - a(m,3,j,l)*b(3,n,j-1,l)
     $           - a(m,4,j,l)*b(4,n,j-1,l) - a(m,5,j,l)*b(5,n,j-1,l)
    3 continue
c
    4 continue
c
c**********    step 2.  conpute l inverse    ***************************
c
c
c        a.  decompose l(i) into l and u
c
      do 20 l = ls,le
        l11(l)  = 1. / b(1,1,j,l)
        u12(l)  = b(1,2,j,l)*l11(l)
        u13(l)  = b(1,3,j,l)*l11(l)
        u14(l)  = b(1,4,j,l)*l11(l)
        u15(l)  = b(1,5,j,l)*l11(l)
        l21(l)  = b(2,1,j,l)
        l22(l)  = 1. / (b(2,2,j,l) - l21(l)*u12(l))
        u23(l)  = (b(2,3,j,l) - l21(l)*u13(l)) * l22(l)
        u24(l)  = (b(2,4,j,l) - l21(l)*u14(l)) * l22(l)
        u25(l)  = (b(2,5,j,l) - l21(l)*u15(l)) * l22(l)
        l31(l)  = b(3,1,j,l)
        l32(l)  = b(3,2,j,l) - l31(l)*u12(l)
        l33(l)  = 1. / (b(3,3,j,l) - l31(l)*u13(l) - l32(l)*u23(l))
        u34(l)  = (b(3,4,j,l) - l31(l)*u14(l) - l32(l)*u24(l)) * l33(l)
        u35(l)  = (b(3,5,j,l) - l31(l)*u15(l) - l32(l)*u25(l)) * l33(l)
   20 continue
c
      do 25 l = ls,le
        l41(l)  = b(4,1,j,l)
        l42(l)  = b(4,2,j,l) - l41(l)*u12(l)
        l43(l)  = b(4,3,j,l) - l41(l)*u13(l) - l42(l)*u23(l)
        l44(l)  = 1. / (b(4,4,j,l) - l41(l)*u14(l) - l42(l)*u24(l)
     $      - l43(l)*u34(l))
        u45(l)  = (b(4,5,j,l) - l41(l)*u15(l) - l42(l)*u25(l)
     $      - l43(l)*u35(l)) * l44(l)
        l51(l)  = b(5,1,j,l)
        l52(l)  = b(5,2,j,l) - l51(l)*u12(l)
        l53(l)  = b(5,3,j,l) - l51(l)*u13(l) - l52(l)*u23(l)
        l54(l)  = b(5,4,j,l) - l51(l)*u14(l) - l52(l)*u24(l)
     $      - l53(l)*u34(l)
        l55(l)  = 1. / (b(5,5,j,l) - l51(l)*u15(l) - l52(l)*u25(l)
     $      - l53(l)*u35(l) - l54(l)*u45(l))
   25 continue
c
c**********    step 3.  solve for intermediate vector    ***************
c
c         a.  construct rhs
c
      if(j.eq.js) go to 34
      do 33 m = 1,5
        do 33 l = ls,le
          s(j,k,l,m) = s(j,k,l,m) - a(m,1,j,l)*s(j-1,k,l,1)
     $         - a(m,2,j,l)*s(j-1,k,l,2) - a(m,3,j,l)*s(j-1,k,l,3)
     $         - a(m,4,j,l)*s(j-1,k,l,4) - a(m,5,j,l)*s(j-1,k,l,5)
   33 continue
c
c         b. intermediate vector
c
   34 continue
c
c         fwd substitution
c
      do 35 l = ls,le
        d1 = s(j,k,l,1)*l11(l)
        d2 = (s(j,k,l,2) - l21(l)*d1) * l22(l)
        d3 = (s(j,k,l,3) - l31(l)*d1 - l32(l)*d2) * l33(l)
        d4 = (s(j,k,l,4) - l41(l)*d1 - l42(l)*d2 - l43(l)*d3) * l44(l)
        d5 = (s(j,k,l,5) - l51(l)*d1 - l52(l)*d2 - l53(l)*d3
     $       - l54(l)*d4) * l55(l)
c
c         bwd substitution
c
        s(j,k,l,5)  = d5
        s(j,k,l,4)  = d4 - u45(l)*d5
        s(j,k,l,3)  = d3 - u34(l)*s(j,k,l,4) - u35(l)*d5
        s(j,k,l,2)  = d2 - u23(l)*s(j,k,l,3) - u24(l)*s(j,k,l,4)
     $       - u25(l)*d5
        s(j,k,l,1)  = d1 - u12(l)*s(j,k,l,2) - u13(l)*s(j,k,l,3)
     $       - u14(l)*s(j,k,l,4) - u15(l)*d5
   35 continue
c
c**********    step 4.  construct u(i) = l(i)**(-1)*c(i+1)    **********
c**********             by columns and store in b            **********
c
      if(j.eq.je) go to 100
      do 40 n = 1,5
        do 40 l = ls,le
c
c         fwd substitution
c
          c1 = c(1,n,j,l)*l11(l)
          c2 = (c(2,n,j,l) - l21(l)*c1) * l22(l)
          c3 = (c(3,n,j,l) - l31(l)*c1 - l32(l)*c2) * l33(l)
          c4 = (c(4,n,j,l) - l41(l)*c1 - l42(l)*c2 - l43(l)*c3)
     $          * l44(l)
          c5 = (c(5,n,j,l) - l51(l)*c1 - l52(l)*c2 - l53(l)*c3
     $          - l54(l)*c4) * l55(l)
c
c         bwd substitution
c
          b(5,n,j,l)  = c5
          b(4,n,j,l)  = c4 - u45(l)*c5
          b(3,n,j,l)  = c3 - u34(l)*b(4,n,j,l) - u35(l)*c5
          b(2,n,j,l)  = c2 - u23(l)*b(3,n,j,l) - u24(l)*b(4,n,j,l)
     $         - u25(l)*c5
          b(1,n,j,l)  = c1 - u12(l)*b(2,n,j,l) - u13(l)*b(3,n,j,l)
     $         - u14(l)*b(4,n,j,l) - u15(l)*c5
   40 continue
c
c
  100 continue
c
c     part 2.  backward block sweep
c
      jem1 = je - 1
c
      do 200 j = jem1,js,-1
        do 200 m = 1,5
          do 200 l = ls,le
            s(j,k,l,m) = s(j,k,l,m) - b(m,1,j,l)*s(j+1,k,l,1)
     $           - b(m,2,j,l)*s(j+1,k,l,2) - b(m,3,j,l)*s(j+1,k,l,3)
     $           - b(m,4,j,l)*s(j+1,k,l,4) - b(m,5,j,l)*s(j+1,k,l,5)
  200 continue
c
      return
      end
c
      subroutine gmttst (er, fp, tm)
c
      parameter (nw=100, nb=5, f7=78125., t30=1073741824.)
      complex wall, zcr, proj, zi, z1, zz
      common /arrays6/ nwall(nb), wall(nw,nb), rmatrx(nw*nb,nw*nb),
     $ zcr(nw,nb), proj(nw,nb), xxmax(nb)
      data it/2/, ans/-2.57754233214174/
c
c   initialization
c
      lw = 2 * nw * nb
      t2 = f7 / t30
      do 100 j = 1, nb
        nwall(j) = nw
100   continue
      do 110 j = 1, nb
        do 110 i = 1, nw
          t1 = mod (f7 * t2, 1.)
          t2 = mod (f7 * t1, 1.)
          wall(i,j) = cmplx (t1, t2)
110   continue
      tm = cptime ()
c
c   timing test
c
      do 120 i = 1, it
        call gmtry
120   continue
c
      tm = cptime ()
      er = abs ((rmatrx(19,19) - ans) / ans)
      fp = it * (120. * (nb*nw) ** 2  +  0.666 * (nb*nw) ** 3)
c
      return
      end
c
      subroutine gmtry
c
c  compute solid-related arrays,  gauss eliminate the matrix of wall
c  influence coefficients.
c
c    11/30/84  d h bailey    revised code for nas kernel test
c
      parameter (nw=100, nb=5)
      complex wall, zcr, proj, zi, z1, zz
      common /arrays6/ nwall(nb), wall(nw,nb), rmatrx(nw*nb,nw*nb),
     $ zcr(nw,nb), proj(nw,nb), xxmax(nb)
c
      data  arcl /5./, pi /3.141592653589793/, period/3./
c
c  compute arclength.
c
      matdim = 0
      arcl = 0.
      ymin = 1.e+50
      ymax = -1.e+50
      pidp = pi / period
c
      do 9 l = 1, nb
        matdim = matdim + nwall(l)
        do 9 k = 1,nwall(l)
          arcl = arcl + abs(wall(k,l) - wall(1+mod(k,nwall(l)), l))
9     continue
c
c  compute core radius.
c
      r0 = arcl / (matdim*2.)
      sigma = r0 / 2.
c
c  define creation points.
c
      do 6 l = 1,nb
        do 5 k = 1,nwall(l)
          zz = wall(1+mod(k+nwall(l)-2,nwall(l)), l)
     &         - wall(1+mod(k,nwall(l)), l)
          zcr(k,l) = wall(k,l) + cmplx(0., r0/abs(zz)) * zz
5       continue
c
c  check that wall and creation points are not crossed due to
c  too sharp a concave kink or an error in defining the body.
c  also find highest,  lowest and right-most point.
c
        xxmax(l) = real(zcr(1,l))
        ls = 0
        do 6 k = 1,nwall(l)
          ymin = min (ymin, aimag(zcr(k,l)))
          ymax = max (ymax, aimag(zcr(k,l)))
          xxmax(l) = max (xxmax(l), real(zcr(k,l)))
          kp = 1 + mod(k, nwall(l))
          if (real((zcr(kp,l) - zcr(k,l)) *
     &        conjg(wall(kp,l) - wall(k,l))).gt.0.) then
            ls = l
            ks = k
          endif
6     continue
c
c       if (ls .ne. 0) then
c         write (6, 102) ls, ks
c102      format(" on body number ", i3, " you have too sharp a",
c     &      " kink near point ", i4)
c         stop
c       endif
c
c  the "main period" will be between ylimit and ylimit + period.
c
      ylimit = (ymin - period + ymax)/2
c
c  project creation points into main period. this is technical.
c
      do 1 l = 1,nb
        do 1 k = 1,nwall(l)
          proj(k,l) = zcr(k,l) - cmplx(0., period*
     &           (int(5. + (aimag(zcr(k,l)) - ylimit) / period) - 5.))
1     continue
c
c   compute matrix.
c
      sig2 = (2. * pidp * sigma) ** 2
      i0 = 0
      do 2 l1 = 1,nb
        j0 = 0
        do 4 l2 = 1,nb
          kron = 0
          if (l1 .eq. l2) kron = 1
          do 3 j = 1,nwall(l2)
            rmatrx(i0+1,j0+j) = kron
            z1 = exp ((wall(1,l1) - zcr(j,l2)) * pidp)
            z1 = z1 - 1. / z1
            dum = sig2 + real(z1)**2 + aimag(z1)**2
            do 3 i = 2,nwall(l1)
              zi = exp ((wall(i,l1) - zcr(j,l2)) * pidp)
              zz = zi - 1. / zi
              rmatrx(i0+i,j0+j) = -0.25 / pi * log (dum /
     &              (sig2 + real(zz) ** 2 + aimag(zz) ** 2))
3         continue
          j0 = j0 + nwall(l2)
4       continue
        i0 = i0 + nwall(l1)
2     continue
c
c  gauss elimination
c
      do 8 i = 1, matdim
        rmatrx(i,i) = 1. / rmatrx(i,i)
        do 8 j = i+1, matdim
          rmatrx(j,i) = rmatrx(j,i) * rmatrx(i,i)
          do 8 k = i+1, matdim
            rmatrx(j,k) = rmatrx(j,k) - rmatrx(j,i) * rmatrx(i,k)
8     continue
c
      return
      end
c
      subroutine emitst (er, fp, tm)
c
c   emit test subroutine
c
      parameter (nw=100, nb=5, nv=1000, nvm=1500, f7=78125.,
     $ t30=1073741824.)
      complex z, wall, zcr, refpt,  expwkl, expmwk, force,
     & uupstr, dum3, expz, expmz
      common /arrays6/ nwall(nb), wall(nw,nb), rmatrx(nw*nb,nw*nb),
     $ zcr(nw,nb), z(nvm), gamma(nvm), refpt(nb), rhs(nw*nb),
     $ force(nb), rmom(nb), cp(nw,nb), dpds(nw,nb), expz(nvm),
     $ expmz(nvm), psi(nw), ps(nvm)
      data it/10/, ans/6.0088546832072/
c
      t2 = f7 / t30
      do 100 j = 1, nb
        nwall(j) = nw
        refpt(j) = 0.
        force(j) = 0.
        rmom(j) = 0.
        do 100 i = 1, nw
          t1 = mod (f7 * t2, 1.)
          t2 = mod (f7 * t1, 1.)
          wall(i,j) = cmplx (t1, t2)
          t1 = mod (f7 * t2, 1.)
          t2 = mod (f7 * t1, 1.)
          zcr(i,j) = cmplx (t1, t2)
          dpds(i,j) = 0.
100   continue
      do 110 j = 1, nw*nb
        rmatrx(j,j) = 1.
        do 110 i = 1, j-1
          t2 = mod (f7 * t2, 1.)
          rmatrx(i,j) = .001 * t2
          rmatrx(j,i) = .001 * t2
110   continue
      do 120 i = 1, nvm
        t1 = mod (f7 * t2, 1.)
        t2 = mod (f7 * t1, 1.)
        z(i) = cmplx (t1, t2)
        t2 = mod (f7 * t2, 1.)
        gamma(i) = t2
120   continue
      tm = cptime ()
c
c   timing test
c
      do 130 i = 1, it
        call emit
130   continue
c
      tm = cptime ()
      er = abs ((rhs(19) - ans) / ans)
      fp = it * (56.*nv + nb*nw * (97. + 44.*nv + 2.*nb*nw))
c
      return
      end
c
      subroutine emit
c
c  emit new vortices to satisfy boundary condition.
c  finish computing pressure, forces, etc.
c
c   11/28/84  d h bailey  modified for nas kernel test
c
      parameter (nw=100, nb=5, nvm=1500)
      complex z, wall, zcr, refpt,  expwkl, expmwk, force,
     & uupstr, dum3, expz, expmz, zz
      common /arrays6/ nwall(nb), wall(nw,nb), rmatrx(nw*nb,nw*nb),
     $ zcr(nw,nb), z(nvm), gamma(nvm), refpt(nb), rhs(nw*nb),
     $ force(nb), rmom(nb), cp(nw,nb), dpds(nw,nb), expz(nvm),
     $ expmz(nvm), psi(nw), ps(nvm)
c
      data  period/3./, sig2/3./, u0/4./, matdim/500/, delt/1./,
     $  chord/5./, pi/3.141592653589793/, uupstr/(3., 4.)/
c
c  store exp(z(i)) and exp(-z(i)) to reduce work in inner loop 4.
c
      nv = 1000
      pidp = pi / period
c
      do 2 i = 1, nv
        expz(i) = exp (z(i) * pidp)
        expmz(i) = 1. / expz(i)
2     continue
c
      i0 = 0
      cupst = real(uupstr) ** 2 + aimag(uupstr) ** 2
c
      do 5 l = 1, nb
        do 6 k = 1, nwall(l)
          expwkl = exp (wall(k,l) * pidp)
          expmwk = 1. / expwkl
          sps = 0.
          do 4 i = 1, nv
            dum3 = expz(i) * expmwk - expwkl * expmz(i)
            ps(i) = gamma(i) * log (real(dum3) ** 2 +
     &              aimag(dum3) ** 2 + sig2)
            sps = sps + ps(i)
4         continue
          psi(k) = aimag(wall(k,l) * conjg (uupstr + cmplx (0., u0)))
     &             - sps * 0.25 / pi
6       continue
c
c  compute right-hand side.
c
        do 8 k = 1, nwall(l)
          rhs(i0+k) = psi(k) - psi(1)
8       continue
        i0 = i0 + nwall(l)
5     continue
c
c  solve system
c
      do 10 i = 1, matdim
        do 10 j = i+1, matdim
          rhs(j) = rhs(j) - rmatrx(j,i) * rhs(i)
10    continue
      do 11 i = matdim, 1, -1
        rhs(i) = rmatrx(i,i) * rhs(i)
        do 11 j = 1, i-1
          rhs(j) = rhs(j) - rmatrx(j,i) * rhs(i)
11    continue
c
c  create new vortices.
c
      nolld = nv
      i0 = 0
      do 7 l = 1, nb
        do 3 k = 1, nwall(l)
c
c  put the new vortex at the end of the array.
c
          nv = nv+1
          z(nv) = zcr(k,l)
          gamma(nv) = rhs(i0+k)
c
c  record the gain of linear and angular momentum
c
          force(l) = force(l) + gamma(nv) * z(nv)
          rmom(l) = rmom(l) + gamma(nv) * (real (z(nv) - refpt(l)) ** 2
     &              + aimag (z(nv) - refpt(l)) ** 2)
          dpds(k,l) = dpds(k,l) - gamma(nv)
3       continue
c
c  filter and integrate pressure gradient to get pressure
c
        cp(1,l) = 0.
        cpm = -1e50
        do 9 k = 2, nwall(l)
          cp(k,l) = cp(k-1,l) + (3. * (dpds(k,l) + dpds(k-1,l))
     &              + dpds(1+mod(k,nwall(l)), l)
     &              + dpds(1+mod(k+nwall(l)-3, nwall(l)), l))
     &              / (4. * delt * cupst)
          cpm = max (cpm, cp(k,l))
9       continue
c
c  normalize pressure
c
        do 12 k = 1, nwall(l)
          cp(k,l) = cp(k,l) - cpm
12      continue
c
c  finish computing force and moment,  as time rate of change of linear
c  and angular momentum
c
        force(l) = force(l) * cmplx (0., 2. / (delt * chord * cupst))
        rmom(l) = rmom(l) * 2. / (delt * chord ** 2 * cupst)
c
        i0=i0+nwall(l)
7     continue
c
      return
      end

      subroutine vpetst (er, fp, tm)
c
c   vpenta test program
c
      parameter (nja=128, njb=128, jl=1, ju=128, kl=1, ku=128,
     $ f7=78125., t30=1073741824.)
      common /arrays7/ a(nja,njb), b(nja,njb), c(nja,njb), d(nja,njb),
     $ e(nja,njb), f(nja,njb,3), x(nja,njb), y(nja,njb), fx(nja,njb,3)
      data it/400/, ans/-0.354649411858726/
c
      lf = nja * njb * 3
      t = f7 / t30
      do 100 j = kl, ku
        do 100 i = jl, ju
          t = mod (f7 * t, 1.)
          a(i,j) = t
          t = mod (f7 * t, 1.)
          b(i,j) = t
          t = mod (f7 * t, 1.)
          c(i,j) = t
          t = mod (f7 * t, 1.)
          d(i,j) = t
          t = mod (f7 * t, 1.)
          e(i,j) = t
          do 100 k = 1, 3
            t = mod (f7 * t, 1.)
            fx(i,j,k) = t
100   continue
      tm = cptime ()
c
c   timing test
c
      do 110 i = 1, it
        call copy (lf, fx, f)
        call vpenta
110   continue
c
      tm = cptime ()
      er = abs ((f(19,19,1) - ans) / ans)
      fp = it * ku * (40. * ku - 53.)
c
      return
      end
c
      subroutine vpenta
c
c    routine to invert 3 pentadiagonals simultaneously
c
c   12/05/84  d h bailey   modified for nas kernel test
c
      parameter (nja=128, njb=128, jl=1, ju=128, kl=1, ku=128)
      common /arrays7/ a(nja,njb), b(nja,njb), c(nja,njb), d(nja,njb),
     $ e(nja,njb), f(nja,njb,3), x(nja,njb), y(nja,njb), fx(nja,njb,3)
c
c	! start forward generation process and sweep
c
      j = jl
      do 1 k = kl,ku
        rld = c(j,k)
        rldi = 1./rld
        f(j,k,1) = f(j,k,1)*rldi
        f(j,k,2) = f(j,k,2)*rldi
        f(j,k,3) = f(j,k,3)*rldi
        x(j,k) = d(j,k)*rldi
        y(j,k) = e(j,k)*rldi
1     continue
c
      j = jl+1
      do 2 k = kl,ku
        rld1 = b(j,k)
        rld = c(j,k) - rld1*x(j-1,k)
        rldi = 1./rld
        f(j,k,1) = (f(j,k,1) - rld1*f(j-1,k,1))*rldi
        f(j,k,2) = (f(j,k,2) - rld1*f(j-1,k,2))*rldi
        f(j,k,3) = (f(j,k,3) - rld1*f(j-1,k,3))*rldi
        x(j,k) = (d(j,k) - rld1*y(j-1,k))*rldi
        y(j,k) = e(j,k)*rldi
2     continue
c
      do 3 j = jl+2,ju-2
        do 11 k = kl,ku
          rld2 = a(j,k)
          rld1 = b(j,k) - rld2*x(j-2,k)
          rld = c(j,k) - (rld2*y(j-2,k) + rld1*x(j-1,k))
          rldi = 1./rld
          f(j,k,1) = (f(j,k,1) - rld2*f(j-2,k,1) - rld1*f(j-1,k,1))*rldi
          f(j,k,2) = (f(j,k,2) - rld2*f(j-2,k,2) - rld1*f(j-1,k,2))*rldi
          f(j,k,3) = (f(j,k,3) - rld2*f(j-2,k,3) - rld1*f(j-1,k,3))*rldi
          x(j,k) = (d(j,k) - rld1*y(j-1,k))*rldi
          y(j,k) = e(j,k)*rldi
11      continue
3     continue
c
      j = ju-1
      do 12 k = kl,ku
        rld2 = a(j,k)
        rld1 = b(j,k) - rld2*x(j-2,k)
        rld = c(j,k) - (rld2*y(j-2,k) + rld1*x(j-1,k))
        rldi = 1./rld
        f(j,k,1) = (f(j,k,1) - rld2*f(j-2,k,1) - rld1*f(j-1,k,1))*rldi
        f(j,k,2) = (f(j,k,2) - rld2*f(j-2,k,2) - rld1*f(j-1,k,2))*rldi
        f(j,k,3) = (f(j,k,3) - rld2*f(j-2,k,3) - rld1*f(j-1,k,3))*rldi
        x(j,k) = (d(j,k) - rld1*y(j-1,k))*rldi
12    continue
c
      j = ju
      do 13 k = kl,ku
        rld2 = a(j,k)
        rld1 = b(j,k) - rld2*x(j-2,k)
        rld = c(j,k) - (rld2*y(j-2,k) + rld1*x(j-1,k))
        rldi = 1./rld
        f(j,k,1) = (f(j,k,1) - rld2*f(j-2,k,1) - rld1*f(j-1,k,1))*rldi
        f(j,k,2) = (f(j,k,2) - rld2*f(j-2,k,2) - rld1*f(j-1,k,2))*rldi
        f(j,k,3) = (f(j,k,3) - rld2*f(j-2,k,3) - rld1*f(j-1,k,3))*rldi
13    continue
c
c        !  back sweep solution
c
      do 14 k = kl,ku
        f(ju,k,1) = f(ju,k,1)
        f(ju,k,2) = f(ju,k,2)
        f(ju,k,3) = f(ju,k,3)
        f(ju-1,k,1) = f(ju-1,k,1) - x(ju-1,k)*f(ju,k,1)
        f(ju-1,k,2) = f(ju-1,k,2) - x(ju-1,k)*f(ju,k,2)
        f(ju-1,k,3) = f(ju-1,k,3) - x(ju-1,k)*f(ju,k,3)
14    continue
c
      do 4 j = 2,ju-jl
        jx = ju-j
        do 15 k = kl,ku
          f(jx,k,1) = f(jx,k,1) - x(jx,k)*f(jx+1,k,1) -
     *                y(jx,k)*f(jx+2,k,1)
          f(jx,k,2) = f(jx,k,2) - x(jx,k)*f(jx+1,k,2) -
     *                y(jx,k)*f(jx+2,k,2)
          f(jx,k,3) = f(jx,k,3) - x(jx,k)*f(jx+1,k,3) -
     *                y(jx,k)*f(jx+2,k,3)
15      continue
4     continue
c
      return
      end


