nek/navier4_8F90_source.html

 !-----------------------------------------------------------------------

 !-----------------------------------------------------------------------

 module helmholtz

   use kinds, only : dp

   implicit none


   public :: hsolve, approx_space, init_approx_space

   private :: projh, gensh, hconj, updrhsh, hmhzpf


   !> Approximation space built from previously saved solution vectors.
   !! Set up by init_approx_space; projh, gensh, hconj and updrhsh read and
   !! update it.
   type approx_space

     real(DP), allocatable :: projectors(:,:) !< (ntot, 0:n_max); columns 1..n_max hold saved vectors, column 0 is scratch / the approximate solution

     integer :: n_max !< capacity: second dimension of projectors and order of h_red

     integer :: n_sav !< number of saved vectors currently in use (0 .. n_max)

     integer :: next !< column written by the latest gensh call (0 right after init); wraps from n_max back to 1

     real(DP) :: dt !< time step for which h_red was last refreshed; updrhsh compares it with the current dt


     ! Reduced matrix: hconj stores < projectors(:,i) | H | projectors(:,k) >
     ! in entry (i,k) and mirrors it into (k,i).
     real(DP), allocatable :: h_red(:,:)

   end type approx_space


 contains


 !> Initialize approximation space object.
 !!
 !! Allocates room for n_max saved vectors of length ntot (plus the extra
 !! column 0) and for the n_max x n_max reduced matrix, zero-fills both, and
 !! resets the counters and the remembered time step.
 subroutine init_approx_space(apx, n_max, ntot)
   use kinds, only : dp
   implicit none

   type(approx_space), intent(out) :: apx !< space to set up
   integer, intent(in) :: n_max           !< maximum number of saved vectors
   integer, intent(in) :: ntot            !< length of each vector

   ! Storage, zero-filled
   allocate(apx%projectors(ntot, 0:n_max))
   allocate(apx%H_red(n_max, n_max))
   apx%projectors(:,:) = 0._dp
   apx%H_red(:,:)      = 0._dp

   ! Bookkeeping: nothing saved yet, no time step seen yet
   apx%dt    = 0._dp
   apx%next  = 0
   apx%n_sav = 0
   apx%n_max = n_max

 end subroutine init_approx_space


 !> Project out the part of the residual that lies in the approximation space.
 !!
 !! Builds an approximate solution as a combination of the saved vectors
 !! apx%projectors(:,1:n_sav), stores it in apx%projectors(:,0), and replaces
 !! r by the residual recomputed against that approximate solution. With an
 !! empty space, column 0 is zeroed and r is returned unchanged.
 subroutine projh(r,h1,h2,bi,vml,vmk, apx, wl,ws,name4)
   use kinds, only : dp, qp
   use size_m, only : nx1, ny1, nz1, nelv
   use geom, only : voltm1, volvm1
   use tstep, only : istep, ifield, nelfld
   use parallel, only : nid
   use ctimer, only : nproj, tproj, proj_flop, proj_mop
   use ctimer, only : dnekclock
   implicit none

   real(DP), intent(inout) :: r(*)   !< residual; reduced on exit
   real(DP), intent(in)    :: h1(*)  !< Helmholtz coefficient forwarded to axhelm
   real(DP), intent(in)    :: h2(*)  !< Helmholtz coefficient forwarded to axhelm
   real(DP), intent(in)    :: vml(*) !< pointwise weight applied to r before the overlaps
   real(DP), intent(in)    :: vmk(*) !< pointwise factor applied to H*x before dssum
   real(DP), intent(in)    :: bi(*)  !< weight used in the residual norm (glsc23)
   real(DP), intent(out)   :: wl(*)  !< work array, >= ntot entries; doubles as DSYEV workspace
   real(DP), intent(out)   :: ws(*)  !< work array, >= 2*n_sav entries
   type(approx_space), intent(inout) :: apx !< approximation space
   character(4), intent(in) :: name4 !< label printed in the diagnostics

   integer :: nel, ntot, i, j, n10
   real(DP) :: vol, alpha1, alpha2, ratio
   real(DP), external :: glsc23
   real(DP), allocatable :: evecs(:,:), ev(:)
   integer :: ierr
   real(DP), parameter :: one = 1._dp, zero = 0._dp
   real(DP) :: etime
   real(QP) :: qsum

   nproj = nproj + 1
   etime = dnekclock()

   ! Nothing saved yet: the approximate solution is zero, r is left alone
   if (apx%n_sav == 0) then
     apx%projectors(:,0) = 0._dp
     return
   endif

   nel  = nelfld(ifield)
   ntot = nx1*ny1*nz1*nel

   vol = voltm1
   if (nel == nelv) vol = volvm1

   ! Diag to see how much reduction in the residual is attained.
   alpha1 = glsc23(r,bi,vml,ntot)
   if (alpha1 > 0) alpha1 = sqrt(alpha1/vol)

   ! Update approximation space if dt has changed.
   ! The clock is shifted around the call so that the time spent in
   ! updrhsh is not charged to tproj.
   etime = etime - dnekclock()
   call updrhsh(apx,h1,h2,vml,vmk,ws)
   etime = etime + dnekclock()

   ! Orthogonalize the approximation space: eigen-decomposition of the
   ! leading n_sav x n_sav part of H_red, with wl as LAPACK workspace
   if (10 * apx%n_sav + 100 > ntot) write(*,*) "wl isn't big enough to be dsyev's work"
   allocate(evecs(apx%n_sav, apx%n_sav), ev(apx%n_sav))
   evecs = apx%H_red(1:apx%n_sav,1:apx%n_sav)
   call dsyev('V', 'U', apx%n_sav, &
              evecs, apx%n_sav, &
              ev, &
              wl, ntot, ierr)
   if (nid == 0 .and. ierr /= 0) write(*,*) "DSYEV failed", ierr

   ! Compute overlap of residual and (non-orthogonal) projectors
   proj_flop = proj_flop + ntot
   proj_mop  = proj_mop + 3*ntot
   wl(1:ntot) = r(1:ntot) * vml(1:ntot)

   proj_flop = proj_flop + (2*ntot-1)*apx%n_sav
   proj_mop  = proj_mop + apx%n_sav * (ntot+1)
   call dgemv('T', ntot, apx%n_sav, &
              one,  apx%projectors(:,1:apx%n_sav), ntot, &
                    wl, 1, &
              zero, ws, 1)
   ! Global sum of the overlaps; the second half of ws is gop's scratch
   call gop(ws, ws(1+apx%n_sav), '+  ', apx%n_sav)

   ! Mix the overlaps to get the orthogonal projection
   ! and take the inverse by dividing by \lambda
   ! \todo sort the sums for more precision
   ! NOTE(review): there is no guard against a zero or tiny eigenvalue
   ! here -- confirm H_red is always safely positive definite.
   do i = 1, apx%n_sav
     qsum = 0._qp
     do j = 1, apx%n_sav
       qsum = qsum + evecs(j,i) * ws(j)
     enddo
     ev(i) = qsum / ev(i)
   enddo

   ! Compute the weights for the approximate solution
   do i = 1, apx%n_sav
     qsum = 0._qp
     do j = 1, apx%n_sav
       qsum = qsum + evecs(i,j) * ev(j)
     enddo
     ws(i) = qsum
   enddo

   ! Expand the approximate solution wrt (non-orth.) projectors
   proj_flop = proj_flop + ntot*(2*apx%n_sav-1)
   proj_mop  = proj_mop + (apx%n_sav+1) * ntot
   call dgemv('N', ntot, apx%n_sav, &
              one,  apx%projectors(:,1:apx%n_sav), ntot, &
                    ws, 1, &
              zero, apx%projectors(:,0), 1)

   ! Compute the new residual explicitly
   ! This fixes any numerical precision issues in the previous sections
   ! (axhelm time is likewise kept out of tproj)
   etime = etime - dnekclock()
   call axhelm(wl,apx%projectors(:,0),h1,h2,1,1)
   etime = etime + dnekclock()

   proj_flop = proj_flop + ntot
   proj_mop  = proj_mop + 2*ntot
   wl(1:ntot) = wl(1:ntot) * vmk(1:ntot)
   call dssum(wl)

   proj_flop = proj_flop + ntot
   proj_mop  = proj_mop + 2*ntot
   r(1:ntot) = r(1:ntot) - wl(1:ntot)

   !...............................................................
   ! Recompute the norm of the residual to show how much it has shrunk
   alpha2 = glsc23(r,bi,vml,ntot)
   if (alpha2 > 0) alpha2 = sqrt(alpha2/vol)

   ! The ratio is diagnostic only; avoid dividing by zero when the
   ! residual has been projected out completely.
   if (alpha2 > 0) then
     ratio = alpha1/alpha2
   else
     ratio = huge(ratio)
   endif

   tproj = tproj + (dnekclock() - etime)
   n10=min(10,apx%n_sav)

   if (nid == 0) write(6,10) istep,name4,alpha1,alpha2,ratio,apx%n_sav
   10 format(4x,i7,4x,a4,' alph1n',1p3e12.4,i6)

   ! ev now holds the eigen-basis coefficients (overlap / eigenvalue)
   if (nid == 0) write(6,11) istep,name4,apx%n_sav,(ev(i),i=1,n10)
   11 format(4x,i7,4x,a4,' halpha',i6,10(1p10e12.4,/,17x))

   return
 end subroutine projh


 !-----------------------------------------------------------------------

 !> Reconstruct the solution to the original problem by adding back the
 !! approximate solution held in apx%projectors(:,0), then store the full
 !! solution as a new vector of the approximation space.
 !!
 !! The slot to write advances cyclically through 1..n_max, so once the
 !! space is full the next slot in that cycle is overwritten.
 subroutine gensh(v1,h1,h2,vml,vmk,apx,ws)
   use kinds, only : dp
   use mesh, only : niterhm
   implicit none

   real(DP), intent(inout) :: v1 (*) !< in: correction from the solver; out: full solution
   real(DP), intent(in)    :: h1 (*) !< Helmholtz coefficient forwarded to hconj
   real(DP), intent(in)    :: h2 (*) !< Helmholtz coefficient forwarded to hconj
   real(DP), intent(in)    :: vmk(*) !< pointwise factor forwarded to hconj
   real(DP), intent(in)    :: vml(*) !< pointwise factor forwarded to hconj
   real(DP), intent(out)   :: ws(:)  !< work array forwarded to hconj
   type(approx_space), intent(inout) :: apx !< approximation space

   integer :: ntot, slot

   ntot = size(apx%projectors,1)

   ! Reconstruct solution
   v1(1:ntot) = v1(1:ntot) + apx%projectors(:,0)

   ! If the new vector is in the space already, don't re-add it.
   ! (presumably niterhm is the iteration count of the last solve -- TODO confirm)
   if (niterhm < 1) return

   ! A space with no capacity cannot store anything; this also keeps the
   ! mod() below from dividing by zero.
   if (apx%n_max < 1) return

   ! Add the solution to the approximation space
   apx%n_sav = min(apx%n_sav + 1, apx%n_max)
   slot      = mod(apx%next, apx%n_max) + 1
   apx%next  = slot
   call copy(apx%projectors(:,slot),v1,ntot)

   ! Refresh row/column `slot` of H_red. The index is passed through a
   ! local so it does not alias a component of the inout argument apx.
   call hconj(apx,slot,h1,h2,vml,vmk,ws)

   return
 end subroutine gensh


 !-----------------------------------------------------------------------

 !> Update the k-th row/column of H_red.
 !!
 !! Applies the Helmholtz operator to projectors(:,k), using column 0 of
 !! projectors as scratch, then stores the overlaps with projectors 1..n_sav
 !! in H_red(1:n_sav,k) and mirrors them into H_red(k,1:n_sav).
 subroutine hconj(apx,k,h1,h2,vml,vmk,ws)
   use kinds,  only : dp
   use ctimer, only : nhconj, thconj, hconj_flop, hconj_mop, dnekclock
   implicit none

   type(approx_space), intent(inout) :: apx !< approximation space; column 0 of projectors is overwritten
   integer,  intent(in) :: k      !< index of the projector whose row/column is refreshed
   real(DP), intent(in) :: h1(*)  !< Helmholtz coefficient forwarded to axhelm
   real(DP), intent(in) :: h2(*)  !< Helmholtz coefficient forwarded to axhelm
   real(DP), intent(in) :: vml(*) !< pointwise factor applied after dssum
   real(DP), intent(in) :: vmk(*) !< pointwise factor applied before dssum
   real(DP), intent(out) :: ws(*) !< scratch for gop, >= n_sav entries

   integer :: ntot
   real(DP), parameter :: one = 1._dp, zero = 0._dp
   real(DP) :: etime

   ntot = size(apx%projectors, 1)

   ! Operation counts. Each counter accumulates onto itself: flops onto
   ! hconj_flop, memory operations onto hconj_mop.
   ! ... the dgemv
   hconj_flop = hconj_flop + apx%n_sav*(2*ntot-1)
   hconj_mop  = hconj_mop  + apx%n_sav*(ntot+1)
   ! ... the two pointwise multiplies
   hconj_flop = hconj_flop + ntot
   hconj_mop  = hconj_mop  + 3*ntot
   hconj_flop = hconj_flop + ntot
   hconj_mop  = hconj_mop  + 3*ntot
   nhconj = nhconj + 1

   ! Compute H| projectors(:,k) >
   call axhelm(apx%projectors(:,0),apx%projectors(:,k),h1,h2,1,1)

   ! The timer starts here, so the axhelm call is not counted in thconj
   etime = dnekclock()
   apx%projectors(:,0) = apx%projectors(:,0) * vmk(1:ntot)
   call dssum(apx%projectors(:,0))
   apx%projectors(:,0) = apx%projectors(:,0) * vml(1:ntot)

   ! Compute < projectors(:,i) | H | projectors(:,k) > for i \in [1,n_sav]
   call dgemv('T', ntot, apx%n_sav, &
              one,  apx%projectors(1,1), ntot, &
                    apx%projectors(1,0), 1, &
              zero, apx%H_red(1,k), 1)
   call gop(apx%H_red(:,k), ws, '+  ', apx%n_sav)

   ! Re-symmetrize: copy column k into row k
   apx%H_red(k,1:apx%n_sav) = apx%H_red(1:apx%n_sav,k)

   thconj = thconj + (dnekclock() - etime)

   return
 end subroutine hconj


 !-----------------------------------------------------------------------

 !> Recompute H_red if dt has changed.
 !!
 !! H_red is refreshed when the time step differs from the one stored in
 !! apx, once more on the following call, and on every call when
 !! ifvarp(ifield) or iflomach is set.
 subroutine updrhsh(apx,h1,h2,vml,vmk,ws)
   use kinds, only : dp
   use input, only : ifvarp, iflomach
   use tstep, only : dt, ifield
   implicit none

   type(approx_space), intent(inout) :: apx !< approximation space whose H_red is refreshed
   real(DP), intent(in) :: h1(*)  !< Helmholtz coefficient forwarded to hconj
   real(DP), intent(in) :: h2(*)  !< Helmholtz coefficient forwarded to hconj
   real(DP), intent(in) :: vml(*) !< pointwise factor forwarded to hconj
   real(DP), intent(in) :: vmk(*) !< pointwise factor forwarded to hconj
   real(DP), intent(out) :: ws(*) !< work array forwarded to hconj

   logical :: ifupdate
   ! NOTE(review): this flag is saved per procedure, not per approximation
   ! space, so all spaces share it -- confirm that is intended.
   logical, save :: ifnewdt = .false.
   integer :: n_sav, k

   ! First, we have to decide if the dt has changed.
   ifupdate = .false.
   if (dt /= apx%dt) then
       apx%dt   = dt
       ifnewdt  = .true.
       ifupdate = .true.
   elseif (ifnewdt) then
       ! Refresh once more on the call after a dt change. Without this
       ! assignment ifnewdt would have no effect at all.
       ifupdate = .true.
       ifnewdt  = .false.
   endif
   if (ifvarp(ifield)) ifupdate = .true.
   if (iflomach)       ifupdate = .true.

   ! If it has, recompute apx%H_red column by column
   if (ifupdate) then
     n_sav = apx%n_sav
     ! Loops over columns to update. n_sav is raised one column at a time
     ! so that hconj only computes overlaps with projectors 1..k; it is
     ! back at its original value after the last pass.
     do k=1,n_sav
       apx%n_sav = k
       call hconj(apx, k, h1,h2,vml,vmk,ws)
     enddo
   endif

   return
 end subroutine updrhsh


 !-----------------------------------------------------------------------

 !> Helmholtz solve used after projection: pick the tolerance, set the
 !! preconditioner field flag, and call the conjugate-gradient solver cggo.
 !!
 !! The tolerance is tli unless param(22) is nonzero, in which case
 !! abs(param(22)) is used; it is then handed to chktcg1 before the solve.
 !! The elapsed time is accumulated in thmhz.
 subroutine hmhzpf(name,u,r,h1,h2,mask,mult,imesh,tli,maxit,isd,bi)
   use kinds, only : dp
   use size_m, only : lx1, ly1, lz1
   use size_m, only : ndim
   use ctimer, only : etime1, dnekclock, thmhz
   use fdmh1, only : kfldfdm
   use input, only : param
   implicit none

   character(4) ::    name                        !< field label, e.g. 'PRES'
   real(DP), intent(out) :: u    (lx1,ly1,lz1,1)  !< solution
   real(DP), intent(in)  :: r    (lx1,ly1,lz1,1)  !< right-hand side
   real(DP), intent(in)  :: h1   (lx1,ly1,lz1,1)  !< Helmholtz coefficient
   real(DP), intent(in)  :: h2   (lx1,ly1,lz1,1)  !< Helmholtz coefficient
   real(DP), intent(in)  :: mask (lx1,ly1,lz1,1)  !< forwarded to chktcg1 and cggo
   real(DP), intent(in)  :: mult (lx1,ly1,lz1,1)  !< forwarded to chktcg1 and cggo
   real(DP), intent(in)  :: bi   (lx1,ly1,lz1,1)  !< forwarded to cggo
   real(DP) :: tli                                !< requested tolerance
   integer :: imesh, maxit, isd                   !< forwarded to chktcg1 / cggo

   real(DP) :: tol

   etime1 = dnekclock()

   ! param(22), when set, overrides the caller's tolerance
   tol = tli
   if (param(22) /= 0) tol = abs(param(22))
   call chktcg1(tol,r,h1,h2,mask,mult,imesh,isd)

 !   Set flags for overlapping Schwarz preconditioner (pff 11/12/98)

   kfldfdm = -1
 !   if (name.eq.'TEMP') kfldfdm =  0
 !   if (name.eq.'VELX') kfldfdm =  1
 !   if (name.eq.'VELY') kfldfdm =  2
 !   if (name.eq.'VELZ') kfldfdm =  3
   if (name == 'PRES') kfldfdm =  ndim+1

   call cggo &
   (u,r,h1,h2,mask,mult,imesh,tol,maxit,isd,bi,name)
   thmhz = thmhz + (dnekclock() - etime1)

   return
 end subroutine hmhzpf


 !-----------------------------------------------------------------------

 !> Either std. Helmholtz solve, or a projection + Helmholtz solve.
 !!
 !! Projection is used for 'PRES' when param(95) /= 0, istep > param(95) and
 !! param(93) > 0, and for 'VELX'/'VELY'/'VELZ' when param(94) /= 0,
 !! istep > param(94) and param(92) > 0. The name is capitalized before the
 !! comparison. Every other case goes straight to hmholtz.
 subroutine hsolve(name,u,r,h1,h2,vmk,vml,imsh,tol,maxit,isd &
     ,apx,bi)
   use kinds, only : dp
   use size_m, only : lx1, ly1, lz1, lelv
   use input, only : param
   use string, only : capit
   use tstep, only : ifield, nelfld, istep
   implicit none

   character(4), intent(in) :: name                   !< field label
   real(DP), intent(out)   :: u    (lx1,ly1,lz1,lelv) !< solution
   real(DP), intent(inout) :: r    (lx1,ly1,lz1,lelv) !< right-hand side; modified when projecting
   real(DP), intent(in)    :: h1   (lx1,ly1,lz1,lelv) !< Helmholtz coefficient
   real(DP), intent(in)    :: h2   (lx1,ly1,lz1,lelv) !< Helmholtz coefficient
   real(DP), intent(in)    :: vmk  (lx1,ly1,lz1,lelv) !< passed to the solvers in the mask position
   real(DP), intent(in)    :: vml  (lx1,ly1,lz1,lelv) !< passed to the solvers in the mult position
   integer,  intent(in)    :: imsh                    !< mesh selector forwarded to the solvers
   real(DP), intent(in)    :: tol                     !< solver tolerance
   integer,  intent(in)    :: maxit                   !< iteration limit
   integer,  intent(in)    :: isd                     !< forwarded to the solvers
   type(approx_space), intent(inout) :: apx           !< approximation space (projection path only)
   real(DP), intent(in)    :: bi   (lx1,ly1,lz1,*)    !< forwarded to projh and hmhzpf

   real(DP), allocatable :: w1(:)   ! long work vector for projh
   real(DP), allocatable :: w2(:)   ! short work vector shared by projh and gensh

   logical :: ifstdh
   character(4) ::  cname
   integer :: nel

   call chcopy(cname,name,4)
   call capit(cname,4)

   ! figure out if we're projecting or not
   ifstdh = .true.
   select case (cname)
   case ('PRES')                   ! pressure solve
     if (param(95) /= 0 .and. istep > param(95) .and. param(93) > 0) ifstdh = .false.
   case ('VELX', 'VELY', 'VELZ')   ! velocity solve
     if (param(94) /= 0 .and. istep > param(94) .and. param(92) > 0) ifstdh = .false.
   end select

   ! Standard path: plain Helmholtz solve, no projection
   if (ifstdh) then
     call hmholtz(name,u,r,h1,h2,vmk,vml,imsh,tol,maxit,isd)
     return
   endif

   ! Projection path
   nel = nelfld(ifield)

   call dssum(r)
   r(:,:,:,1:nel) = r(:,:,:,1:nel) * vmk(:,:,:,1:nel)

   allocate(w2(2+2*apx%n_max))
   allocate(w1(lx1*ly1*lz1*lelv))

   ! Remove the part of r that lies in the approximation space
   call projh(r,h1,h2,bi,vml,vmk,apx,w1,w2,name)
   deallocate(w1)

   ! Solve for the remainder, then add the approximate solution back and
   ! save the result in the space
   call hmhzpf(name,u,r,h1,h2,vmk,vml,imsh,tol,maxit,isd,bi)
   call gensh(u,h1,h2,vml,vmk,apx,w2)
   deallocate(w2)

   return
 end subroutine hsolve

 !-----------------------------------------------------------------------


 end module helmholtz

helmholtz::updrhsh
subroutine, private updrhsh(apx, h1, h2, vml, vmk, ws)
Recompute H_red if dt has changed.
Definition: navier4.F90:301

dssum
subroutine dssum(u)
Direct stiffness sum.
Definition: dssum.F90:54

tstep
cleaned
Definition: tstep_mod.F90:2

input
Input parameters from preprocessors.
Definition: input_mod.F90:11

helmholtz::approx_space
Type to hold the approximation space. Should not be modified outside this module, so more of a handle...
Definition: navier4.F90:25

ctimer
Definition: ctimer_mod.F90:1

helmholtz::hmhzpf
subroutine, private hmhzpf(name, u, r, h1, h2, mask, mult, imesh, tli, maxit, isd, bi)
Definition: navier4.F90:344

axhelm
subroutine axhelm(au, u, helm1, helm2, imesh, isd)
Compute the (Helmholtz) matrix-vector product, AU = helm1*[A]u + helm2*[B]u, for NEL elements...
Definition: hmholtz.F90:79

helmholtz::hsolve
subroutine, public hsolve(name, u, r, h1, h2, vmk, vml, imsh, tol, maxit, isd, apx, bi)
Either std. Helmholtz solve, or a projection + Helmholtz solve.
Definition: navier4.F90:396

mesh
cleaned
Definition: mesh_mod.F90:2

ctimer::dnekclock
real(dp) function dnekclock()
Definition: ctimer_mod.F90:103

glsc2
real(dp) function glsc2(x, y, n)
Perform inner-product in double precision.
Definition: math.F90:210

copy
subroutine copy(a, b, n)
Definition: math.F90:52

chktcg1
subroutine chktcg1(tol, res, h1, h2, mask, mult, imesh, isd)
Check that the tolerances are not too small for the CG-solver. Important when calling the CG-solver (...
Definition: hmholtz.F90:637

parallel
cleaned
Definition: parallel_mod.F90:2

helmholtz::init_approx_space
subroutine, public init_approx_space(apx, n_max, ntot)
Initialize approximation space object.
Definition: navier4.F90:41

helmholtz
Definition: navier4.F90:15

string::capit
subroutine capit(lettrs, n)
Capitalizes string of length n.
Definition: string_mod.F90:161

helmholtz::projh
subroutine, private projh(r, h1, h2, bi, vml, vmk, apx, wl, ws, name4)
Project out the part of the residual in the approx space.
Definition: navier4.F90:62

geom
Geometry arrays.
Definition: geom_mod.F90:2

helmholtz::gensh
subroutine, private gensh(v1, h1, h2, vml, vmk, apx, ws)
Reconstruct the solution to the original problem by adding back the approximate solution, add solution to approximation space.
Definition: navier4.F90:215

fdmh1
Definition: fdmh1_mod.F90:1

gop
subroutine gop(x, w, op, n)
Global vector commutative operation.
Definition: comm_mpi.F90:104

cggo
subroutine cggo(x, f, h1, h2, mask, mult, imsh, tin, maxit, isd, binv, name)
Solve the Helmholtz equation, H*U = RHS, using preconditioned conjugate gradient iteration. Preconditioner: diag(H).
Definition: hmholtz.F90:744

chcopy
subroutine chcopy(a, b, n)
Definition: math.F90:63

kinds
Definition: kinds_mod.F90:1

helmholtz::hconj
subroutine, private hconj(apx, k, h1, h2, vml, vmk, ws)
Update the k-th row/column of H_red.
Definition: navier4.F90:250

string
Definition: string_mod.F90:1

hmholtz
subroutine hmholtz(name, u, rhs, h1, h2, mask, mult, imsh, tli, maxit, isd)
Definition: hmholtz.F90:2