Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
gmres.F90
Go to the documentation of this file.
1 
2 
3 
4 !-----------------------------------------------------------------------
6 subroutine ax(w,x,h1,h2,n)
7  use kinds, only : dp
8  use soln, only : pmask
9  implicit none
10 
11  integer :: n
12  real(DP) :: w(n),x(n),h1(n),h2(n)
13  integer :: imsh, isd
14 
15  imsh = 1
16  isd = 1
17  call axhelm(w,x,h1,h2,imsh,isd)
18  call dssum(w)
19  w = w * reshape(pmask, (/ n /))
20 
21  return
22 end subroutine ax
23 
24 !-----------------------------------------------------------------------
27 subroutine hmh_gmres(res,h1,h2,wt,iter)
28  use kinds, only : dp
29  use size_m, only : lx1, ly1, lz1, lx2, ly2, lz2, lelv, lgmres
30  use size_m, only : nx1, ny1, nz1, nelv, nid
31  use ctimer, only : tgmres, ngmres, gmres_flop, gmres_mop, dnekclock
32  use input, only : param, ifmgrid, ifprint
33  use geom, only : volvm1
34  use soln, only : pmask, vmult
35  use tstep, only : tolps, istep
36  use hsmg_routines, only : h1mg_solve
37  implicit none
38 
39  real(DP) :: res (lx1*ly1*lz1*lelv)
40  real(DP) :: h1 (lx1,ly1,lz1,lelv)
41  real(DP) :: h2 (lx1,ly1,lz1,lelv)
42  real(DP) :: wt (lx1*ly1*lz1*lelv)
43  integer :: iter
44 
45  real(DP), allocatable :: x(:),r(:),w(:)
46  real(DP), allocatable :: h(:,:),gamma(:)
47  real(DP), allocatable :: c(:),s(:) ! store the Givens rotations
48  real(DP), allocatable :: v(:,:) ! stores the orthogonal Krylov subspace basis
49  real(DP), allocatable :: z(:,:) ! Z = M**(-1) V
50 
51  real(DP) :: etime
52 
53  real(DP) :: divex
54 
55  real(DP) :: wk1(lgmres)
56 
57  real(DP) :: alpha, l, temp
58  integer :: outer
59 
60  logical, save :: iflag = .false., if_hyb = .false.
61  real(DP), save :: norm_fac
62 
63  real(DP) :: etime1, etime2, etime_p
64  real(DP) :: rnorm, tolpss, div0, ratio
65  real(DP), external :: glsc3, vlsc3
66  real(DP), parameter :: one = 1._dp, zero = 0._dp, mone = -1._dp
67 
68  integer :: m, n
69  integer :: i, j, k, iconv
70 
71  ngmres = ngmres + 1
72  !call hpm_start('gmres')
73  etime = dnekclock()
74 
76  allocate(h(lgmres,lgmres))
77  allocate(gamma(lgmres+1))
78  allocate(c(lgmres), s(lgmres))
79 
80  allocate(v(lx2*ly2*lz2*lelv,lgmres)) ! verified
81  allocate(z(lx2*ly2*lz2*lelv,lgmres)) ! verified
82 
83 
84 ! if (.not. allocated(ml)) allocate(ml(lx2*ly2*lz2*lelv))
85 ! if (.not. allocated(mu)) allocate(mu(lx2*ly2*lz2*lelv))
86 
88  h = 0._dp; gamma=0._dp; c = 0._dp; s = 0._dp
89 
90 
91  n = nx1*ny1*nz1*nelv
92 
93  etime1 = dnekclock()
94  etime_p = 0.
95  divex = 0.
96  iter = 0
97  m = lgmres
98 
99  if( .NOT. iflag) then
100  iflag= .true.
101 ! call uzawa_gmres_split(ml,mu,n)
102  norm_fac = 1./sqrt(volvm1)
103  endif
104 
106  !allocate(d(lx1*ly1*lz1*lelv))
107  !if (param(100) /= 2) call set_fdm_prec_h1b(d,h1,h2,nelv)
108  !deallocate(d)
109 
110  call chktcg1(tolps,res,h1,h2,pmask,vmult,1,1)
111  if (param(21) > 0 .AND. tolps > abs(param(21))) &
112  tolps = abs(param(21))
113  if (istep == 0) tolps = 1.e-4
114  tolpss = tolps
115 
116  iconv = 0
117 
118  outer = 0
119  allocate(x(lx2*ly2*lz2*lelv)); x = 0._dp
120  allocate(w(lx2*ly2*lz2*lelv))
121  allocate(r(lx2*ly2*lz2*lelv))
122  do while (iconv == 0 .AND. iter < 50)
123  outer = outer+1
124 
125  if(iter == 0) then ! -1
126  r = res ! r = L res
127  ! call copy(r,res,n)
128  else
129  ! update residual
130  gmres_mop = gmres_mop + 5*n
131  gmres_flop = gmres_flop + n
132  r = res
133  etime = etime - dnekclock()
134  !call hpm_stop('gmres')
135  call ax(w,x,h1,h2,n) ! w = A x
136  !call hpm_start('gmres')
137  etime = etime + dnekclock()
138  r = r - w
139  ! -1
140  ! r = r * ml ! r = L r
141  endif
142  ! ______
143  gmres_mop = gmres_mop + 2*n
144  gmres_flop = gmres_flop + 3*n
145 #if 1
146  gamma(1) = sum(r*r*wt)
147  call gop(gamma,wk1,'+ ',1) ! sum over P procs
148  gamma(1) = sqrt(gamma(1))
149 #else
150  gamma(1) = sqrt(glsc3(r,r,wt,n)) ! gamma = \/ (r,r)
151 #endif
152  ! 1
153  if(iter == 0) then
154  div0 = gamma(1)*norm_fac
155  if (param(21) < 0) tolpss=abs(param(21))*div0
156  endif
157 
158  ! check for lucky convergence
159  rnorm = 0.
160  if(gamma(1) == 0.) goto 9000
161  gmres_flop = gmres_flop + n
162  gmres_mop = gmres_mop + 2*n
163  temp = 1./gamma(1)
164  v(:,1) = r(:) * temp ! v = r / gamma
165  !w(i) = v(i,1)
166  ! 1 1
167  do j=1,m
168  iter = iter+1
169  ! -1
170  ! gmres_mop = gmres_mop + n
171  ! w = v(:,j) ! w = U v
172  ! j
173 
174  ! . . . . . Overlapping Schwarz + coarse-grid . . . . . . .
175 
176  etime2 = dnekclock()
177 
178  ! if (outer.gt.2) if_hyb = .true. ! Slow outer convergence
179  if (ifmgrid) then
180  etime = etime - dnekclock()
181  !call hpm_stop('gmres')
182  call h1mg_solve(z(1,j),v(1,j),if_hyb) ! z = M w
183  !call hpm_start('gmres')
184  etime = etime + dnekclock()
185  else ! j
186  write(*,*) "Oops: ifmgrid"
187 #if 0
188  kfldfdm = ndim+1
189  if (param(100) == 2) then
190  call h1_overlap_2(z(1,j),w,pmask)
191  else
192  call fdm_h1 &
193  (z(1,j),w,d,pmask,vmult,nelv,ktype(1,1,kfldfdm),wk)
194  endif
195  call crs_solve_h1(wk,w) ! z = M w
196  call add2(z(1,j),wk,n) ! j
197 #endif
198  endif
199 
200  gmres_flop = gmres_flop + 2*n
201  gmres_mop = gmres_mop + 3*n
202  call ortho(z(1,j)) ! Orthogonalize wrt null space, if present
203  etime_p = etime_p + dnekclock()-etime2
204  ! . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
205 
206 
207  etime = etime - dnekclock()
208  !call hpm_stop('gmres')
209  call ax(w,z(1,j),h1,h2,n) ! w = A z
210  !call hpm_start('gmres')
211  etime = etime + dnekclock()
212 
213  gmres_mop = gmres_mop + 3*n + (j+1)*n
214  gmres_flop = gmres_flop + (2*n-1)*j + n
215  r = w * wt
216  call dgemv('T', n, j, &
217  one, v, n, &
218  r, 1, &
219  zero, h(1,j), 1)
220 
221  call gop(h(1,j),wk1,'+ ',j) ! sum over P procs
222 
223  gmres_mop = gmres_mop + (j+2)*n
224  gmres_flop = gmres_flop + (j*3)*n ! because alpha = -1
225  call dgemv('N', n, j, &
226  mone, v, n, &
227  h(1,j), 1, &
228  one, w, 1)
229 
230  ! apply Givens rotations to new column
231  do i=1,j-1
232  temp = h(i,j)
233  h(i ,j)= c(i)*temp + s(i)*h(i+1,j)
234  h(i+1,j)= -s(i)*temp + c(i)*h(i+1,j)
235  enddo
236  ! ______
237  gmres_mop = gmres_mop + 2*n
238  gmres_flop = gmres_flop + 3*n
239 #if 1
240  alpha = sum(w*w*wt)
241  call gop(alpha, wk1, '+ ', 1)
242  alpha = sqrt(alpha)
243 #else
244  alpha = sqrt(glsc3(w,w,wt,n)) ! alpha = \/ (w,w)
245 #endif
246  rnorm = 0.
247  if(alpha == 0.) goto 900 !converged
248  l = sqrt(h(j,j)*h(j,j)+alpha*alpha)
249  temp = 1./l
250  c(j) = h(j,j) * temp
251  s(j) = alpha * temp
252  h(j,j) = l
253  gamma(j+1) = -s(j) * gamma(j)
254  gamma(j) = c(j) * gamma(j)
255  rnorm = abs(gamma(j+1))*norm_fac
256  ratio = rnorm/div0
257  if (ifprint .AND. nid == 0) &
258  write (6,66) iter,tolpss,rnorm,div0,ratio,istep
259  66 format(i5,1p4e12.5,i8,' Divergence')
260 
261 #ifndef TST_WSCAL
262  if (rnorm < tolpss) goto 900 !converged
263 #else
264  if (iter > param(151)-1) goto 900
265 #endif
266  if (j == m) goto 1000 !not converged, restart
267 
268  gmres_mop = gmres_mop + 2*n
269  gmres_flop = gmres_flop + n
270  temp = 1./alpha
271  v(:,j+1) = w(:) * temp ! v = w / alpha
272  !w(i) = v(i,j+1)
273  ! j+1
274  enddo
275  900 iconv = 1
276  1000 continue
277  ! back substitution
278  ! -1
279  !c = H gamma
280  do k=j,1,-1
281  temp = gamma(k)
282  do i=j,k+1,-1
283  temp = temp - h(k,i)*c(i)
284  enddo
285  c(k) = temp/h(k,k)
286  enddo
287  ! sum up Arnoldi vectors
288  gmres_flop = gmres_flop + 2*n * j
289  gmres_mop = gmres_mop + (j+2)*n
290  call dgemv('N', n, j, &
291  one, z, n, &
292  c, 1, &
293  one, x, 1)
294  !do i=1,j
295  ! x = x + z(:,i) * c(i) ! x = x + c z
296  !enddo ! i i
297  ! if(iconv.eq.1) call dbg_write(x,nx1,ny1,nz1,nelv,'esol',3)
298  enddo
299  9000 continue
300 
301  divex = rnorm
302  gmres_mop = gmres_mop + 2*n
303  res = x
304 
305  gmres_flop = gmres_flop + 2*n
306  gmres_mop = gmres_mop + 3*n
307  call ortho(res) ! Orthogonalize wrt null space, if present
308 
309  etime1 = dnekclock()-etime1
310  if (nid == 0) write(6,9999) istep,iter,divex,tolpss,div0,etime_p, &
311  etime1,if_hyb
312 ! call flush_hack
313  9999 format(i9,' PRES gmres:',i5,1p5e12.4,1x,l4)
314 
315  if (outer <= 2) if_hyb = .false.
316 
317  tgmres = tgmres + (dnekclock() - etime)
318  !call hpm_stop('gmres')
319 
320  return
321 end subroutine hmh_gmres
322 !-----------------------------------------------------------------------
323 
static double sum(struct xxt *data, double v, uint n, uint tag)
Definition: xxt.c:400
subroutine ax(w, x, h1, h2, n)
w = A*x for pressure iteration
Definition: gmres.F90:6
subroutine dssum(u)
Direct stiffness sum.
Definition: dssum.F90:54
cleaned
Definition: tstep_mod.F90:2
Input parameters from preprocessors.
Definition: input_mod.F90:11
Definition: soln_mod.F90:1
subroutine axhelm(au, u, helm1, helm2, imesh, isd)
Compute the (Helmholtz) matrix-vector product, AU = helm1*[A]u + helm2*[B]u, for NEL elements...
Definition: hmholtz.F90:79
real(dp) function dnekclock()
Definition: ctimer_mod.F90:103
subroutine, public h1mg_solve(z, rhs, if_hybrid)
Solve preconditioner: z = M rhs, where .
Definition: hsmg.F90:111
subroutine hmh_gmres(res, h1, h2, wt, iter)
Solve the Helmholtz equation by right-preconditioned GMRES iteration.
Definition: gmres.F90:27
subroutine chktcg1(tol, res, h1, h2, mask, mult, imesh, isd)
Check that the tolerances are not too small for the CG-solver. Important when calling the CG-solver (...
Definition: hmholtz.F90:637
Geometry arrays.
Definition: geom_mod.F90:2
subroutine gop(x, w, op, n)
Global vector commutative operation.
Definition: comm_mpi.F90:104