Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
hsmg.F90
Go to the documentation of this file.
1 !-----------------------------------------------------------------------
18 !-----------------------------------------------------------------------
19 
20 ! Some relevant parameters
21 
22 ! param(41):
23 ! 0 - use additive SEMG
24 ! 1 - use hybrid SEMG (not yet working... but coming soon!)
25 
26 ! param(42): navier0.f, fasts.f
27 ! 0 - use GMRES for iterative solver, use non-symmetric weighting
28 ! 1 - use PCG for iterative solver, do not use weighting
29 
30 ! param(43): uzawa_gmres.f, navier6.f
31 ! 0 - use additive multilevel scheme (requires param(42).eq.0)
32 ! 1 - use original 2 level scheme
33 
34 ! param(44): fast3d.f, navier6.f
35 ! 0 - base top-level additive Schwarz on restrictions of E
36 ! 1 - base top-level additive Schwarz on restrictions of A
37 
38 !----------------------------------------------------------------------
39 
41 
42  private
43  public :: h1mg_setup, h1mg_solve
44  public :: hsmg_setup
45 
46 contains
47 
48 !----------------------------------------------------------------------
50 subroutine hsmg_setup()
51  use hsmg, only : mg_fld
52  use tstep, only : ifield
53  implicit none
54 
55  mg_fld = 1
56  if (ifield > 1) mg_fld = 2
57  if (ifield == 1) call hsmg_index_0 ! initialize index sets
58 
59  call hsmg_setup_mg_nx ! set nx values for each level of multigrid
60  call hsmg_setup_semhat ! set spectral element hat matrices
61  call hsmg_setup_intp
62  call hsmg_setup_dssum ! set direct stiffness summation handles
63  call hsmg_setup_wtmask ! set restriction weight matrices and bc masks
64  call hsmg_setup_fdm ! set up fast diagonalization method
65  call hsmg_setup_schwarz_wt( .false. )
66  call hsmg_setup_solve ! set up the solver
67 ! call hsmg_setup_dbg
68 
69  return
70 end subroutine hsmg_setup
71 
72 !----------------------------------------------------------------------
74 subroutine h1mg_setup()
75  use size_m, only : nx1, ny1, nz1, nelt
76  use hsmg, only : mg_h1_lmax
77  use input, only : param
78  implicit none
79 
80  integer :: p_msk, n, l
81 
82  param(59) = 1
83  call geom_reset(1) ! Recompute g1m1 etc. with deformed only
84 
85  n = nx1*ny1*nz1*nelt
86 
87  call h1mg_setup_mg_nx
88  call h1mg_setup_semhat ! SEM hat matrices for each level
89  call hsmg_setup_intp ! Interpolation operators
90  call h1mg_setup_dssum ! set direct stiffness summation handles
91  call h1mg_setup_wtmask ! set restriction weight matrices and bc masks
92  call h1mg_setup_fdm ! set up fast diagonalization method
93  call h1mg_setup_schwarz_wt( .false. )
94  call hsmg_setup_solve ! set up the solver
95 
96  l=mg_h1_lmax
99 ! call mg_set_h1 (p_h1 ,l)
100 ! call mg_set_h2 (p_h2 ,l)
101 ! call mg_set_gb (p_g,p_b,l)
102  call mg_set_msk(p_msk,l)
103 
104  return
105 end subroutine h1mg_setup
106 
107 !----------------------------------------------------------------------
111 subroutine h1mg_solve(z,rhs,if_hybrid)
112  use kinds, only : dp
113  use size_m, only : lx1, ly1, lz1, lelt
114  use hsmg, only : mg_h1_lmax, mg_h1_n, p_mg_msk, mg_imask, mg_fld ! Same array space as HSMG
115  use tstep, only : nelfld, ifield
116  use ctimer, only : th1mg, nh1mg, h1mg_flop, h1mg_mop, dnekclock
117  implicit none
118 
119  real(DP), intent(out) :: z(*) !>
120  real(DP), intent(in) :: rhs(*) !>
121  logical, intent(in) :: if_hybrid !>
122 
123  integer, parameter :: lt=lx1*ly1*lz1*lelt
124  real(DP), allocatable :: e(:),w(:),r(:)
125  integer :: p_msk
126 
127  real(DP) :: op, om, sigma
128  integer :: nel, l, n, is, im, i1, i
129 
130  real(DP) :: etime
131 
132  nh1mg = nh1mg + 1
133  etime = dnekclock()
134 
135  nel = nelfld(ifield)
136 
137  op = 1. ! Coefficients for h1mg_ax
138  om = -1.
139  sigma = 1.
140  if (if_hybrid) sigma = 2./3.
141 
142  l = mg_h1_lmax
143  n = mg_h1_n(l,mg_fld)
144  is = 1 ! solve index
145  etime = etime - dnekclock()
146  call h1mg_schwarz(z,rhs,sigma,l) ! z := sigma W M rhs
147  etime = etime + dnekclock()
148 
149  allocate(r(lt))
150  h1mg_mop = h1mg_mop + 2*n
151  call copy(r,rhs,n) ! r := rhs
152 !max if (if_hybrid) call h1mg_axm(r,z,op,om,l,w) ! r := rhs - A z
153 ! l
154 
155  h1mg_mop = h1mg_mop + 2*lt
156  allocate(e(2*lt)); e = 0_dp
157  do l = mg_h1_lmax-1,2,-1 ! DOWNWARD Leg of V-cycle
158  is = is + n
159  n = mg_h1_n(l,mg_fld)
160  ! T
161  call h1mg_rstr(r,l, .true. ) ! r := J r
162  ! l l+1
163  ! OVERLAPPING Schwarz exchange and solve:
164  etime = etime - dnekclock()
165  call h1mg_schwarz(e(is),r,sigma,l) ! e := sigma W M r
166  etime = etime + dnekclock()
167  ! l Schwarz l
168 
169 !max if(if_hybrid)call h1mg_axm(r,e(is),op,om,l,w)! r := r - A e
170  ! l l
171  enddo
172  is = is+n
173 ! T
174  call h1mg_rstr(r,1, .false. ) ! r := J r
175 ! l l+1
176  p_msk = p_mg_msk(l,mg_fld)
177  etime = etime - dnekclock()
178  call h1mg_mask(r,mg_imask(p_msk),nel) ! -1
179  call hsmg_coarse_solve( e(is:is+mg_h1_n(1,mg_fld)-1) , r(1:mg_h1_n(1,mg_fld)) ) ! e := A r
180  call h1mg_mask(e(is),mg_imask(p_msk),nel) ! 1 1 1
181  etime = etime + dnekclock()
182  deallocate(r)
183 
184  h1mg_mop = h1mg_mop + lt
185  allocate(w(lt)); w = 0_dp
186  do l = 2,mg_h1_lmax-1 ! UNWIND. No smoothing.
187  im = is
188  is = is - n
189  n = mg_h1_n(l,mg_fld)
190  call hsmg_intp(w,e(im),l-1) ! w := J e
191  i1=is-1 ! l-1
192  h1mg_flop = h1mg_flop + n
193  h1mg_mop = h1mg_mop + 3*n
194  do i=1,n
195  e(i1+i) = e(i1+i) + w(i) ! e := e + w
196  enddo ! l l
197  enddo
198 
199  l = mg_h1_lmax
200  n = mg_h1_n(l,mg_fld)
201  im = is ! solve index
202  call hsmg_intp(w,e(im),l-1) ! w := J e
203  h1mg_flop = h1mg_flop + n
204  h1mg_mop = h1mg_mop + 3*n
205  do i = 1,n ! l-1
206  z(i) = z(i) + w(i) ! z := z + w
207  enddo
208  deallocate(w,e)
209 
210  call dsavg(z) ! Emergency hack --- to ensure continuous z!
211 
212  th1mg = th1mg + (dnekclock() - etime)
213 
214  return
215 end subroutine h1mg_solve
216 
217 !----------------------------------------------------------------------
218 !----------------------------------------------------------------------
219 ! PRIVATE IMPLEMENTATIONS
220 !----------------------------------------------------------------------
221 !----------------------------------------------------------------------
223  use input, only : if3d
224  use hsmg, only : mg_zh, mg_lmax, mg_ah, mg_bh, mg_dh, mg_dht, mg_zh
225  use hsmg, only : mg_nx, mg_nh, mg_nhz, mg_nz
226  use semhat, only : ah, bh, ch, dh, zh, dph, jph, bgl, zgl, dgl, jgl, wh
227  implicit none
228 
229  integer :: n,l
230 ! generate the SEM hat matrices for each level
231 ! top level
232  n = mg_nx(mg_lmax)
233  call generate_semhat(ah,bh,ch,dh,zh,dph,jph,bgl,zgl,dgl,jgl,n,wh)
234  call copy(mg_zh(1,mg_lmax),zgl,n-1) !top level based on gl points
235  mg_nh(mg_lmax)=n-1
236  mg_nhz(mg_lmax)=n-1
237  if( .NOT. if3d) mg_nhz(mg_lmax)=1
238 ! lower levels
239  do l=1,mg_lmax-1
240  n = mg_nx(l)
241  if(n > 1) then
242  call generate_semhat(ah,bh,ch,dh,zh,dph,jph,bgl,zgl,dgl,jgl,n,wh)
243  call copy(mg_ah(1,l),ah,(n+1)*(n+1))
244  call copy(mg_bh(1,l),bh,n+1)
245  call copy(mg_dh(1,l),dh,(n+1)*(n+1))
246  call transpose(mg_dht(1,l),n+1,dh,n+1)
247  call copy(mg_zh(1,l),zh,n+1)
248  else
249  mg_zh(1,l) = -1.
250  mg_zh(2,l) = 1.
251  endif
252  mg_nh(l)=n+1
253  mg_nhz(l)=mg_nz(l)+1
254  enddo
255 end subroutine hsmg_setup_semhat
256 
257 !----------------------------------------------------------------------
258 subroutine hsmg_setup_intp
259  use hsmg, only : mg_lmax, mg_nh, mg_jh, mg_zh, mg_jht!, mg_jhfc, mg_jhfct
260  implicit none
261 
262  integer :: l,nf,nc
263 
264  do l=1,mg_lmax-1
265 
266  nf=mg_nh(l+1)
267  nc=mg_nh(l)
268 
269  ! Standard multigrid coarse-to-fine interpolation
270  call hsmg_setup_intpm( &
271  mg_jh(1,l),mg_zh(1,l+1),mg_zh(1,l),nf,nc)
272  call transpose(mg_jht(1,l),nc,mg_jh(1,l),nf)
273 
274  ! Fine-to-coarse interpolation for variable-coefficient operators
275 ! call hsmg_setup_intpm( &
276 ! mg_jhfc(1,l),mg_zh(1,l),mg_zh(1,l+1),nc,nf)
277 ! call transpose(mg_jhfct(1,l),nf,mg_jhfc(1,l),nc)
278  ! call outmat(mg_jhfc(1,l),nc,nf,'MG_JHFC',l)
279 
280  enddo
281 end subroutine hsmg_setup_intp
282 
283 !----------------------------------------------------------------------
284 subroutine hsmg_setup_intpm(jh,zf,zc,nf,nc)
285  use kinds, only : dp
286  use size_m, only : lx1
287  implicit none
288  integer :: nf,nc
289  real(DP) :: jh(nf,nc),zf(*),zc(*)
290  real(DP) :: w(2*lx1+2)
291 
292  integer :: i, j
293  do i=1,nf
294  call fd_weights_full(zf(i),zc,nc-1,1,w)
295  do j=1,nc
296  jh(i,j)=w(j)
297  enddo
298  enddo
299  return
300 end subroutine hsmg_setup_intpm
301 
302 !----------------------------------------------------------------------
304  use kinds, only : i8
305  use size_m, only : lx1, ly1, lz1, lelv, nelv, ndim
306  use input, only : if3d
307  use hsmg, only : mg_lmax, mg_nh, mg_nhz, mg_fld
308  use hsmg, only : mg_gsh_handle, mg_gsh_schwarz_handle
309  use mesh, only : vertex
310  use parallel, only : nelgv
311  implicit none
312 
313  integer, parameter :: lxyz=(lx1+2)*(ly1+2)*(lz1+2)
314 
315  integer(i8), allocatable :: glo_num(:)
316  integer :: nx,ny,nz, ncrnr
317  integer :: l
318 
319 
320 ! set up direct stiffness summation for each level
321  ncrnr = 2**ndim
322  call get_vert()
323 
324  allocate(glo_num(lxyz*lelv))
325 
326  do l=1,mg_lmax-1
327  nx=mg_nh(l)
328  ny=mg_nh(l)
329  nz=mg_nhz(l)
330  call setupds(mg_gsh_handle(l,mg_fld),nx,ny,nz &
331  ,nelv,nelgv,vertex,glo_num)
332  nx=nx+2
333  ny=ny+2
334  nz=nz+2
335  if( .NOT. if3d) nz=1
336  call setupds(mg_gsh_schwarz_handle(l,mg_fld),nx,ny,nz &
337  ,nelv,nelgv,vertex,glo_num)
338  enddo
339 end subroutine hsmg_setup_dssum
340 
341 !----------------------------------------------------------------------
343  use kinds, only : dp
344  use size_m, only : ndim, ldim, nelv, lelv
345  use hsmg, only : mg_mask_index, mg_lmax, mg_rstr_wt_index, mg_nh, mg_nhz
346  use hsmg, only : mg_fld, lmgs, lmg_rwt, mg_rstr_wt
347  implicit none
348 
349  real(DP), allocatable :: work(:)
350  integer :: i,l, itmp
351 
352  allocate(work(maxval(mg_nh(1:mg_lmax))*maxval(mg_nh(1:mg_lmax))*maxval(mg_nhz(1:mg_lmax))* nelv))
353 
354  i = mg_mask_index(mg_lmax,mg_fld-1)
355  do l=1,mg_lmax
356  mg_rstr_wt_index(l,mg_fld)=i
357  mg_mask_index(l,mg_fld)=i
358  i=i+mg_nh(l)*mg_nhz(l)*2*ndim*nelv
359  if(i > lmgs*lmg_rwt*2*ldim*lelv) then
360  itmp = i/(2*ldim*lelv)
361  write(6,*) 'parameter lmg_rwt too small',i,itmp,lmg_rwt
362  call exitt
363  endif
364  call hsmg_setup_rstr_wt( mg_rstr_wt(mg_rstr_wt_index(l,mg_fld)) &
365  , mg_nh(l),mg_nh(l),mg_nhz(l),l, work)
366 ! call hsmg_setup_mask( mg_mask(mg_mask_index(l,mg_fld)) &
367 ! , mg_nh(l),mg_nh(l),mg_nhz(l),l, work)
368  enddo
369  mg_mask_index(l,mg_fld)=i
370 
371 end subroutine h1mg_setup_wtmask
372 
373 !----------------------------------------------------------------------
375  use kinds, only : dp
376  use size_m, only : ndim, ldim, nelv, lelv
377  use hsmg, only : mg_mask_index, mg_lmax, mg_rstr_wt_index, mg_nh, mg_nhz
378  use hsmg, only : lmgs, lmg_rwt, mg_rstr_wt, mg_fld
379  implicit none
380 
381  real(DP), allocatable :: work(:)
382  integer :: i,l, itmp
383 
384  allocate(work(maxval(mg_nh(1:mg_lmax-1)) &
385  *maxval(mg_nh(1:mg_lmax-1)) &
386  *maxval(mg_nhz(1:mg_lmax-1))* nelv))
387 
388  i = mg_mask_index(mg_lmax,mg_fld-1)
389  do l=1,mg_lmax-1
390  mg_rstr_wt_index(l,mg_fld)=i
391  mg_mask_index(l,mg_fld)=i
392  i=i+mg_nh(l)*mg_nhz(l)*2*ndim*nelv
393  if(i > lmgs*lmg_rwt*2*ldim*lelv) then
394  itmp = i/(2*ldim*lelv)
395  write(6,*) 'parameter lmg_rwt too small',i,itmp,lmg_rwt
396  call exitt
397  endif
398  call hsmg_setup_rstr_wt( mg_rstr_wt(mg_rstr_wt_index(l,mg_fld)) &
399  , mg_nh(l),mg_nh(l),mg_nhz(l),l,work)
400 ! call hsmg_setup_mask( mg_mask(mg_mask_index(l,mg_fld)) &
401 ! , mg_nh(l),mg_nh(l),mg_nhz(l),l,work)
402  enddo
403  mg_mask_index(l,mg_fld)=i
404 end subroutine hsmg_setup_wtmask
405 
406 !----------------------------------------------------------------------
407 subroutine hsmg_intp(uf,uc,l) ! l is coarse level
408  use kinds, only : dp
409  use hsmg, only : mg_nh, mg_jh, mg_jht
410  implicit none
411  real(DP) :: uf(*),uc(*)
412  integer :: l
413  call hsmg_tnsr(uf,mg_nh(l+1),uc,mg_nh(l),mg_jh(1,l),mg_jht(1,l))
414  return
415 end subroutine hsmg_intp
416 
417 !----------------------------------------------------------------------
419 subroutine hsmg_tnsr(v,nv,u,nu,A,At)
420  use kinds, only : dp
421  use input, only : if3d
422  implicit none
423 
424  integer :: nv,nu
425  real(DP) :: v(*),u(*),A(*),At(*)
426 
427  if ( .NOT. if3d) then
428 !max call hsmg_tnsr2d(v,nv,u,nu,A,At)
429  else
430  call hsmg_tnsr3d(v,nv,u,nu,a,at,at)
431  endif
432  return
433 end subroutine hsmg_tnsr
434 
435 !----------------------------------------------------------------------
437 subroutine hsmg_tnsr3d(v,nv,u,nu,A,Bt,Ct)
438  use kinds, only : dp
439  use ctimer, only : h1mg_flop, h1mg_mop
440  use size_m
441  implicit none
442 
443  integer :: nv,nu
444  real(DP) :: v(nv*nv*nv,nelv),u(nu*nu*nu,nelv),A(*),Bt(*),Ct(*)
445 
446  integer, parameter :: lwk=(lx1+2)*(ly1+2)*(lz1+2)
447  real(DP) :: work(0:lwk-1),work2(0:lwk-1)
448  integer :: ie, i
449 
450  h1mg_flop = h1mg_flop + nelv * nv * (2*nu - 1) * nu * nu
451  h1mg_flop = h1mg_flop + nelv * nv * nv * (2*nu - 1) * nu
452  h1mg_flop = h1mg_flop + nelv * nv * nv * nv * (2*nu - 1)
453  h1mg_mop = h1mg_mop + nv**3 + nu**3
454 
455  do ie=1,nelv
456  call mxm(a,nv,u(1,ie),nu,work,nu*nu)
457  do i=0,nu-1
458  call mxm(work(nv*nu*i),nv,bt,nu,work2(nv*nv*i),nv)
459  enddo
460  call mxm(work2,nv*nv,ct,nu,v(1,ie),nv)
461  enddo
462  return
463 end subroutine hsmg_tnsr3d
464 
465 !----------------------------------------------------------------------
467 subroutine hsmg_tnsr3d_el(v,nv,u,nu,A,Bt,Ct)
468  use kinds, only : dp
469  use ctimer, only : schw_flop, schw_mop
470  use size_m, only : lx1, ly1, lz1
471  implicit none
472 
473  integer :: nv,nu
474  real(DP) :: v(nv*nv*nv),u(nu*nu*nu),A(*),Bt(*),Ct(*)
475 
476  integer, parameter :: lwk=(lx1+2)*(ly1+2)*(lz1+2)
477  real(DP) :: work(0:lwk-1),work2(0:lwk-1)
478  integer :: i
479 
480  schw_flop = schw_flop + nv*nu*nu*(2*nu-1)+ nv*nv*nu*(2*nu-1)+ nv*nv*nv*(2*nu-1)
481  schw_mop = schw_mop + nu**3 + nv**3
482 
483  call mxm(a,nv,u,nu,work,nu*nu)
484  do i=0,nu-1
485  call mxm(work(nv*nu*i),nv,bt,nu,work2(nv*nv*i),nv)
486  enddo
487  call mxm(work2,nv*nv,ct,nu,v,nv)
488 
489  return
490 end subroutine hsmg_tnsr3d_el
491 
492 !----------------------------------------------------------------------
493 subroutine hsmg_dssum(u,l)
494  use kinds, only : dp
495  use ctimer, only : dnekclock, etime1, tdadd, ifsync
496  use hsmg, only : mg_gsh_handle, mg_fld
497  implicit none
498  real(DP) :: u(*)
499  integer :: l
500 
501  if (ifsync) call nekgsync()
502 #ifndef NOTIMER
503  etime1=dnekclock()
504 #endif
505  call gs_op(mg_gsh_handle(l,mg_fld),u,1,1,0)
506 #ifndef NOTIMER
507  tdadd =tdadd + dnekclock()-etime1
508 #endif
509 
510  return
511 end subroutine hsmg_dssum
512 
513 !----------------------------------------------------------------------
514 subroutine hsmg_dsprod(u,l)
515  use kinds, only : dp
516  use ctimer, only : ifsync
517  use hsmg, only : mg_gsh_handle, mg_fld
518  implicit none
519  real(DP) :: u(1)
520  integer :: l
521 
522  if (ifsync) call nekgsync()
523 
524  call gs_op(mg_gsh_handle(l,mg_fld),u,1,2,0)
525  return
526 end subroutine hsmg_dsprod
527 
528 !----------------------------------------------------------------------
529 subroutine hsmg_schwarz_dssum(u,l)
530  use kinds, only : dp
531  use ctimer, only : ifsync, etime1, dnekclock, tdadd
532  use hsmg, only : mg_gsh_schwarz_handle, mg_fld
533  implicit none
534  real(DP) :: u(1)
535  integer :: l
536 
537  if (ifsync) call nekgsync()
538 #ifndef NOTIMER
539  etime1=dnekclock()
540 #endif
541  call gs_op(mg_gsh_schwarz_handle(l,mg_fld),u,1,1,0)
542 #ifndef NOTIMER
543  tdadd =tdadd + dnekclock()-etime1
544 #endif
545  return
546 end subroutine hsmg_schwarz_dssum
547 
548 !----------------------------------------------------------------------
549 subroutine hsmg_extrude(arr1,l1,f1,arr2,l2,f2,nx,ny,nz)
550  use kinds, only : dp
551  use size_m, only : nelv
552  use ctimer, only : schw_flop, schw_mop
553  use input, only : if3d
554  implicit none
555 
556  integer :: l1,l2,nx,ny,nz
557  real(DP) :: arr1(nx,ny,nz,nelv),arr2(nx,ny,nz,nelv)
558  real(DP) :: f1,f2
559 
560  integer :: i,j,k,ie,i0,i1
561  i0=2
562  i1=nx-1
563 
564  if( .NOT. if3d) then
565  do ie=1,nelv
566  do j=i0,i1
567  arr1(l1+1 ,j,1,ie) = f1*arr1(l1+1 ,j,1,ie) &
568  +f2*arr2(l2+1 ,j,1,ie)
569  arr1(nx-l1,j,1,ie) = f1*arr1(nx-l1,j,1,ie) &
570  +f2*arr2(nx-l2,j,1,ie)
571  enddo
572  do i=i0,i1
573  arr1(i,l1+1 ,1,ie) = f1*arr1(i,l1+1 ,1,ie) &
574  +f2*arr2(i,l2+1 ,1,ie)
575  arr1(i,ny-l1,1,ie) = f1*arr1(i,ny-l1,1,ie) &
576  +f2*arr2(i,nx-l2,1,ie)
577  enddo
578  enddo
579  else
580  schw_flop = schw_flop + nelv * 3 * (nx-2)**2 * 6
581  if (l1 == 0) then
582  schw_mop = schw_mop + 2 * nelv * nx*ny*nz
583  else
584  schw_mop = schw_mop + 2 * nelv * nx*ny*(nz-2)
585  endif
586  if (l2 == 0) then
587  schw_mop = schw_mop + nelv * nx*ny*nz
588  else
589  schw_mop = schw_mop + nelv * nx*ny*(nz-2)
590  endif
591 
592  !schw_mop = schw_mop + nelv * 3 * (nx-2)**2 * 6
593  do ie=1,nelv
594  do j=i0,i1
595  do i=i0,i1
596  arr1(i,j,l1+1 ,ie) = f1*arr1(i,j,l1+1 ,ie) &
597  +f2*arr2(i,j,l2+1 ,ie)
598  enddo
599  enddo
600  do k=i0,i1
601  do i=i0,i1
602  arr1(i,l1+1 ,k,ie) = f1*arr1(i,l1+1 ,k,ie) &
603  +f2*arr2(i,l2+1 ,k,ie)
604  enddo
605  do j=i0,i1
606  arr1(l1+1 ,j,k,ie) = f1*arr1(l1+1 ,j,k,ie) &
607  +f2*arr2(l2+1 ,j,k,ie)
608  arr1(nx-l1,j,k,ie) = f1*arr1(nx-l1,j,k,ie) &
609  +f2*arr2(nx-l2,j,k,ie)
610  enddo
611  do i=i0,i1
612  arr1(i,nx-l1,k,ie) = f1*arr1(i,nx-l1,k,ie) &
613  +f2*arr2(i,nx-l2,k,ie)
614  enddo
615  enddo
616  do j=i0,i1
617  do i=i0,i1
618  arr1(i,j,nx-l1,ie) = f1*arr1(i,j,nx-l1,ie) &
619  +f2*arr2(i,j,nx-l2,ie)
620  enddo
621  enddo
622  enddo
623  endif
624  return
625 end subroutine hsmg_extrude
626 
627 !----------------------------------------------------------------------
628 subroutine h1mg_schwarz(e,r,sigma,l)
629  use kinds, only : dp
630  use hsmg, only : mg_h1_n, mg_fld
631  use ctimer, only : tschw, nschw, schw_flop, schw_mop, dnekclock
632  implicit none
633 
634  real(DP) :: e(*),r(*)
635  real(DP), intent(in) :: sigma
636  integer :: l, n
637  real(DP) :: etime
638 
639  nschw = nschw + 1
640  n = mg_h1_n(l,mg_fld)
641  schw_flop = schw_flop + n
642  schw_mop = schw_mop + 2*n
643  !call hpm_start('schwarz')
644  etime = dnekclock()
645 
646 
647  call h1mg_schwarz_part1(e,r,l)
648  call hsmg_schwarz_wt(e,l) ! e := W e
649  e(1:n) = e(1:n) * sigma
650 
651  tschw = tschw + (dnekclock() - etime)
652  !call hpm_stop('schwarz')
653 
654  return
655 end subroutine h1mg_schwarz
656 
657 !----------------------------------------------------------------------
658 subroutine h1mg_schwarz_part1 (e,r,l)
659  use kinds, only : dp
660  use size_m, only : nelv
661  use input, only : if3d
662  use hsmg, only : mg_h1_n, p_mg_msk, mg_imask, mg_nh, mg_fld
663  use ctimer, only : tschw, nschw, schw_flop, schw_mop, dnekclock
664  use tstep, only : ifield, nelfld
665  implicit none
666 
667  real(DP) :: e(*),r(*)
668  real(DP) :: etime
669 
670  integer :: enx,eny,enz,pm, n, i, l
671  real(DP) :: zero, one, onem
672  real(DP), allocatable :: work(:)
673 
674  etime = 0._dp
675 
676  zero = 0
677  one = 1
678  onem = -1
679 
680  n = mg_h1_n(l,mg_fld)
681  pm = p_mg_msk(l,mg_fld)
682 
683  enx=mg_nh(l)+2
684  eny=mg_nh(l)+2
685  enz=mg_nh(l)+2
686 
687  call h1mg_mask(r,mg_imask(pm),nelfld(ifield)) ! Zero Dirichlet nodes
688 
689  allocate(work(2*enx*eny*enz*nelv))
690 
691  if (if3d) then ! extended array
692  call hsmg_schwarz_toext3d(work,r,mg_nh(l))
693  else
694 !max call hsmg_schwarz_toext2d(mg_work,r,mg_nh(l))
695  endif
696 
697  if( .NOT. if3d) enz=1
698  i = enx*eny*enz*nelv+1
699 
700 ! exchange interior nodes
701  call hsmg_extrude(work,0,zero,work,2,one,enx,eny,enz)
702  etime = etime - dnekclock()
703  !call hpm_stop('schwarz')
704  call hsmg_schwarz_dssum(work,l)
705  !call hpm_start('schwarz')
706  etime = etime + dnekclock()
707  call hsmg_extrude(work,0,one ,work,2,onem,enx,eny,enz)
708 
709  call hsmg_fdm(work(i),work,l) ! Do the local solves
710 
711 ! Sum overlap region (border excluded)
712  call hsmg_extrude(work,0,zero,work(i),0,one ,enx,eny,enz)
713  etime = etime - dnekclock()
714  !call hpm_stop('schwarz')
715  call hsmg_schwarz_dssum(work(i),l)
716  !call hpm_start('schwarz')
717  etime = etime + dnekclock()
718  call hsmg_extrude(work(i),0,one ,work,0,onem,enx,eny,enz)
719  call hsmg_extrude(work(i),2,one,work(i),0,one,enx,eny,enz)
720 
721  if( .NOT. if3d) then ! Go back to regular size array
722 !max call hsmg_schwarz_toreg2d(e,mg_work(i),mg_nh(l))
723  else
724  call hsmg_schwarz_toreg3d(e,work(i),mg_nh(l))
725  endif
726 
727  etime = etime - dnekclock()
728  !call hpm_stop('schwarz')
729  call hsmg_dssum(e,l) ! sum border nodes
730  !call hpm_start('schwarz')
731  etime = etime + dnekclock()
732 
733  call h1mg_mask(e,mg_imask(pm),nelfld(ifield)) ! apply mask
734 
735  tschw = tschw - etime
736 
737  return
738 end subroutine h1mg_schwarz_part1
739 
740 !----------------------------------------------------------------------
741 subroutine hsmg_schwarz_toext3d(a,b,n)
742  use kinds, only : dp
743  use ctimer, only : schw_mop
744  use size_m, only : nelv
745  implicit none
746  integer :: n
747  real(DP) :: a(0:n+1,0:n+1,0:n+1,nelv),b(n,n,n,nelv)
748 
749  integer :: i,j,k,ie
750  schw_mop = schw_mop + nelv*((n+2)**3 + n**3)
751  do ie=1,nelv
752  a(:,:,0,ie) = 0._dp
753  do k=1,n
754  a(:,0,k,ie) = 0._dp
755  do j=1,n
756  a(0,j,k,ie) = 0._dp
757  do i=1,n
758  a(i,j,k,ie)=b(i,j,k,ie)
759  enddo
760  a(n+1,j,k,ie) = 0._dp
761  enddo
762  a(:,n+1,k,ie) = 0._dp
763  enddo
764  a(:,:,n+1,ie) = 0._dp
765  enddo
766  return
767 end subroutine hsmg_schwarz_toext3d
768 
769 !----------------------------------------------------------------------
770 subroutine hsmg_schwarz_toreg3d(b,a,n)
771  use kinds, only : dp
772  use ctimer, only : schw_mop
773  use size_m, only : nelv
774  implicit none
775 
776  integer :: n
777  real(DP) :: a(0:n+1,0:n+1,0:n+1,nelv),b(n,n,n,nelv)
778 
779  integer :: i,j,k,ie
780  schw_mop = schw_mop + nelv*n**3 + nelv * (n+2)**3
781  !schw_mop = schw_mop + 2*nelv*n**3
782  do ie=1,nelv
783  do k=1,n
784  do j=1,n
785  do i=1,n
786  b(i,j,k,ie)=a(i,j,k,ie)
787  enddo
788  enddo
789  enddo
790  enddo
791  return
792 end subroutine hsmg_schwarz_toreg3d
793 
794 !----------------------------------------------------------------------
795 subroutine h1mg_setup_fdm()
796  use size_m, only : ndim, ldim, nelv, lelv
797  use hsmg, only : mg_fast_d_index, mg_fast_s_index, mg_nx, mg_bh, mg_ah
798  use hsmg, only : mg_fast_d, mg_fast_s, lmg_fastd, lmg_fasts, mg_nh, mg_fld
799  use hsmg, only : mg_lmax
800  implicit none
801 
802  integer :: l,i,j,nl, itmp
803  i = mg_fast_s_index(mg_lmax,mg_fld-1)
804  j = mg_fast_d_index(mg_lmax,mg_fld-1)
805  do l=2,mg_lmax
806  mg_fast_s_index(l,mg_fld)=i
807  nl = mg_nh(l)+2
808  i=i+nl*nl*2*ndim*nelv
809  if(i > lmg_fasts*2*ldim*lelv) then
810  itmp = i/(2*ldim*lelv)
811  write(6,*) 'lmg_fasts too small',i,itmp,lmg_fasts,l
812  call exitt
813  endif
814  mg_fast_d_index(l,mg_fld)=j
815  j=j+(nl**ndim)*nelv
816  if(j > lmg_fastd*lelv) then
817  itmp = i/(2*ldim*lelv)
818  write(6,*) 'lmg_fastd too small',i,itmp,lmg_fastd,l
819  call exitt
820  endif
821  call hsmg_setup_fast( &
822  mg_fast_s(mg_fast_s_index(l,mg_fld)) &
823  ,mg_fast_d(mg_fast_d_index(l,mg_fld)) &
824  ,mg_nh(l)+2,mg_ah(1,l),mg_bh(1,l),mg_nx(l))
825  enddo
826  mg_fast_s_index(l,mg_fld)=i
827  mg_fast_d_index(l,mg_fld)=j
828  return
829 end subroutine h1mg_setup_fdm
830 
831 !----------------------------------------------------------------------
832 subroutine hsmg_setup_fdm()
833  use size_m, only : ndim, ldim, nelv, lelv
834  use hsmg, only : mg_fast_d_index, mg_fast_s_index, mg_nx, mg_bh, mg_ah
835  use hsmg, only : mg_fast_d, mg_fast_s, lmg_fastd, lmg_fasts, mg_nh
836  use hsmg, only : mg_lmax, mg_fld
837  implicit none
838 
839  integer :: l,i,j,nl, itmp
840  i = mg_fast_s_index(mg_lmax,mg_fld-1)
841  j = mg_fast_d_index(mg_lmax,mg_fld-1)
842  do l=2,mg_lmax-1
843  mg_fast_s_index(l,mg_fld)=i
844  nl = mg_nh(l)+2
845  i=i+nl*nl*2*ndim*nelv
846  if(i > lmg_fasts*2*ldim*lelv) then
847  itmp = i/(2*ldim*lelv)
848  write(6,*) 'lmg_fasts too small',i,itmp,lmg_fasts,l
849  call exitt
850  endif
851  mg_fast_d_index(l,mg_fld)=j
852  j=j+(nl**ndim)*nelv
853  if(j > lmg_fastd*lelv) then
854  itmp = i/(2*ldim*lelv)
855  write(6,*) 'lmg_fastd too small',i,itmp,lmg_fastd,l
856  call exitt
857  endif
858  call hsmg_setup_fast( &
859  mg_fast_s(mg_fast_s_index(l,mg_fld)) &
860  ,mg_fast_d(mg_fast_d_index(l,mg_fld)) &
861  ,mg_nh(l)+2,mg_ah(1,l),mg_bh(1,l),mg_nx(l))
862  enddo
863  mg_fast_s_index(l,mg_fld)=i
864  mg_fast_d_index(l,mg_fld)=j
865  return
866 end subroutine hsmg_setup_fdm
867 
868 !----------------------------------------------------------------------
870 subroutine hsmg_setup_fast(s,d,nl,ah,bh,n)
871  use kinds, only : dp
872  use hsmg, only : lr, llr, lrr, lmr, ls, lls, lms, lrs, lt, llt, lmt, lrt
873  use size_m, only : nid, ndim, nelv
874  use input, only : if3d
875  implicit none
876 
877  integer :: nl
878  real(DP) :: s(nl*nl,2,ndim,nelv)
879  real(DP) :: d(nl**ndim,nelv)
880  real(DP) :: ah(1),bh(1)
881  integer :: n
882 
883  integer :: i,j,k
884  integer :: ie,il,nr,ns,nt
885  integer :: lbr,rbr,lbs,rbs,lbt,rbt,two
886  integer :: ierr, ierrmx
887  integer, external :: iglmax
888  real(DP) :: eps,diag
889 
890  two = 2
891  ierr = 0
892  do ie=1,nelv
893  call get_fast_bc(lbr,rbr,lbs,rbs,lbt,rbt,ie,two,ierr)
894  nr=nl
895  ns=nl
896  nt=nl
897  call hsmg_setup_fast1d(s(1,1,1,ie),lr,nr,lbr,rbr &
898  ,llr(ie),lmr(ie),lrr(ie),ah,bh,n,ie)
899  call hsmg_setup_fast1d(s(1,1,2,ie),ls,ns,lbs,rbs &
900  ,lls(ie),lms(ie),lrs(ie),ah,bh,n,ie)
901  if(if3d) call hsmg_setup_fast1d(s(1,1,3,ie),lt,nt,lbt,rbt &
902  ,llt(ie),lmt(ie),lrt(ie),ah,bh,n,ie)
903  il=1
904  if( .NOT. if3d) then
905  eps = 1.e-5*(maxval(lr(2:nr-1)) + maxval(ls(2:ns-1)))
906  do j=1,ns
907  do i=1,nr
908  diag = lr(i)+ls(j)
909  if (diag > eps) then
910  d(il,ie) = 1.0/diag
911  else
912  ! write(6,2) ie,'Reset Eig in hsmg setup fast:',i,j,l
913  ! $ ,eps,diag,lr(i),ls(j)
914  ! 2 format(i6,1x,a21,3i5,1p4e12.4)
915  d(il,ie) = 0.0
916  endif
917  il=il+1
918  enddo
919  enddo
920  else
921  eps = 1.e-5 * (maxval(lr(2:nr-1)) &
922  + maxval(ls(2:ns-1)) + maxval(lt(2:nt-1)))
923  do k=1,nt
924  do j=1,ns
925  do i=1,nr
926  diag = lr(i)+ls(j)+lt(k)
927  if (diag > eps) then
928  d(il,ie) = 1.0/diag
929  else
930  ! write(6,3) ie,'Reset Eig in hsmg setup fast:',i,j,k,l
931  ! $ ,eps,diag,lr(i),ls(j),lt(k)
932  ! 3 format(i6,1x,a21,4i5,1p5e12.4)
933  d(il,ie) = 0.0
934  endif
935  il=il+1
936  enddo
937  enddo
938  enddo
939  endif
940  enddo
941 
942  ierrmx = iglmax(ierr,1)
943  if (ierrmx > 0) then
944  if (ierr > 0) write(6,*) nid,ierr,' BC FAIL'
945  call exitti('A INVALID BC FOUND in genfast$',ierrmx)
946  endif
947 
948  return
949 end subroutine hsmg_setup_fast
950 
951 !----------------------------------------------------------------------
952 subroutine hsmg_setup_fast1d(s,lam,nl,lbc,rbc,ll,lm,lr,ah,bh,n,ie)
953  use kinds, only : dp
954  use size_m, only : lx1
955  implicit none
956 
957  integer :: nl,lbc,rbc,n, ie
958  real(DP) :: s(nl,nl,2),lam(nl),ll,lm,lr
959  real(DP) :: ah(0:n,0:n),bh(0:n)
960 
961  integer, parameter :: lxm=lx1+2
962  real(DP) :: b(2*lxm*lxm),w(2*lxm*lxm)
963 
964  call hsmg_setup_fast1d_a(s,lbc,rbc,ll,lm,lr,ah,n)
965  call hsmg_setup_fast1d_b(b,lbc,rbc,ll,lm,lr,bh,n)
966 
967 ! if (nid.eq.0) write(6,*) 'THIS is generalev call',nl,lbc
968  call generalev(s,b,lam,nl,w)
969  if(lbc > 0) call row_zero(s,nl,nl,1)
970  if(lbc == 1) call row_zero(s,nl,nl,2)
971  if(rbc > 0) call row_zero(s,nl,nl,nl)
972  if(rbc == 1) call row_zero(s,nl,nl,nl-1)
973 
974  call transpose(s(1,1,2),nl,s,nl)
975  return
976 end subroutine hsmg_setup_fast1d
977 
978 !----------------------------------------------------------------------
979 subroutine hsmg_setup_fast1d_a(a,lbc,rbc,ll,lm,lr,ah,n)
980  use kinds, only : dp
981  implicit none
982 
983  integer :: lbc,rbc,n
984  real(DP) :: a(0:n+2,0:n+2),ll,lm,lr
985  real(DP) :: ah(0:n,0:n)
986 
987  real(DP) :: fac
988  integer :: i,j,i0,i1
989  i0=0
990  if(lbc == 1) i0=1
991  i1=n
992  if(rbc == 1) i1=n-1
993 
994  a = 0._dp
995  fac = 2.0/lm
996  a(1,1)=1.0
997  a(n+1,n+1)=1.0
998  do j=i0,i1
999  do i=i0,i1
1000  a(i+1,j+1)=fac*ah(i,j)
1001  enddo
1002  enddo
1003  if(lbc == 0) then
1004  fac = 2.0/ll
1005  a(0,0)=fac*ah(n-1,n-1)
1006  a(1,0)=fac*ah(n ,n-1)
1007  a(0,1)=fac*ah(n-1,n )
1008  a(1,1)=a(1,1)+fac*ah(n ,n )
1009  else
1010  a(0,0)=1.0
1011  endif
1012  if(rbc == 0) then
1013  fac = 2.0/lr
1014  a(n+1,n+1)=a(n+1,n+1)+fac*ah(0,0)
1015  a(n+2,n+1)=fac*ah(1,0)
1016  a(n+1,n+2)=fac*ah(0,1)
1017  a(n+2,n+2)=fac*ah(1,1)
1018  else
1019  a(n+2,n+2)=1.0
1020  endif
1021  return
1022 end subroutine hsmg_setup_fast1d_a
1023 
1024 !----------------------------------------------------------------------
1025 subroutine hsmg_setup_fast1d_b(b,lbc,rbc,ll,lm,lr,bh,n)
1026  use kinds, only : dp
1027  implicit none
1028 
1029  integer :: lbc,rbc,n
1030  real(DP) :: b(0:n+2,0:n+2),ll,lm,lr
1031  real(DP) :: bh(0:n)
1032 
1033  real(DP) :: fac
1034  integer :: i,i0,i1
1035  i0=0
1036  if(lbc == 1) i0=1
1037  i1=n
1038  if(rbc == 1) i1=n-1
1039 
1040  b = 0._dp
1041  fac = 0.5*lm
1042  b(1,1)=1.0
1043  b(n+1,n+1)=1.0
1044  do i=i0,i1
1045  b(i+1,i+1)=fac*bh(i)
1046  enddo
1047  if(lbc == 0) then
1048  fac = 0.5*ll
1049  b(0,0)=fac*bh(n-1)
1050  b(1,1)=b(1,1)+fac*bh(n )
1051  else
1052  b(0,0)=1.0
1053  endif
1054  if(rbc == 0) then
1055  fac = 0.5*lr
1056  b(n+1,n+1)=b(n+1,n+1)+fac*bh(0)
1057  b(n+2,n+2)=fac*bh(1)
1058  else
1059  b(n+2,n+2)=1.0
1060  endif
1061  return
1062 end subroutine hsmg_setup_fast1d_b
1063 
1064 !----------------------------------------------------------------------
1066 subroutine hsmg_fdm(e,r,l)
1067  use kinds, only : dp
1068  use hsmg, only : mg_fast_s, mg_fast_d, mg_fast_s_index, mg_fast_d_index
1069  use hsmg, only : mg_nh, mg_fld
1070  implicit none
1071  real(DP) :: e(*), r(*)
1072  integer :: l
1073  call hsmg_do_fast(e,r, &
1074  mg_fast_s(mg_fast_s_index(l,mg_fld)), &
1075  mg_fast_d(mg_fast_d_index(l,mg_fld)), &
1076  mg_nh(l)+2)
1077  return
1078 end subroutine hsmg_fdm
1079 
1080 !----------------------------------------------------------------------
1082 subroutine hsmg_do_fast(e,r,s,d,nl)
1083  use kinds, only : dp
1084  use size_m, only : ndim, nelv
1085  use size_m, only : lx1, ly1, lz1
1086  use ctimer, only : schw_flop, schw_mop
1087  use input, only : if3d
1088  implicit none
1089 
1090  integer :: nl
1091  real(DP) :: e(nl**ndim,nelv)
1092  real(DP) :: r(nl**ndim,nelv)
1093  real(DP) :: s(nl*nl,2,ndim,nelv)
1094  real(DP) :: d(nl**ndim,nelv)
1095 
1096  integer :: ie,nn,i
1097 
1098  integer, parameter :: lwk=(lx1+2)*(ly1+2)*(lz1+2)
1099  real(DP) :: work(0:lwk-1),work2(0:lwk-1)
1100 
1101  nn=nl**ndim
1102  if( .NOT. if3d) then
1103 #if 0
1104  do ie=1,nelv
1105  call hsmg_tnsr2d_el(e(1,ie),nl,r(1,ie),nl &
1106  ,s(1,2,1,ie),s(1,1,2,ie))
1107  do i=1,nn
1108  r(i,ie)=d(i,ie)*e(i,ie)
1109  enddo
1110  call hsmg_tnsr2d_el(e(1,ie),nl,r(1,ie),nl &
1111  ,s(1,1,1,ie),s(1,2,2,ie))
1112  enddo
1113 #endif
1114  else
1115  schw_flop = schw_flop + nelv*nn
1116  schw_mop = schw_mop + nelv*nn ! r and e should be in cache
1117  do ie=1,nelv
1118 #if 1
1119  schw_flop = schw_flop + 3*nn*(2*nl-1)
1120  schw_mop = schw_mop + nn + 3*nl*nl
1121 
1122  call mxm(s(1,2,1,ie),nl,r(1,ie),nl,work,nl*nl)
1123  do i=0,nl-1
1124  call mxm(work(nl*nl*i),nl,s(1,1,2,ie),nl,work2(nl*nl*i),nl)
1125  enddo
1126  call mxm(work2,nl*nl,s(1,1,3,ie),nl,work,nl)
1127 #endif
1128  do i=0,nn-1
1129  work2(i)=d(i+1,ie)*work(i)
1130  enddo
1131 #if 1
1132  schw_flop = schw_flop + 3*nn*(2*nl-1)
1133  schw_mop = schw_mop + 1*nn + 3*nl*nl
1134 
1135  call mxm(s(1,1,1,ie),nl,work2,nl,work,nl*nl)
1136  do i=0,nl-1
1137  call mxm(work(nl*nl*i),nl,s(1,2,2,ie),nl,work2(nl*nl*i),nl)
1138  enddo
1139  call mxm(work2,nl*nl,s(1,2,3,ie),nl,e(1,ie),nl)
1140 #endif
1141  enddo
1142  endif
1143  return
1144 end subroutine hsmg_do_fast
1145 
1146 !----------------------------------------------------------------------
1148 subroutine hsmg_do_wt(u,wt,nx,ny,nz)
1149  use kinds, only : dp
1150  use size_m, only : nelv, ndim
1151  use ctimer, only : h1mg_flop, h1mg_mop
1152  use input, only : if3d
1153  implicit none
1154 
1155  integer :: nx,ny,nz
1156  real(DP) :: u(nx,ny,nz,nelv)
1157  real(DP) :: wt(nx,nz,2,ndim,nelv)
1158 
1159  integer :: i, j, k, ie
1160 
1161 ! if (nx.eq.2) then
1162 ! do e=1,nelv
1163 ! call outmat(wt(1,1,1,1,e),nx,nz,'wt 1-1',e)
1164 ! call outmat(wt(1,1,2,1,e),nx,nz,'wt 2-1',e)
1165 ! call outmat(wt(1,1,1,2,e),nx,nz,'wt 1-2',e)
1166 ! call outmat(wt(1,1,2,2,e),nx,nz,'wt 2-2',e)
1167 ! enddo
1168 ! call exitti('hsmg_do_wt quit$',nelv)
1169 ! endif
1170 
1171  if ( .NOT. if3d) then
1172  do ie=1,nelv
1173  do j=1,ny
1174  u( 1,j,1,ie)=u( 1,j,1,ie)*wt(j,1,1,1,ie)
1175  u(nx,j,1,ie)=u(nx,j,1,ie)*wt(j,1,2,1,ie)
1176  enddo
1177  do i=2,nx-1
1178  u(i, 1,1,ie)=u(i, 1,1,ie)*wt(i,1,1,2,ie)
1179  u(i,ny,1,ie)=u(i,ny,1,ie)*wt(i,1,2,2,ie)
1180  enddo
1181  enddo
1182  else
1183  h1mg_flop = h1mg_flop + 2*nelv*nz*ny + 2*nelv*nz*(nx-2) + 2*nelv*(ny-2)*(nx-2)
1184  h1mg_mop = h1mg_mop + 6*nelv*nz*ny + 6*nelv*nz*(nx-2) + 6*nelv*(ny-2)*(nx-2)
1185 
1186  do ie=1,nelv
1187  do k=1,nz
1188  do j=1,ny
1189  u( 1,j,k,ie)=u( 1,j,k,ie)*wt(j,k,1,1,ie)
1190  u(nx,j,k,ie)=u(nx,j,k,ie)*wt(j,k,2,1,ie)
1191  enddo
1192  enddo
1193  do k=1,nz
1194  do i=2,nx-1
1195  u(i, 1,k,ie)=u(i, 1,k,ie)*wt(i,k,1,2,ie)
1196  u(i,ny,k,ie)=u(i,ny,k,ie)*wt(i,k,2,2,ie)
1197  enddo
1198  enddo
1199  do j=2,ny-1
1200  do i=2,nx-1
1201  u(i,j, 1,ie)=u(i,j, 1,ie)*wt(i,j,1,3,ie)
1202  u(i,j,nz,ie)=u(i,j,nz,ie)*wt(i,j,2,3,ie)
1203  enddo
1204  enddo
1205  enddo
1206  endif
1207  return
1208 end subroutine hsmg_do_wt
1209 
1210 !----------------------------------------------------------------------
1211 subroutine hsmg_setup_rstr_wt(wt,nx,ny,nz,l,w)
1212  use kinds, only : dp
1213  use size_m, only : nelv, ndim
1214  use input, only : if3d
1215  implicit none
1216 
1217  integer, intent(in) :: nx,ny,nz,l
1218  real(DP), intent(out) :: w(nx,ny,nz,nelv)
1219  real(DP), intent(out) :: wt(nx,nz,2,ndim,nelv)
1220 
1221  integer :: ie, i, j, k
1222 ! nit border nodes to 1
1223  w = 0._dp
1224 ! print *, 'Setup rstr wt: ',nx,ny,nz,nelv
1225  if ( .NOT. if3d) then
1226  do ie=1,nelv
1227  do i=1,nx
1228  w(i,1,1,ie)=1.0
1229  w(i,ny,1,ie)=1.0
1230  enddo
1231  do j=1,ny
1232  w(1,j,1,ie)=1.0
1233  w(nx,j,1,ie)=1.0
1234  enddo
1235  enddo
1236  else
1237  do ie=1,nelv
1238  do j=1,ny
1239  do i=1,nx
1240  w(i,j,1,ie)=1.0
1241  w(i,j,nz,ie)=1.0
1242  enddo
1243  enddo
1244  do k=1,nz
1245  do i=1,nx
1246  w(i,1,k,ie)=1.0
1247  w(i,ny,k,ie)=1.0
1248  enddo
1249  enddo
1250  do k=1,nz
1251  do j=1,ny
1252  w(1,j,k,ie)=1.0
1253  w(nx,j,k,ie)=1.0
1254  enddo
1255  enddo
1256  enddo
1257  endif
1258  call hsmg_dssum(w,l)
1259 ! nvert the count w to get the weight wt
1260  if ( .NOT. if3d) then
1261  do ie=1,nelv
1262  do j=1,ny
1263  wt(j,1,1,1,ie)=1.0/w(1,j,1,ie)
1264  wt(j,1,2,1,ie)=1.0/w(nx,j,1,ie)
1265  enddo
1266  do i=1,nx
1267  wt(i,1,1,2,ie)=1.0/w(i,1,1,ie)
1268  wt(i,1,2,2,ie)=1.0/w(i,ny,1,ie)
1269  enddo
1270  enddo
1271  else
1272  do ie=1,nelv
1273  do k=1,nz
1274  do j=1,ny
1275  wt(j,k,1,1,ie)=1.0/w(1,j,k,ie)
1276  wt(j,k,2,1,ie)=1.0/w(nx,j,k,ie)
1277  enddo
1278  enddo
1279  do k=1,nz
1280  do i=1,nx
1281  wt(i,k,1,2,ie)=1.0/w(i,1,k,ie)
1282  wt(i,k,2,2,ie)=1.0/w(i,ny,k,ie)
1283  enddo
1284  enddo
1285  do j=1,ny
1286  do i=1,nx
1287  wt(i,j,1,3,ie)=1.0/w(i,j,1,ie)
1288  wt(i,j,2,3,ie)=1.0/w(i,j,nz,ie)
1289  enddo
1290  enddo
1291  enddo
1292  endif
1293  return
1294 end subroutine hsmg_setup_rstr_wt
1295 
1296 !----------------------------------------------------------------------
1297 subroutine hsmg_setup_schwarz_wt(ifsqrt)
1298  use size_m, only : ndim, nelv, ldim, lelv
1299  use input, only : if3d
1300  use hsmg, only : mg_schwarz_wt_index, mg_lmax, mg_fld
1301  use hsmg, only : mg_nh, lmg_swt, mg_schwarz_wt
1302  implicit none
1303 
1304  logical :: ifsqrt
1305 
1306  integer :: l,i,nl,nlz, itmp
1307 
1308  i = mg_schwarz_wt_index(mg_lmax,mg_fld-1)
1309  do l=2,mg_lmax-1
1310  mg_schwarz_wt_index(l,mg_fld)=i
1311  nl = mg_nh(l)
1312  nlz = mg_nh(l)
1313  if( .NOT. if3d) nlz=1
1314  i=i+nl*nlz*4*ndim*nelv
1315  if(i > lmg_swt*4*ldim*lelv) then
1316  itmp = i/(4*ldim*lelv)
1317  write(6,*) 'lmg_swt too small',i,itmp,lmg_swt,l
1318  call exitt
1319  endif
1320 
1321  call h1mg_setup_schwarz_wt_1( &
1322  mg_schwarz_wt(mg_schwarz_wt_index(l,mg_fld)),l,ifsqrt)
1323 
1324  enddo
1325  mg_schwarz_wt_index(l,mg_fld)=i
1326 
1327  return
1328 end subroutine hsmg_setup_schwarz_wt
1329 
1330 !----------------------------------------------------------------------
1331 subroutine h1mg_setup_schwarz_wt(ifsqrt)
1332  use size_m, only : ndim, ldim, nelv, lelv
1333  use hsmg, only : mg_schwarz_wt_index, mg_schwarz_wt, mg_lmax, mg_fld
1334  use hsmg, only : mg_nh, mg_nhz, lmg_swt
1335  implicit none
1336 
1337  logical :: ifsqrt
1338 
1339  integer :: l,i,nl,nlz, itmp
1340 
1341  i = mg_schwarz_wt_index(mg_lmax,mg_fld-1)
1342  do l=2,mg_lmax
1343 
1344  mg_schwarz_wt_index(l,mg_fld)=i
1345  nl = mg_nh(l)
1346  nlz = mg_nhz(l)
1347  i = i+nl*nlz*4*ndim*nelv
1348 
1349  if (i > lmg_swt*4*ldim*lelv) then
1350  itmp = i/(4*ldim*lelv)
1351  write(6,*) 'lmg_swt too small',i,itmp,lmg_swt,l
1352  call exitt
1353  endif
1354 
1355  call h1mg_setup_schwarz_wt_1( &
1356  mg_schwarz_wt(mg_schwarz_wt_index(l,mg_fld)),l,ifsqrt)
1357 
1358  enddo
1359 
1360  mg_schwarz_wt_index(l,mg_fld)=i
1361 
1362  return
1363 end subroutine h1mg_setup_schwarz_wt
1364 
1365 !----------------------------------------------------------------------
1366 subroutine hsmg_schwarz_wt(e,l)
1367  use kinds, only : dp
1368  use input, only : if3d
1369  use hsmg, only : mg_schwarz_wt, mg_schwarz_wt_index, mg_fld, mg_nh
1370  implicit none
1371  real(DP) :: e(*)
1372  integer :: l
1373 
1374 #if 0
1375  if( .NOT. if3d) call hsmg_schwarz_wt2d( &
1376  e,mg_schwarz_wt(mg_schwarz_wt_index(l,mg_fld)),mg_nh(l))
1377 #endif
1378  if(if3d) call hsmg_schwarz_wt3d( &
1379  e,mg_schwarz_wt(mg_schwarz_wt_index(l,mg_fld)),mg_nh(l))
1380  return
1381 end subroutine hsmg_schwarz_wt
1382 
1383 !----------------------------------------------------------------------
1384 subroutine hsmg_schwarz_wt3d(e,wt,n)
1385  use kinds, only : dp
1386  use size_m, only : nelv
1387  use ctimer, only : schw_flop, schw_mop
1388  implicit none
1389 
1390  integer :: n
1391  real(DP) :: e(n,n,n,nelv)
1392  real(DP) :: wt(n,n,4,3,nelv)
1393  integer :: ie,j,k
1394 
1395  schw_flop = schw_flop + 4*nelv*n*n + 4*nelv*n*(n-4) + 4*nelv*(n-4)*(n-4)
1396  schw_mop = schw_mop + 12*nelv*n*n + 2*nelv*n*n*n
1397  !schw_mop = schw_mop + 12*nelv*n*n + 12*nelv*n*(n-4) + 12*nelv*(n-4)*(n-4)
1398 
1399  do ie=1,nelv
1400  e(:,:,1 ,ie)=e(:,:,1 ,ie)*wt(:,:,1,3,ie)
1401  e(:,:,2 ,ie)=e(:,:,2 ,ie)*wt(:,:,2,3,ie)
1402  do k=3,n-2
1403  e(:,1 ,k,ie)=e(:,1 ,k,ie)*wt(:,k,1,2,ie)
1404  e(:,2 ,k,ie)=e(:,2 ,k,ie)*wt(:,k,2,2,ie)
1405  do j=3,n-2
1406  e(1 ,j,k,ie)=e(1 ,j,k,ie)*wt(j,k,1,1,ie)
1407  e(2 ,j,k,ie)=e(2 ,j,k,ie)*wt(j,k,2,1,ie)
1408  e(n-1,j,k,ie)=e(n-1,j,k,ie)*wt(j,k,3,1,ie)
1409  e(n ,j,k,ie)=e(n ,j,k,ie)*wt(j,k,4,1,ie)
1410  enddo
1411  e(:,n-1,k,ie)=e(:,n-1,k,ie)*wt(:,k,3,2,ie)
1412  e(:,n ,k,ie)=e(:,n ,k,ie)*wt(:,k,4,2,ie)
1413  enddo
1414  e(:,:,n-1,ie)=e(:,:,n-1,ie)*wt(:,:,3,3,ie)
1415  e(:,:,n ,ie)=e(:,:,n ,ie)*wt(:,:,4,3,ie)
1416  enddo
1417  return
1418 end subroutine hsmg_schwarz_wt3d
1419 
1420 !----------------------------------------------------------------------
1421 subroutine hsmg_coarse_solve(e,r)
1422  use kinds, only : dp
1423  use ctimer, only : icalld, ncrsl, tcrsl, ifsync, etime1, dnekclock
1424  use parallel, only : xxth
1425  use tstep, only : ifield
1426  use poisson, only : spectral_solve
1427  use hsmg, only : use_spectral_coarse
1428 
1429  implicit none
1430  real(DP) :: e(:),r(:)
1431 
1432  if (icalld == 0) then ! timer info
1433  ncrsl=0
1434  tcrsl=0.0
1435  endif
1436  icalld = 1
1437 
1438  if (ifsync) call nekgsync()
1439 
1440  ncrsl = ncrsl + 1
1441 #ifndef NOTIMER
1442  etime1=dnekclock()
1443 #endif
1444 
1445 ! call spectral_solve(e, r)!, 0,0,0, 0, 0)
1446  if (use_spectral_coarse) then
1447  call spectral_solve(e, r)!, 0,0,0, 0, 0)
1448  else
1449  call crs_solve(xxth(ifield),e,r)
1450  endif
1451 
1452 #ifndef NOTIMER
1453  tcrsl=tcrsl+dnekclock()-etime1
1454 #endif
1455 
1456  return
1457 end subroutine hsmg_coarse_solve
1458 
1459 !----------------------------------------------------------------------
1461  use size_m, only : nelv, lelv
1462  use hsmg, only : mg_lmax, mg_nh, mg_nhz, lmg_solve, mg_fld, mg_solve_index
1463  implicit none
1464 
1465  integer :: l,i, itmp
1466 
1467  i = mg_solve_index(mg_lmax+1,mg_fld-1)
1468  do l=1,mg_lmax
1469  mg_solve_index(l,mg_fld)=i
1470  i=i+mg_nh(l)*mg_nh(l)*mg_nhz(l)*nelv
1471  if(i > lmg_solve*lelv) then
1472  itmp = i/lelv
1473  write(6,*) 'lmg_solve too small',i,itmp,lmg_solve,l
1474  call exitt
1475  endif
1476  enddo
1477  mg_solve_index(l,mg_fld)=i
1478 
1479  return
1480 end subroutine hsmg_setup_solve
1481 
1482 !----------------------------------------------------------------------
1483 subroutine hsmg_setup_mg_nx()
1484  use size_m, only : lx2, nx1, ly1, lz1, lx1, nid
1485  use input, only : if3d
1486  use hsmg, only : mg_lmax, mg_nz, mg_ny, mg_nx
1487  implicit none
1488 
1489  integer :: p82, mgnx1, mgnx2, i
1490 
1491  integer, save :: mgn2(10) = (/ 1, 2, 2, 2, 2, 3, 3, 5, 5, 5/)
1492 
1493 ! if (param(82).eq.0) param(82)=2 ! nek default
1494 ! if (np.eq.1) param(82)=2 ! single proc. too slow
1495  p82 = 2 ! potentially variable nxc
1496  mg_lmax = 3
1497 ! mg_lmax = 4
1498  if (lx1 == 4) mg_lmax = 2
1499 ! if (param(79).ne.0) mg_lmax = param(79)
1500  mgnx1 = p82-1 !1
1501  mg_nx(1) = mgnx1
1502  mg_ny(1) = mgnx1
1503  mg_nz(1) = mgnx1
1504  if ( .NOT. if3d) mg_nz(1) = 0
1505 
1506  mgnx2 = 2*(lx2/4) + 1
1507  if (lx1 == 5) mgnx2 = 3
1508 ! if (lx1.eq.6) mgnx2 = 3
1509  if (lx1 <= 10) mgnx2 = mgn2(nx1)
1510  if (lx1 == 8) mgnx2 = 4
1511  if (lx1 == 8) mgnx2 = 3
1512 
1513 ! mgnx2 = min(3,mgnx2)
1514 
1515  mg_nx(2) = mgnx2
1516  mg_ny(2) = mgnx2
1517  mg_nz(2) = mgnx2
1518  if ( .NOT. if3d) mg_nz(2) = 0
1519 
1520  mg_nx(3) = mgnx2+1
1521  mg_ny(3) = mgnx2+1
1522  mg_nz(3) = mgnx2+1
1523  if ( .NOT. if3d) mg_nz(3) = 0
1524 
1525  mg_nx(mg_lmax) = lx1-1
1526  mg_ny(mg_lmax) = ly1-1
1527  mg_nz(mg_lmax) = lz1-1
1528 
1529  if (nid == 0) write(*,*) 'mg_nx:',(mg_nx(i),i=1,mg_lmax)
1530  if (nid == 0) write(*,*) 'mg_ny:',(mg_ny(i),i=1,mg_lmax)
1531  if (nid == 0) write(*,*) 'mg_nz:',(mg_nz(i),i=1,mg_lmax)
1532 
1533  return
1534 end subroutine hsmg_setup_mg_nx
1535 
1536 !----------------------------------------------------------------------
1538 subroutine hsmg_index_0
1539  use hsmg, only : mg_rstr_wt_index, mg_solve_index, lmgn, lmgs
1540  use hsmg, only : mg_fast_s_index, mg_fast_d_index, mg_schwarz_wt_index
1541  use hsmg, only : mg_mask_index
1542  implicit none
1543 
1544  integer :: n
1545  n = lmgn*(lmgs+1)
1546 
1547  mg_mask_index = 0
1548  mg_rstr_wt_index = 0
1549  mg_solve_index = 0
1550  mg_fast_s_index = 0
1551  mg_fast_d_index = 0
1552  mg_schwarz_wt_index = 0
1553 
1554  return
1555 end subroutine hsmg_index_0
1556 
1557 !-----------------------------------------------------------------------
1558 subroutine h1mg_mask(w,mask,nel)
1559  use kinds, only : dp
1560  use ctimer, only : nmg_mask, tmg_mask, dnekclock
1561  implicit none
1562 
1563  real(DP) :: w(*)
1564  integer :: mask(*) ! Pointer to Dirichlet BCs
1565  integer :: nel
1566  real(DP) :: etime
1567 
1568  integer :: e, im
1569 
1570  nmg_mask = nmg_mask + 1
1571  etime = dnekclock()
1572  do e=1,nel
1573  im = mask(e)
1574  call mg_mask_e(w,mask(im)) ! Zero out Dirichlet conditions
1575  enddo
1576  tmg_mask = tmg_mask + (dnekclock() - etime)
1577 
1578  return
1579 end subroutine h1mg_mask
1580 
1581 !----------------------------------------------------------------------
1582 subroutine mg_mask_e(w,mask) ! Zero out Dirichlet conditions
1583  use kinds, only : dp
1584  implicit none
1585 
1586  real(DP) :: w(1)
1587  integer :: mask(0:1)
1588  integer :: n, i
1589  n=mask(0)
1590  do i=1,n
1591  ! write(6,*) i,mask(i),n,' MG_MASK'
1592  w(mask(i)) = 0.
1593  enddo
1594 
1595  return
1596 end subroutine mg_mask_e
1597 
1598 !-----------------------------------------------------------------------
1600 subroutine hsmg_tnsr1(v,nv,nu,A,At)
1601  use kinds, only : dp
1602  use input, only : if3d
1603  implicit none
1604 
1605  integer :: nv,nu
1606  real(DP) :: v(1),A(1),At(1)
1607 
1608  if ( .NOT. if3d) then
1609 !max call hsmg_tnsr1_2d(v,nv,nu,A,At)
1610  else
1611  call hsmg_tnsr1_3d(v,nv,nu,a,at,at)
1612  endif
1613  return
1614 end subroutine hsmg_tnsr1
1615 
1616 !-------------------------------------------------------T--------------
1618 subroutine hsmg_tnsr1_3d(v,nv,nu,A,Bt,Ct)
1619  use kinds, only : dp
1620  use ctimer, only : h1mg_flop, h1mg_mop
1621  use size_m, only : lx1, ly1, lz1, nelv
1622  implicit none
1623 
1624  integer :: nv,nu
1625  real(DP) :: v(*),A(*),Bt(*),Ct(*)
1626 
1627  integer, parameter :: lwk=(lx1+2)*(ly1+2)*(lz1+2)
1628  real(DP) :: work(0:lwk-1),work2(0:lwk-1)
1629  integer :: e,e0,ee,es
1630  integer :: nu3, nv3, iu, iv, i
1631 
1632  e0=1
1633  es=1
1634  ee=nelv
1635 
1636  if (nv > nu) then
1637  e0=nelv
1638  es=-1
1639  ee=1
1640  endif
1641 
1642  nu3 = nu**3
1643  nv3 = nv**3
1644 
1645  h1mg_flop = h1mg_flop + nelv * nv * (2*nu - 1) * nu * nu
1646  h1mg_flop = h1mg_flop + nelv * nv * nv * (2*nu - 1) * nu
1647  h1mg_flop = h1mg_flop + nelv * nv * nv * nv * (2*nu - 1)
1648  h1mg_mop = h1mg_mop + nv3 + nu3
1649 
1650  do e=e0,ee,es
1651  iu = 1 + (e-1)*nu3
1652  iv = 1 + (e-1)*nv3
1653  call mxm(a,nv,v(iu),nu,work,nu*nu)
1654  do i=0,nu-1
1655  call mxm(work(nv*nu*i),nv,bt,nu,work2(nv*nv*i),nv)
1656  enddo
1657  call mxm(work2,nv*nv,ct,nu,v(iv),nv)
1658  enddo
1659 
1660  return
1661 end subroutine hsmg_tnsr1_3d
1662 
1663 !------------------------------------------ T -----------------------
1665 subroutine h1mg_rstr(r,l,ifdssum)
1666  use kinds, only : dp
1667  use hsmg, only : mg_rstr_wt, mg_rstr_wt_index, mg_nh, mg_nhz, mg_jht
1668  use hsmg, only : mg_jh, mg_fld
1669  implicit none
1670  logical :: ifdssum
1671  real(DP) :: r(1)
1672  integer :: l
1673 
1674  call hsmg_do_wt(r,mg_rstr_wt(mg_rstr_wt_index(l+1,mg_fld)) &
1675  ,mg_nh(l+1),mg_nh(l+1),mg_nhz(l+1))
1676 
1677  call hsmg_tnsr1(r,mg_nh(l),mg_nh(l+1),mg_jht(1,l),mg_jh(1,l))
1678 
1679  if (ifdssum) call hsmg_dssum(r,l)
1680 
1681  return
1682 end subroutine h1mg_rstr
1683 
1684 !-----------------------------------------------------------------------
1685 subroutine h1mg_setup_mg_nx()
1686  use size_m, only : lx2, nx1, lx1, ly1, lz1, ldimt1, nid
1687  use input, only : if3d
1688  use hsmg, only : mg_h1_n, mg_lmax, mg_h1_lmax, mg_nz, mg_ny, mg_nx
1689  use tstep, only : nelfld
1690  implicit none
1691 
1692  integer :: mgn2(10) = (/ 1, 2, 2, 2, 2, 3, 3, 5, 5, 5/)
1693  integer :: p82, mgnx1, mgnx2, i, ifld, l
1694 
1695 ! if (param(82).eq.0) param(82)=2 ! nek default
1696 ! if (np.eq.1) param(82)=2 ! single proc. too slow
1697  p82 = 2 ! potentially variable nxc
1698  mg_h1_lmax = 3
1699 ! mg_h1_lmax = 4
1700  if (lx1 == 4) mg_h1_lmax = 2
1701 ! if (param(79).ne.0) mg_h1_lmax = param(79)
1702  mgnx1 = p82-1 !1
1703  mg_nx(1) = mgnx1
1704  mg_ny(1) = mgnx1
1705  mg_nz(1) = mgnx1
1706  if ( .NOT. if3d) mg_nz(1) = 0
1707 
1708  mgnx2 = 2*(lx2/4) + 1
1709  if (lx1 == 5) mgnx2 = 3
1710 ! if (lx1.eq.6) mgnx2 = 3
1711  if (lx1 <= 10) mgnx2 = mgn2(nx1)
1712  if (lx1 == 8) mgnx2 = 4
1713  if (lx1 == 8) mgnx2 = 3
1714 
1715  mgnx2 = min(3,mgnx2) ! This choice seems best (9/24/12)
1716 
1717  mg_nx(2) = mgnx2
1718  mg_ny(2) = mgnx2
1719  mg_nz(2) = mgnx2
1720  if ( .NOT. if3d) mg_nz(2) = 0
1721 
1722  mg_nx(3) = mgnx2+1
1723  mg_ny(3) = mgnx2+1
1724  mg_nz(3) = mgnx2+1
1725  if ( .NOT. if3d) mg_nz(3) = 0
1726 
1727  mg_nx(mg_h1_lmax) = lx1-1
1728  mg_ny(mg_h1_lmax) = ly1-1
1729  mg_nz(mg_h1_lmax) = lz1-1
1730 
1731  if (nid == 0) write(*,*) 'h1_mg_nx:',(mg_nx(i),i=1,mg_h1_lmax)
1732  if (nid == 0) write(*,*) 'h1_mg_ny:',(mg_ny(i),i=1,mg_h1_lmax)
1733  if (nid == 0) write(*,*) 'h1_mg_nz:',(mg_nz(i),i=1,mg_h1_lmax)
1734 
1735  do ifld=1,ldimt1
1736  do l=1,mg_lmax
1737  mg_h1_n(l,ifld)=(mg_nx(l)+1) &
1738  *(mg_ny(l)+1) &
1739  *(mg_nz(l)+1)*nelfld(ifld)
1740  enddo
1741  enddo
1742 
1743  return
1744 end subroutine h1mg_setup_mg_nx
1745 
1746 !----------------------------------------------------------------------
1749  use hsmg, only : mg_h1_lmax, mg_nx, mg_ah, mg_bh, mg_dh, mg_dht, mg_zh
1750  use hsmg, only : mg_nh, mg_nhz, mg_nz
1751  use semhat, only : ah, bh, ch, dh, zh, dph, jph, bgl, zgl, dgl, jgl, wh
1752  implicit none
1753 
1754  integer :: l, n
1755 
1756  do l=1,mg_h1_lmax
1757  n = mg_nx(l) ! polynomial order
1758  call generate_semhat(ah,bh,ch,dh,zh,dph,jph,bgl,zgl,dgl,jgl,n,wh)
1759  call copy(mg_ah(1,l),ah,(n+1)*(n+1))
1760  call copy(mg_bh(1,l),bh,n+1)
1761  call copy(mg_dh(1,l),dh,(n+1)*(n+1))
1762  call transpose(mg_dht(1,l),n+1,dh,n+1)
1763  call copy(mg_zh(1,l),zh,n+1)
1764 
1765  mg_nh(l)=n+1
1766  mg_nhz(l)=mg_nz(l)+1
1767 
1768  enddo
1769 end subroutine h1mg_setup_semhat
1770 
1771 !----------------------------------------------------------------------
1773  use kinds, only : i8
1774  use size_m, only : lx1, ly1, lz1, lelt, nelv, ndim
1775  use input, only : if3d
1776  use hsmg, only : mg_lmax, mg_nh, mg_nhz, mg_fld
1777  use hsmg, only : mg_gsh_handle, mg_gsh_schwarz_handle
1778  use mesh, only : vertex
1779  use parallel, only : nelgv
1780  implicit none
1781 
1782  integer, parameter :: lxyz=(lx1+2)*(ly1+2)*(lz1+2)
1783 
1784  integer(i8), allocatable :: glo_num(:)
1785  integer :: nx,ny,nz, ncrnr
1786  integer :: l
1787 
1788 
1789 ! set up direct stiffness summation for each level
1790  ncrnr = 2**ndim
1791  call get_vert()
1792 
1793  allocate(glo_num(lxyz*lelt))
1794  do l=1,mg_lmax
1795  nx=mg_nh(l)
1796  ny=mg_nh(l)
1797  nz=mg_nhz(l)
1798  call setupds(mg_gsh_handle(l,mg_fld),nx,ny,nz &
1799  ,nelv,nelgv,vertex,glo_num)
1800  nx=nx+2
1801  ny=ny+2
1802  nz=nz+2
1803  if( .NOT. if3d) nz=1
1804  call setupds(mg_gsh_schwarz_handle(l,mg_fld),nx,ny,nz &
1805  ,nelv,nelgv,vertex,glo_num)
1806  enddo
1807 end subroutine h1mg_setup_dssum
1808 
1809 !----------------------------------------------------------------------
1810 subroutine mg_set_msk(p_msk ,l0)
1811  use kinds, only : dp
1812  use hsmg, only : mg_h1_lmax, p_mg_msk, mg_nh, mg_nhz, mg_imask
1813  use hsmg, only : mg_h1_n, mg_fld
1814  use tstep, only : ifield, nelfld
1815  implicit none
1816 
1817  integer :: p_msk, l0
1818 
1819  real(DP), allocatable :: work(:)
1820  integer :: l, n, nx, ny, nz, nm
1821  l = mg_h1_lmax
1822  p_mg_msk(l,mg_fld) = 0
1823  n = mg_h1_n(l,mg_fld)
1824 
1825  allocate(work(maxval(mg_nh(1:mg_h1_lmax))*maxval(mg_nh(1:mg_h1_lmax))*maxval(mg_nhz(1:mg_h1_lmax))*nelfld(ifield)))
1826 
1827  do l=mg_h1_lmax,1,-1
1828  nx = mg_nh(l)
1829  ny = mg_nh(l)
1830  nz = mg_nhz(l)
1831 
1832  p_msk = p_mg_msk(l,mg_fld)
1833 
1834  call h1mg_setup_mask &
1835  (mg_imask(p_msk),nm,nx,ny,nz,nelfld(ifield),l,work)
1836 
1837  if (l > 1) p_mg_msk(l-1,mg_fld)=p_mg_msk(l,mg_fld)+nm
1838 
1839  enddo
1840 
1841  p_msk = p_mg_msk(l0,mg_fld)
1842 
1843  return
1844 end subroutine mg_set_msk
1845 
1846 !----------------------------------------------------------------------
1847 subroutine h1mg_setup_mask(mask,nm,nx,ny,nz,nel,l,w)
1848  use kinds, only : dp
1849  use size_m, only : nid
1850  use input, only : if3d
1851  implicit none
1852 
1853  integer :: mask(*) ! Pointer to Dirichlet BCs
1854  integer :: nx,ny,nz,nel,l
1855  real(DP) :: w(nx,ny,nz,nel)
1856 
1857  integer :: e,count,ptr
1858  integer :: lbr,rbr,lbs,rbs,lbt,rbt,two
1859  integer :: nxyz, n, ierrmx, i, ierr, nm
1860  integer, external :: iglmax
1861  real(DP) :: zero
1862  real(DP) :: w_flat(nx*ny*nz)
1863 
1864  zero = 0
1865  nxyz = nx*ny*nz
1866  n = nx*ny*nz*nel
1867 
1868  w = 1._dp ! Init everything to 1
1869 
1870  ierrmx = 0 ! BC verification
1871  two = 2
1872  do e=1,nel ! Set dirichlet nodes to zero
1873 
1874  call get_fast_bc(lbr,rbr,lbs,rbs,lbt,rbt,e,two,ierr)
1875  ! write(6,6) e,lbr,rbr,lbs,rbs,ierr,nx
1876  ! 6 format(i5,2x,4i3,2x,i2,3x,i5,' lbr,rbr,lbs')
1877 
1878  if (lbr == 1) call facev(w,e,4,zero,nx,ny,nz)
1879  if (rbr == 1) call facev(w,e,2,zero,nx,ny,nz)
1880  if (lbs == 1) call facev(w,e,1,zero,nx,ny,nz)
1881  if (rbs == 1) call facev(w,e,3,zero,nx,ny,nz)
1882  if (if3d) then
1883  if (lbt == 1) call facev(w,e,5,zero,nx,ny,nz)
1884  if (rbt == 1) call facev(w,e,6,zero,nx,ny,nz)
1885  endif
1886  ierrmx = max(ierrmx,ierr)
1887  enddo
1888 
1889  call hsmg_dsprod(w,l) ! direct stiffness multiply
1890 
1891 
1892 ! Prototypical mask layout, nel=5:
1893 
1894 ! e=1 ... 10
1895 ! 1 2 3 4 5 ... 10 | 11 12 13 14 | 15 | 16 |
1896 ! 11 15 16 ... | 3 p1 p2 p3 | 0 | 0 | ...
1897 ! ^
1898 ! |
1899 ! |_count for e=1
1900 
1901 
1902  nm = 1 ! store mask
1903  do e=1,nel
1904 
1905  mask(e) = nel+nm
1906  count = 0 ! # Dirchlet points on element e
1907  ptr = mask(e)
1908 
1909  w_flat = reshape(w(:,:,:,e), (/nxyz/))
1910  do i=1,nxyz
1911  if (w_flat(i) == 0) then
1912  nm = nm +1
1913  count = count+1
1914  ptr = ptr +1
1915  mask(ptr) = i + nxyz*(e-1) ! where I mask on element e
1916  endif
1917  enddo
1918 
1919 
1920  ptr = mask(e)
1921  mask(ptr) = count
1922 
1923  nm = nm+1 ! bump pointer to hold next count
1924 
1925  enddo
1926 
1927  nm = nel + nm-1 ! Return total number of mask pointers/counters
1928 
1929  ierrmx = iglmax(ierrmx,1)
1930  if (ierrmx > 0) then
1931  if (ierr > 0) write(6,*) nid,ierr,' BC FAIL h1'
1932  call exitti('D INVALID BC FOUND in h1mg_setup_mask$',ierrmx)
1933  endif
1934 
1935  return
1936 end subroutine h1mg_setup_mask
1937 
1938 !-----------------------------------------------------------------------
1939 subroutine h1mg_setup_schwarz_wt_2(wt,ie,n,work,ifsqrt)
1940  use kinds, only : dp
1941  use size_m, only : ndim
1942  implicit none
1943  real(DP) :: wt(1),work(1)
1944  integer :: ie, n
1945  logical :: ifsqrt
1946 
1947 !max if (ndim == 2) call h1mg_setup_schwarz_wt2d_2(wt,ie,n,work,ifsqrt)
1948  if (ndim == 3) call h1mg_setup_schwarz_wt3d_2(wt,ie,n,work,ifsqrt)
1949 
1950  return
1951 end subroutine h1mg_setup_schwarz_wt_2
1952 
1953 !----------------------------------------------------------------------
1954 subroutine h1mg_setup_schwarz_wt3d_2(wt,ie,n,work,ifsqrt)
1955  use kinds, only : dp
1956  use size_m, only : nelv
1957  implicit none
1958 
1959  logical :: ifsqrt
1960  integer :: ie, n
1961  real(DP) :: wt(n,n,4,3,nelv)
1962  real(DP) :: work(n,n,n)
1963 
1964  integer :: i,j,k, ii, ierr
1965 
1966  ierr = 0
1967  do k=1,n
1968  do j=1,n
1969  wt(j,k,1,1,ie)=1.0/work(1,j,k)
1970  wt(j,k,2,1,ie)=1.0/work(2,j,k)
1971  wt(j,k,3,1,ie)=1.0/work(n-1,j,k)
1972  wt(j,k,4,1,ie)=1.0/work(n,j,k)
1973  enddo
1974  enddo
1975  do k=1,n
1976  do i=1,n
1977  wt(i,k,1,2,ie)=1.0/work(i,1,k)
1978  wt(i,k,2,2,ie)=1.0/work(i,2,k)
1979  wt(i,k,3,2,ie)=1.0/work(i,n-1,k)
1980  wt(i,k,4,2,ie)=1.0/work(i,n,k)
1981  enddo
1982  enddo
1983  do j=1,n
1984  do i=1,n
1985  wt(i,j,1,3,ie)=1.0/work(i,j,1)
1986  wt(i,j,2,3,ie)=1.0/work(i,j,2)
1987  wt(i,j,3,3,ie)=1.0/work(i,j,n-1)
1988  wt(i,j,4,3,ie)=1.0/work(i,j,n)
1989  enddo
1990  enddo
1991  if(ifsqrt) then
1992  do ii=1,3
1993  do k=1,4
1994  do j=1,4
1995  do i=1,n
1996  wt(i,j,k,ii,ie)=sqrt(wt(i,j,k,ii,ie))
1997  enddo
1998  enddo
1999  enddo
2000  enddo
2001  endif
2002 
2003  return
2004 end subroutine h1mg_setup_schwarz_wt3d_2
2005 
2006 !----------------------------------------------------------------------
2007 subroutine h1mg_setup_schwarz_wt_1(wt,l,ifsqrt)
2008  use kinds, only : dp
2009  use input, only : if3d
2010  use hsmg, only : mg_h1_n, p_mg_msk, mg_nh, mg_fld
2011  use tstep, only : ifield, nelfld
2012  implicit none
2013 
2014  real(DP) :: wt(1)
2015  integer :: l
2016  logical :: ifsqrt
2017 
2018  integer :: enx,eny,enz,pm, n, ns, i, nx, ny, nz, nxyz, k, ie
2019  real(DP) :: zero, one, onem
2020  real(DP), allocatable :: work(:)
2021 
2022  zero = 0
2023  one = 1
2024  onem = -1
2025 
2026  n = mg_h1_n(l,mg_fld)
2027  pm = p_mg_msk(l,mg_fld)
2028 
2029  enx=mg_nh(l)+2
2030  eny=mg_nh(l)+2
2031  enz=mg_nh(l)+2
2032  if( .NOT. if3d) enz=1
2033  ns = enx*eny*enz*nelfld(ifield)
2034  i = ns+1
2035 
2036  allocate(work(2*ns))
2037  work(1:ns) = 0._dp; work(i:i+ns-1) = 1._dp
2038 
2039 ! Sum overlap region (border excluded)
2040  call hsmg_extrude(work,0,zero,work(i),0,one ,enx,eny,enz)
2041  call hsmg_schwarz_dssum(work(i),l)
2042  call hsmg_extrude(work(i),0,one ,work,0,onem,enx,eny,enz)
2043  call hsmg_extrude(work(i),2,one, work(i),0,one,enx,eny,enz)
2044 
2045  if( .NOT. if3d) then ! Go back to regular size array
2046 !max call hsmg_schwarz_toreg2d(mg_work,mg_work(i),mg_nh(l))
2047  else
2048  call hsmg_schwarz_toreg3d(work,work(i),mg_nh(l))
2049  endif
2050 
2051  call hsmg_dssum(work,l) ! sum border nodes
2052 
2053 
2054  nx = mg_nh(l)
2055  ny = mg_nh(l)
2056  nz = mg_nh(l)
2057  if ( .NOT. if3d) nz=1
2058  nxyz = nx*ny*nz
2059  k = 1
2060  do ie=1,nelfld(ifield)
2061  ! call outmat(mg_work(k),nx,ny,'NEW WT',ie)
2062  call h1mg_setup_schwarz_wt_2(wt,ie,nx,work(k),ifsqrt)
2063  k = k+nxyz
2064  enddo
2065 ! stop
2066 
2067  return
2068 end subroutine h1mg_setup_schwarz_wt_1
2069 
2070 !----------------------------------------------------------------------
2071 end module hsmg_routines
subroutine h1mg_setup_wtmask
Definition: hsmg.F90:342
subroutine, public hsmg_setup()
Sets up hybrid Schwarz multi-grid preconditioner.
Definition: hsmg.F90:50
subroutine hsmg_setup_fast1d_a(a, lbc, rbc, ll, lm, lr, ah, n)
Definition: hsmg.F90:979
subroutine hsmg_tnsr1_3d(v, nv, nu, A, Bt, Ct)
compute v = [C (x) B (x) A] v (in-place)
Definition: hsmg.F90:1618
subroutine row_zero(a, m, n, e)
zero the eth row of a
Definition: fast3d.F90:527
subroutine hsmg_setup_rstr_wt(wt, nx, ny, nz, l, w)
Definition: hsmg.F90:1211
subroutine get_fast_bc(lbr, rbr, lbs, rbs, lbt, rbt, e, bsym, ierr)
Definition: fast3d.F90:192
cleaned
Definition: tstep_mod.F90:2
subroutine hsmg_setup_fast1d_b(b, lbc, rbc, ll, lm, lr, bh, n)
Definition: hsmg.F90:1025
subroutine hsmg_schwarz_toreg3d(b, a, n)
Definition: hsmg.F90:770
Input parameters from preprocessors.
Definition: input_mod.F90:11
subroutine hsmg_dsprod(u, l)
Definition: hsmg.F90:514
subroutine hsmg_fdm(e, r, l)
clobbers r
Definition: hsmg.F90:1066
subroutine mxm(a, n1, b, n2, c, n3)
Compute matrix-matrix product C = A*B.
Definition: mxm_wrapper.F90:7
subroutine hsmg_tnsr3d(v, nv, u, nu, A, Bt, Ct)
computes: v = [C (x) B (x) A] u .
Definition: hsmg.F90:437
subroutine hsmg_intp(uf, uc, l)
Definition: hsmg.F90:407
subroutine h1mg_schwarz_part1(e, r, l)
Definition: hsmg.F90:658
subroutine h1mg_setup_dssum
Definition: hsmg.F90:1772
subroutine hsmg_schwarz_dssum(u, l)
Definition: hsmg.F90:529
Module containing data for HSMG.
Definition: hsmg_mod.F90:3
#define crs_solve
Definition: crs.h:9
subroutine geom_reset(icall)
Generate geometry data.
Definition: ic.F90:1513
void exitt()
Definition: comm_mpi.F90:411
subroutine dsavg(u)
Take direct stiffness avg of u.
Definition: ic.F90:1559
subroutine hsmg_index_0
initialize index sets
Definition: hsmg.F90:1538
cleaned
Definition: semhat_mod.F90:2
subroutine hsmg_setup_intp
Definition: hsmg.F90:258
subroutine hsmg_extrude(arr1, l1, f1, arr2, l2, f2, nx, ny, nz)
Definition: hsmg.F90:549
subroutine hsmg_setup_fast(s, d, nl, ah, bh, n)
not sure
Definition: hsmg.F90:870
subroutine hsmg_schwarz_wt3d(e, wt, n)
Definition: hsmg.F90:1384
subroutine fd_weights_full(xx, x, n, m, c)
This routine evaluates the derivative based on all points in the stencils. It is more memory efficien...
Definition: fast3d.F90:409
subroutine hsmg_do_fast(e, r, s, d, nl)
clobbers r
Definition: hsmg.F90:1082
cleaned
Definition: mesh_mod.F90:2
real(dp) function dnekclock()
Definition: ctimer_mod.F90:103
subroutine h1mg_setup_mask(mask, nm, nx, ny, nz, nel, l, w)
Definition: hsmg.F90:1847
subroutine copy(a, b, n)
Definition: math.F90:52
subroutine hsmg_schwarz_toext3d(a, b, n)
Definition: hsmg.F90:741
subroutine mg_set_msk(p_msk, l0)
Definition: hsmg.F90:1810
subroutine h1mg_setup_fdm()
Definition: hsmg.F90:795
subroutine hsmg_setup_fast1d(s, lam, nl, lbc, rbc, ll, lm, lr, ah, bh, n, ie)
Definition: hsmg.F90:952
subroutine hsmg_do_wt(u, wt, nx, ny, nz)
u = wt .* u
Definition: hsmg.F90:1148
subroutine, public h1mg_solve(z, rhs, if_hybrid)
Solve preconditioner: z = M rhs, where .
Definition: hsmg.F90:111
subroutine h1mg_schwarz(e, r, sigma, l)
Definition: hsmg.F90:628
subroutine, public spectral_solve(u, rhs)
Definition: poisson_mod.F90:48
subroutine h1mg_setup_mg_nx()
Definition: hsmg.F90:1685
subroutine h1mg_setup_schwarz_wt3d_2(wt, ie, n, work, ifsqrt)
Definition: hsmg.F90:1954
subroutine generalev(a, b, lam, n, w)
Solve the generalized eigenvalue problem A x = lam B x.
Definition: hmholtz.F90:1101
subroutine hsmg_setup_mg_nx()
Definition: hsmg.F90:1483
subroutine h1mg_rstr(r, l, ifdssum)
r =J r, l is coarse level
Definition: hsmg.F90:1665
subroutine h1mg_setup_schwarz_wt(ifsqrt)
Definition: hsmg.F90:1331
subroutine setupds(gs_handle, nx, ny, nz, nel, melg, vertex, glo_num)
setup data structures for direct stiffness operations
Definition: dssum.F90:3
subroutine h1mg_setup_semhat
SEM hat matrices for each level.
Definition: hsmg.F90:1748
subroutine hsmg_setup_semhat
Definition: hsmg.F90:222
subroutine generate_semhat(a, b, c, d, z, dgll, jgll, bgl, zgl, dgl, jgl, n, w)
Generate matrices for single element, 1D operators:
Definition: fast3d.F90:328
subroutine h1mg_setup_schwarz_wt_1(wt, l, ifsqrt)
Definition: hsmg.F90:2007
subroutine h1mg_setup_schwarz_wt_2(wt, ie, n, work, ifsqrt)
Definition: hsmg.F90:1939
subroutine hsmg_setup_solve
Definition: hsmg.F90:1460
subroutine, public h1mg_setup()
Sets up Poisson preconditioner.
Definition: hsmg.F90:74
cleaned
Definition: parallel_mod.F90:2
subroutine h1mg_mask(w, mask, nel)
Definition: hsmg.F90:1558
subroutine hsmg_coarse_solve(e, r)
Definition: hsmg.F90:1421
#define gs_op
Definition: gs.c:15
subroutine nekgsync()
Definition: comm_mpi.F90:318
subroutine hsmg_tnsr(v, nv, u, nu, A, At)
computes v = [A (x) A] u or v = [A (x) A (x) A] u
Definition: hsmg.F90:419
subroutine hsmg_dssum(u, l)
Definition: hsmg.F90:493
subroutine hsmg_setup_fdm()
Definition: hsmg.F90:832
subroutine hsmg_tnsr1(v, nv, nu, A, At)
compute v = [A (x) A] u or v = [A (x) A (x) A] u
Definition: hsmg.F90:1600
subroutine hsmg_setup_wtmask
Definition: hsmg.F90:374
subroutine hsmg_setup_dssum
Definition: hsmg.F90:303
subroutine facev(a, ie, iface, val, nx, ny, nz)
Assign the value VAL to face(IFACE,IE) of array A. IFACE is the input in the pre-processor ordering s...
Definition: connect1.F90:1041
subroutine hsmg_setup_intpm(jh, zf, zc, nf, nc)
Definition: hsmg.F90:284
subroutine exitti(stringi, idata)
Definition: comm_mpi.F90:328
subroutine hsmg_tnsr3d_el(v, nv, u, nu, A, Bt, Ct)
computes v = [C (x) B (x) A] u
Definition: hsmg.F90:467
subroutine hsmg_setup_schwarz_wt(ifsqrt)
Definition: hsmg.F90:1297
subroutine hsmg_schwarz_wt(e, l)
Definition: hsmg.F90:1366
subroutine mg_mask_e(w, mask)
Definition: hsmg.F90:1582