Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
poisson_mod.F90
Go to the documentation of this file.
1 !==============================================================================
9 module poisson
10  use, intrinsic :: iso_c_binding
11  use kinds, only : dp
12  implicit none
13 
14  public spectral_solve
15  private
16 
17  integer :: comm_xy, comm_yz
18  logical, save :: interface_initialized = .false.
19  logical, save :: mesh_to_grid_initialized = .false.
20 
21  integer :: alloc_local_xy, nin_local_xy, nout_local_xy, idx_in_local_xy, idx_out_local_xy
22  integer :: alloc_local_yz, nin_local_yz, nout_local_yz, idx_in_local_yz, idx_out_local_yz
23 
24  type real_p
25  real(DP), allocatable :: p(:)
26  end type real_p
27  type int_p
28  integer, allocatable :: p(:)
29  end type int_p
30 
31  type(real_p), allocatable :: send_buffers(:)
32  type(real_p), allocatable :: rec_buffers(:)
33  integer, allocatable :: dest_pids(:)
34  integer, allocatable :: dest_slots(:)
35  integer, allocatable :: dest_indexes(:)
36  integer, allocatable :: dest_lengths(:)
37 
38  integer, allocatable :: src_pids(:)
39  integer, allocatable :: src_lengths(:)
40  integer, allocatable :: src_slots(:,:,:)
41  integer, allocatable :: src_indexes(:,:,:)
42 
43  integer :: comm_size
44 
45 contains
46 
48 subroutine spectral_solve(u,rhs)!,h1,mask,mult,imsh,isd)
49  use kinds, only : dp
50  use geom, only : bm1
51  use mesh, only : shape_x, start_x, end_x
52  use parallel, only : nekcomm, lglel
53  use soln, only : vmult
54  use size_m, only : nx1, ny1, nz1
55  use ctimer, only : nscps, tscps, dnekclock
56 
57  use fft, only : p_forward, p_backward, w_forward, w_backward
58  use fft, only : fft_r2r, transpose_grid
59  use mesh, only : boundaries
60 
61  REAL(DP), intent(out) :: U (:)
62  REAL(DP), intent(inout) :: RHS (:)
63 
64  real(DP), allocatable :: rhs_coarse(:), soln_coarse(:)
65  real(DP), allocatable :: tmp_fine(:,:,:,:)
66  integer :: nelm
67  integer :: i
68  real(DP), allocatable :: plane_xy(:,:,:), plane_yx(:,:,:), plane_zy(:,:,:)
69  real(DP) :: rescale
70  real(DP) :: etime
71 
72 
73  nelm = size(rhs) / 8
74 
75  if (.not. interface_initialized) then
76  call init_comm_infrastructure(nekcomm, shape_x)
77  endif
78 
79  nscps = nscps + 1
80 
81  ! map onto fine mesh to use dssum
83  allocate(tmp_fine(nx1, ny1, nz1, nelm))
84  tmp_fine = 0._dp
85  forall (i = 1: nelm)
86  tmp_fine(1, 1, 1, i) = rhs(1 + (i-1)*8)
87  tmp_fine(nx1,1, 1, i) = rhs(2 + (i-1)*8)
88  tmp_fine(1, ny1,1, i) = rhs(3 + (i-1)*8)
89  tmp_fine(nx1,ny1,1, i) = rhs(4 + (i-1)*8)
90  tmp_fine(1, 1, nz1, i) = rhs(5 + (i-1)*8)
91  tmp_fine(nx1,1, nz1, i) = rhs(6 + (i-1)*8)
92  tmp_fine(1, ny1,nz1, i) = rhs(7 + (i-1)*8)
93  tmp_fine(nx1,ny1,nz1, i) = rhs(8 + (i-1)*8)
94  end forall
95  call dssum(tmp_fine)
96 
97  ! convert RHS to coarse mesh
98  allocate(rhs_coarse(nelm))
99  !forall(i = 1 : nelm) rhs_coarse(i) = (tmp_fine(1,1,1,i) + tmp_fine(1,1,nz1,i))/2._dp
100  forall(i = 1 : nelm) rhs_coarse(i) = ( &
101  + tmp_fine(1,1,1,i) &
102  + tmp_fine(nx1,1,1,i) &
103  + tmp_fine(1,ny1,1,i) &
104  + tmp_fine(nx1,ny1,1,i) &
105  + tmp_fine(1,1,nz1,i) &
106  + tmp_fine(nx1,1,nz1,i) &
107  + tmp_fine(1,ny1,nz1,i) &
108  + tmp_fine(nx1,ny1,nz1,i) &
109  )/8._dp
110 
111  ! reorder onto sticks
112  allocate(plane_xy(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1) )
113 
114  call mesh_to_grid(rhs_coarse, plane_xy, shape_x)
115  etime = dnekclock()
116 
117  ! forward FFT
118  rescale = 1._dp
119  if (boundaries(2) == 'P ') then
120  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), p_forward, rescale)
121  else
122  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), w_forward, rescale)
123  endif
124 
125  allocate(plane_yx(0:shape_x(2)-1, 0:nout_local_xy-1, 0:nin_local_yz-1) )
126  call transpose_grid(plane_xy, plane_yx, shape_x, 1, 2, comm_xy)
127  deallocate(plane_xy)
128 
129  if (boundaries(1) == 'P ') then
130  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), p_forward, rescale)
131  else
132  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), w_forward, rescale)
133  endif
134 
135  allocate(plane_zy(0:shape_x(3)-1, 0:nout_local_xy-1, 0:nout_local_yz-1) )
136  call transpose_grid(plane_yx, plane_zy, shape_x, 2, 3, comm_yz)
137  deallocate(plane_yx)
138 
139  if (boundaries(5) == 'P ') then
140  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), p_forward, rescale)
141  else
142  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), w_forward, rescale)
143  endif
144 
145  ! Poisson kernel
146  call poisson_kernel(plane_zy, shape_x, start_x, end_x, boundaries)
147 
148  ! reverse FFT
149  if (boundaries(5) == 'P ') then
150  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), p_backward, rescale)
151  else
152  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), w_backward, rescale)
153  endif
154 
155  allocate(plane_yx(0:shape_x(2)-1, 0:nout_local_xy-1, 0:nin_local_yz-1) )
156  call transpose_grid(plane_zy, plane_yx, shape_x, 3, 2, comm_yz)
157  deallocate(plane_zy)
158 
159  if (boundaries(1) == 'P ') then
160  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), p_backward, rescale)
161  else
162  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), w_backward, rescale)
163  endif
164 
165  allocate(plane_xy(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1))
166  call transpose_grid(plane_yx, plane_xy, shape_x, 2, 1, comm_xy)
167  deallocate(plane_yx)
168 
169  if (boundaries(2) == 'P ') then
170  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), p_backward, rescale)
171  else
172  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), w_backward, rescale)
173  endif
174 
175  ! normalize the FFTs
176  rescale = 1._dp / (rescale * sum(bm1(:,:,:,1)))
177  plane_xy = plane_xy * rescale
178 
179  tscps = tscps + (dnekclock() - etime)
180  ! reorder to local elements
181  allocate(soln_coarse(nelm)); soln_coarse = 0._dp
182  call grid_to_mesh(plane_xy, soln_coarse, shape_x)
183 
184  ! populate U
185  tmp_fine = 0._dp
186  forall (i = 1: nelm)
187  tmp_fine(1, 1, 1, i) = soln_coarse(i)
188  tmp_fine(nx1, 1, 1, i) = soln_coarse(i)
189  tmp_fine(1 , ny1, 1, i) = soln_coarse(i)
190  tmp_fine(nx1, ny1, 1, i) = soln_coarse(i)
191  tmp_fine(1, 1, nz1, i) = soln_coarse(i)
192  tmp_fine(nx1, 1, nz1, i) = soln_coarse(i)
193  tmp_fine(1 , ny1, nz1, i) = soln_coarse(i)
194  tmp_fine(nx1, ny1, nz1, i) = soln_coarse(i)
195  end forall
196  call dssum(tmp_fine)
197  tmp_fine = tmp_fine * vmult
198 
199  ! extract coarse values
200  u = 0._dp
201  forall (i = 1: nelm)
202  u(1+(i-1)*8) = tmp_fine(1, 1, 1, i)
203  u(2+(i-1)*8) = tmp_fine(nx1,1, 1, i)
204  u(3+(i-1)*8) = tmp_fine(1, ny1,1, i)
205  u(4+(i-1)*8) = tmp_fine(nx1,ny1,1, i)
206  u(5+(i-1)*8) = tmp_fine(1, 1, nz1, i)
207  u(6+(i-1)*8) = tmp_fine(nx1,1, nz1, i)
208  u(7+(i-1)*8) = tmp_fine(1, ny1,nz1, i)
209  u(8+(i-1)*8) = tmp_fine(nx1,ny1,nz1, i)
210  end forall
211 
212 
213  return
214 
215 end subroutine spectral_solve
216 
218 subroutine init_comm_infrastructure(comm_world, shape_x)
219  use input, only : param
220  integer, intent(in) :: comm_world !>
221  integer, intent(in) :: shape_x(3) !>
222 
223  integer(C_INTPTR_T) :: shape_c(3)
224  integer :: nxy, nyz, ixy, iyz
225  integer :: nid, ierr, i
226 
227  call mpi_comm_rank(comm_world, nid, ierr)
228  call mpi_comm_size(comm_world, comm_size, ierr)
229  !nid = nid/2 + mod(nid, 2) * comm_size / 2
230 
231  if (param(49) >= 1) then
232  comm_size = min(comm_size, int(param(49)))
233  endif
234  comm_size = min(comm_size, shape_x(1) * shape_x(2))
235  comm_size = min(comm_size, shape_x(2) * shape_x(3))
236  comm_size = min(comm_size, shape_x(1) * shape_x(3))
237 
238  nxy = int(2**int(log(real(comm_size))/log(2.) / 2))
239  comm_size = nxy*nxy
240  if (nid < comm_size) then
241  ixy = ((nxy*nid/comm_size) * shape_x(3)) / nxy
242  else
243  ixy = comm_size + 1
244  endif
245  call mpi_comm_split(comm_world, ixy, 0, comm_xy, ierr)
246  if (ierr /= 0) write(*,*) "Comm split xy failed", nid
247  if (nid >= comm_size) then
248  call mpi_comm_free(comm_xy, ierr)
249  if (ierr /= 0) write(*,*) "Comm free xy failed", nid
250  endif
251 
252  nyz = comm_size/nxy
253  if (nid < comm_size) then
254  iyz = (mod(nid,nyz) * shape_x(2)) / nyz
255  else
256  iyz = comm_size + 1
257  endif
258  call mpi_comm_split(comm_world, iyz, 0, comm_yz, ierr)
259  if (ierr /= 0) write(*,*) "Comm split yz failed", nid
260  if (nid >= comm_size) then
261  call mpi_comm_free(comm_yz, ierr)
262  if (ierr /= 0) write(*,*) "Comm free yz failed", nid
263  endif
264 
265  if (nid < comm_size) then
266  shape_c = shape_x
267  idx_in_local_xy = (mod(nid,nxy) * shape_x(2)) / nxy
268  idx_out_local_xy = (mod(nid,nxy) * shape_x(1)) / nxy
269  idx_in_local_yz = ((nxy*nid/comm_size) * shape_x(3)) / nyz
270  idx_out_local_yz = ((nxy*nid/comm_size) * shape_x(2)) / nyz
271  nin_local_yz = shape_x(3) / nxy; nout_local_yz = shape_x(2)/nxy
272  nin_local_xy = shape_x(2) / nyz; nout_local_xy = shape_x(1)/nyz
273 
274  else
275  nin_local_xy = 0; nout_local_xy = 0
276  nin_local_yz = 0; nout_local_yz = 0
277  idx_in_local_xy = -1; idx_out_local_xy = -1
278  idx_in_local_yz = -1; idx_out_local_yz = -1
279  alloc_local_xy = 0; alloc_local_yz = 0
280  endif
281 
282 #if 1
283  do i = 0, 65
284  call nekgsync()
285  if (nid == i) write(*,'(A,15(I5))') "MAX:", nid, alloc_local_xy, nin_local_xy, nout_local_xy, &
286  alloc_local_yz, nin_local_yz, nout_local_yz, &
287  idx_in_local_xy, idx_out_local_xy, idx_in_local_yz, idx_out_local_yz, &
288  nxy, nyz, ixy, iyz
289  !if (nid == i) write(*,'(A,6(I5))') "MAX:", nid, alloc_local_xy, nin_local_xy, nout_local_xy, idx_in_local_xy, idx_out_local_xy
290  enddo
291  call nekgsync()
292 ! write(*,'(A,6(I5))') "MAX:", nid, alloc_local_yz, nin_local_yz, idx_in_local_yz, nout_local_yz, idx_out_local_yz
293 #endif
294 
295  if (param(75) < 1) then
296  call transpose_test()
297  call shuffle_test()
298  call cos_test()
299  endif
300 
301  interface_initialized = .true.
302 
303 end subroutine init_comm_infrastructure
304 
305 integer function xyz_to_pid(ix, iy, iz, shape_x, shape_p)
306  integer, intent(in) :: ix, iy, iz
307  integer, intent(in) :: shape_x(3)
308  integer, intent(in) :: shape_p(2)
309 
310  xyz_to_pid = (iz * shape_p(2) / shape_x(3)) * shape_p(1) + (iy * shape_p(1) / shape_x(2))
311  !xyz_to_pid = xyz_to_pid * 2
312 
313 end function
314 
315 subroutine init_mesh_to_grid(nelm, shape_x)
316  use parallel, only : lglel, gllnid, nid
317  use mesh, only : ieg_to_xyz
318  integer, intent(in) :: nelm
319  integer, intent(in) :: shape_x(3)
320 
321  integer :: i, j, ieg, src_pid, dest_pid, npids, slot
322  integer :: idx, idy, idz
323  integer :: ix(3)
324  integer :: shape_p(2)
325 
326  shape_p(1) = int(sqrt(real(comm_size)))
327  shape_p(2) = int(sqrt(real(comm_size)))
328 
329  allocate(dest_pids(nelm))
330  npids = 0
331  do i = 1, nelm
332  ieg = lglel(i)
333  ix = ieg_to_xyz(ieg)
334  dest_pid = xyz_to_pid(ix(1), ix(2), ix(3), shape_x, shape_p)
335  slot = -1
336  do j = 1, npids
337  if (dest_pid == dest_pids(j)) then
338  slot = j
339  exit
340  endif
341  enddo
342  if (slot == -1) then
343  npids = npids + 1
344  slot = npids
345  dest_pids(slot) = dest_pid
346  endif
347  enddo
348  deallocate(dest_pids)
349  allocate(dest_pids(npids))
350  allocate(dest_lengths(npids))
351  allocate(dest_slots(nelm))
352  allocate(dest_indexes(nelm))
353  npids = 0
354  dest_lengths = 0
355  do i = 1, nelm
356  ieg = lglel(i)
357  ix = ieg_to_xyz(ieg)
358  dest_pid = xyz_to_pid(ix(1), ix(2), ix(3), shape_x, shape_p)
359  slot = -1
360  do j = 1, npids
361  if (dest_pid == dest_pids(j)) then
362  slot = j
363  exit
364  endif
365  enddo
366  if (slot == -1) then
367  npids = npids + 1
368  slot = npids
369  dest_pids(slot) = dest_pid
370  endif
371  dest_slots(i) = slot
372  dest_lengths(slot) = dest_lengths(slot) + 1
373  dest_indexes(i) = dest_lengths(slot)
374  enddo
375 
376  allocate(src_pids(shape_x(1) * nin_local_xy * nin_local_yz))
377  npids = 0
378  do idz = idx_in_local_yz, idx_in_local_yz + nin_local_yz - 1
379  do idy = idx_in_local_xy, idx_in_local_xy + nin_local_xy - 1
380  do idx = 0, shape_x(1)-1
381  ieg = 1 + idx + idy * shape_x(1) + idz * shape_x(1) * shape_x(2)
382  src_pid = gllnid(ieg)
383  slot = -1
384  do j = 1, npids
385  if (src_pid == src_pids(j)) then
386  slot = j
387  exit
388  endif
389  enddo
390  if (slot == -1) then
391  npids = npids + 1
392  slot = npids
393  src_pids(slot) = src_pid
394  endif
395  enddo
396  enddo
397  enddo
398  deallocate(src_pids)
399 
400  allocate(src_pids(npids))
401  allocate(src_lengths(npids))
402  allocate(src_slots(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1))
403  allocate(src_indexes(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1))
404  npids = 0
405  src_lengths = 0
406  do idz = idx_in_local_yz, idx_in_local_yz + nin_local_yz - 1
407  do idy = idx_in_local_xy, idx_in_local_xy + nin_local_xy - 1
408  do idx = 0, shape_x(1)-1
409  ieg = 1 + idx + idy * shape_x(1) + idz * shape_x(1) * shape_x(2)
410  src_pid = gllnid(ieg)
411  slot = -1
412  do j = 1, npids
413  if (src_pid == src_pids(j)) then
414  slot = j
415  exit
416  endif
417  enddo
418  if (slot == -1) then
419  npids = npids + 1
420  slot = npids
421  src_pids(slot) = src_pid
422  endif
423  src_slots(idx,idy-idx_in_local_xy, idz-idx_in_local_yz) = slot
424  src_lengths(slot) = src_lengths(slot) + 1
425  src_indexes(idx,idy-idx_in_local_xy, idz-idx_in_local_yz) = src_lengths(slot)
426  enddo
427  enddo
428  enddo
429 
430  allocate(send_buffers(size(dest_pids)))
431  do i = 1, size(dest_lengths)
432  allocate(send_buffers(i)%p(dest_lengths(i)))
433  enddo
434  allocate(rec_buffers(size(src_pids)))
435  do i = 1, size(src_lengths)
436  allocate(rec_buffers(i)%p(src_lengths(i)))
437  enddo
438 
439  call nekgsync()
440  if (nid == 0) write(*,*) "Finished init", nelm
441 
442 end subroutine init_mesh_to_grid
443 
444 subroutine mesh_to_grid(mesh, grid, shape_x)
445  use kinds, only : dp
446  use parallel, only : nekcomm, nekreal
447  use parallel, only : lglel, gllnid
448  use mpif, only : mpi_status_ignore
449 
450  real(DP), intent(in) :: mesh(:)
451  real(DP), intent(out) :: grid(0:,0:,0:)
452  integer, intent(in) :: shape_x(3)
453 
454  integer, allocatable :: mpi_reqs(:)
455  integer :: n_mpi_reqs
456  integer :: i, idx, idy, idz, ierr
457  integer :: nelm
458  integer :: slot, index_in_slot
459  nelm = size(mesh)
460 
461  if (.not. mesh_to_grid_initialized) then
462  call init_mesh_to_grid(nelm, shape_x)
463  mesh_to_grid_initialized = .true.
464  endif
465 
466  ! go through our stuff
467  do i = 1, nelm
468  send_buffers(dest_slots(i))%p(dest_indexes(i)) = mesh(i)
469  enddo
470 
471  allocate(mpi_reqs(size(src_pids)+size(dest_pids))); n_mpi_reqs = 0
472  do i = 1, size(dest_pids)
473  n_mpi_reqs = n_mpi_reqs + 1
474  call mpi_isend(send_buffers(i)%p, dest_lengths(i), &
475  nekreal, dest_pids(i), 0, nekcomm, mpi_reqs(n_mpi_reqs), ierr)
476  enddo
477 
478  do i = 1, size(src_pids)
479  n_mpi_reqs = n_mpi_reqs + 1
480  call mpi_irecv(rec_buffers(i)%p, src_lengths(i), &
481  nekreal, src_pids(i), 0, nekcomm, mpi_reqs(n_mpi_reqs), ierr)
482  enddo
483 
484  do i = 1, n_mpi_reqs
485  call mpi_wait(mpi_reqs(i), mpi_status_ignore, ierr)
486  enddo
487 
488 
489  do idx = 0, shape_x(1)-1
490  do idy = idx_in_local_xy, idx_in_local_xy + nin_local_xy - 1
491  do idz = idx_in_local_yz, idx_in_local_yz + nin_local_yz - 1
492  slot = src_slots(idx, idy-idx_in_local_xy, idz-idx_in_local_yz)
493  index_in_slot = src_indexes(idx, idy-idx_in_local_xy, idz-idx_in_local_yz)
494  grid(idx, idy-idx_in_local_xy, idz-idx_in_local_yz) = &
495  rec_buffers(slot)%p(index_in_slot)
496  enddo
497  enddo
498  enddo
499 
500 end subroutine mesh_to_grid
501 
502 subroutine grid_to_mesh(grid, mesh, shape_x)
503  use kinds, only : dp
504  use parallel, only : nekcomm, nekreal
505  use parallel, only : lglel, gllel, gllnid
506  use mpif, only : mpi_status_ignore
507 
508  real(DP), intent(in) :: grid(0:,0:,0:)
509  real(DP), intent(out) :: mesh(:)
510  integer, intent(in) :: shape_x(3)
511 
512  integer, allocatable :: mpi_reqs(:)
513  integer :: n_mpi_reqs
514  integer :: i, idx, idy, idz, ierr
515  integer :: slot, index_in_slot
516  integer :: nelm
517  nelm = size(mesh)
518 
519  do idx = 0, shape_x(1)-1
520  do idy = idx_in_local_xy, idx_in_local_xy + nin_local_xy - 1
521  do idz = idx_in_local_yz, idx_in_local_yz + nin_local_yz - 1
522  slot = src_slots(idx, idy-idx_in_local_xy, idz-idx_in_local_yz)
523  index_in_slot = src_indexes(idx, idy-idx_in_local_xy, idz-idx_in_local_yz)
524  rec_buffers(slot)%p(index_in_slot) = grid(idx, idy-idx_in_local_xy, idz-idx_in_local_yz)
525  enddo
526  enddo
527  enddo
528 
529  allocate(mpi_reqs(size(src_pids)+size(dest_pids))); n_mpi_reqs = 0
530  do i = 1, size(src_pids)
531  n_mpi_reqs = n_mpi_reqs + 1
532  call mpi_isend(rec_buffers(i)%p, src_lengths(i), &
533  nekreal, src_pids(i), 0, nekcomm, mpi_reqs(n_mpi_reqs), ierr)
534  enddo
535 
536 
537  do i = 1, size(dest_pids)
538  n_mpi_reqs = n_mpi_reqs + 1
539  call mpi_irecv(send_buffers(i)%p, dest_lengths(i), &
540  nekreal, dest_pids(i), 0, nekcomm, mpi_reqs(n_mpi_reqs), ierr)
541  enddo
542 
543  do i = 1, n_mpi_reqs
544  call mpi_wait(mpi_reqs(i), mpi_status_ignore, ierr)
545  enddo
546 
547 
548  ! go through our stuff
549  do i = 1, nelm
550  mesh(i) = send_buffers(dest_slots(i))%p(dest_indexes(i))
551  enddo
552 
553 end subroutine grid_to_mesh
554 
555 subroutine poisson_kernel(grid, shape_x, start_x, end_x, boundaries)
556  use kinds, only : dp
557  use fft, only : wavenumber, p_forward, w_forward
558 
559  real(DP), intent(inout) :: grid(0:,0:,0:)
560  integer, intent(in) :: shape_x(3)
561  real(DP), intent(in) :: start_x(3)
562  real(DP), intent(in) :: end_x(3)
563  character(3), intent(in) :: boundaries(6)
564  real(DP) :: kx, ky, kz
565  real(DP), allocatable, save :: ks(:,:,:)
566  integer :: idz, idy, idx
567 
568  ! if we don't have the kernel weights, generate them
569  if (.not. allocated(ks)) then
570  allocate(ks(0:shape_x(3)-1, 0:nout_local_xy-1, 0:nout_local_yz-1))
571  do idz = 0, shape_x(3) - 1
572  do idy = 0, nout_local_yz - 1
573  do idx = 0, nout_local_xy - 1
574  if (boundaries(2) == 'P ') then
575  kx = wavenumber(idx + idx_out_local_xy, shape_x(1), &
576  end_x(1)-start_x(1), p_forward)
577  else
578  kx = wavenumber(idx + idx_out_local_xy, shape_x(1), &
579  end_x(1)-start_x(1), w_forward)
580  endif
581 
582  if (boundaries(1) == 'P ') then
583  ky = wavenumber(idy + idx_out_local_yz, shape_x(2), &
584  end_x(2)-start_x(2), p_forward)
585  else
586  ky = wavenumber(idy + idx_out_local_yz, shape_x(2), &
587  end_x(2)-start_x(2), w_forward)
588  endif
589 
590  if (boundaries(5) == 'P ') then
591  kz = wavenumber(idz, shape_x(3), &
592  end_x(3)-start_x(3), p_forward)
593  else
594  kz = wavenumber(idz, shape_x(3), &
595  end_x(3)-start_x(3), w_forward)
596  endif
597 
598  if (kx**2. + ky**2. + kz**2. < 1.e-9_dp) then
599  ks(idz,idx,idy) = 0._dp
600  else
601  ks(idz, idx, idy) = 1._dp / ( &
602  (kz)**2._dp + &
603  (ky)**2._dp + &
604  (kx)**2._dp)
605  endif
606  enddo
607  enddo
608  enddo
609  endif
610 
611  ! vector-vector rescale
612  grid = grid * ks
613 
614 end subroutine poisson_kernel
615 
616 subroutine shuffle_test()
617  use kinds, only : dp
618  use size_m, only : nelv
619  use mesh, only : shape_x
620  use parallel, only : nid
621  use parallel, only : lglel
622 
623  use fft, only :p_forward, p_backward, w_forward, w_backward
624  use fft, only : fft_r2r, transpose_grid
625 
626  real(DP), allocatable :: rhs_coarse(:), soln_coarse(:)
627  integer :: nelm
628  integer :: i
629  real(DP), allocatable :: plane_xy(:,:,:), plane_yx(:,:,:), plane_zy(:,:,:)
630  real(DP) :: err
631  real(DP) :: rescale
632 
633 
634  ! convert RHS to coarse mesh
635  nelm = nelv
636  allocate(rhs_coarse(nelm))
637  do i = 1, nelm
638  rhs_coarse(i) = lglel(i)
639  enddo
640 
641  ! reorder onto sticks
642  allocate(plane_xy(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1) )
643  call mesh_to_grid(rhs_coarse, plane_xy, shape_x)
644 
645  ! forward FFT
646  rescale = 1._dp
647  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), p_forward, rescale)
648 
649  allocate(plane_yx(0:shape_x(2)-1, 0:nout_local_xy-1, 0:nin_local_yz-1) )
650  call transpose_grid(plane_xy, plane_yx, shape_x, 1, 2, comm_xy)
651  deallocate(plane_xy)
652 
653  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), p_forward, rescale)
654 
655  allocate(plane_zy(0:shape_x(3)-1, 0:nout_local_xy-1, 0:nout_local_yz-1) )
656  call transpose_grid(plane_yx, plane_zy, shape_x, 2, 3, comm_yz)
657  deallocate(plane_yx)
658 
659  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), w_forward, rescale)
660 
661  ! reverse FFT
662  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), w_backward, rescale)
663 
664  allocate(plane_yx(0:shape_x(2)-1, 0:nout_local_xy-1, 0:nin_local_yz-1) )
665  call transpose_grid(plane_zy, plane_yx, shape_x, 3, 2, comm_yz)
666  deallocate(plane_zy)
667 
668  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), p_backward, rescale)
669 
670  allocate(plane_xy(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1))
671  call transpose_grid(plane_yx, plane_xy, shape_x, 2, 1, comm_xy)
672  deallocate(plane_yx)
673 
674  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), p_backward, rescale)
675 
676  plane_xy = plane_xy * (1._dp/ rescale)
677 
678  allocate(soln_coarse(nelm)); soln_coarse = 0._dp
679  ! reorder to local elements
680  call grid_to_mesh(plane_xy, soln_coarse, shape_x)
681 
682  do i = 1, nelm
683  err = abs(soln_coarse(i) - lglel(i))
684  if (err > 0.001) then
685  write(*,*) "WARNING: shuffle not working", nid, i, nid*nelm + i, err
686  exit
687  endif
688  enddo
689  if (nid == 0) write(*,*) "Passed shuffle test"
690 end subroutine shuffle_test
691 
692 subroutine transpose_test()
693  use kinds, only : dp
694  use size_m, only : nelv
695  use mesh, only : shape_x
696  use parallel, only : nid, lglel
697 
698  use fft, only : transpose_grid
699 
700  real(DP), allocatable :: rhs_coarse(:)
701  integer :: nelm
702  integer :: i, ieg
703  integer :: idx, idy, idz
704  real(DP), allocatable :: plane_xy(:,:,:), plane_yx(:,:,:), plane_zy(:,:,:)
705  real(DP) :: err
706  real(DP) :: rescale
707 
708 
709  ! convert RHS to coarse mesh
710  nelm = nelv
711  allocate(rhs_coarse(nelm))
712 
713  do i = 1, nelm
714  rhs_coarse(i) = lglel(i)
715  enddo
716 
717  ! reorder onto sticks
718  allocate(plane_xy(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1) )
719 
720  call mesh_to_grid(rhs_coarse, plane_xy, shape_x)
721 
722  do idx = 0, shape_x(1)-1
723  do idy = idx_in_local_xy, idx_in_local_xy + nin_local_xy - 1
724  do idz = idx_in_local_yz, idx_in_local_yz + nin_local_yz - 1
725  ieg = 1 + idx + idy * shape_x(1) + idz * shape_x(1) * shape_x(2)
726  err = abs(plane_xy(idx, idy-idx_in_local_xy, idz-idx_in_local_yz) - ieg)
727  if (err > 0.001) then
728  write(*,'(A,6(I6))') "WARNING: confused about k after init", nid, idx, idy, idz, ieg, int(plane_xy(idy,idx,idz))
729  return
730  endif
731  enddo
732  enddo
733  enddo
734  if (nid == 0) write(*,*) "Passed init"
735 
736  ! forward FFT
737  rescale = 1._dp
738 
739  allocate(plane_yx(0:shape_x(2)-1, 0:nout_local_xy-1, 0:nin_local_yz-1) )
740  call transpose_grid(plane_xy, plane_yx, shape_x, 1, 2, comm_xy)
741  deallocate(plane_xy)
742 
743  do idx = 0, nout_local_xy - 1
744  do idy = 0, shape_x(2) - 1
745  do idz = 0, nin_local_yz - 1
746  ieg = 1 + idx + idx_out_local_xy + shape_x(1)*idy &
747  + shape_x(1) * shape_x(2) * (idz + idx_in_local_yz)
748  err = abs(plane_yx(idy,idx,idz) - ieg)
749  if (err > 0.001) then
750  write(*,'(A,6(I6))') "WARNING: confused about k after xy", nid, idx, idy, idz, ieg, int(plane_yx(idy,idx,idz))
751  return
752  endif
753  enddo
754  enddo
755  enddo
756  if (nid == 0) write(*,*) "Passed xy transpose"
757 
758  allocate(plane_zy(0:shape_x(3)-1, 0:nout_local_xy-1, 0:nout_local_yz-1) )
759  call transpose_grid(plane_yx, plane_zy, shape_x, 2, 3, comm_yz)
760  deallocate(plane_yx)
761 
762  do idx = 0, nout_local_xy - 1
763  do idy = 0, nout_local_yz - 1
764  do idz = 0, shape_x(3) - 1
765  ieg = 1 + idx + idx_out_local_xy + shape_x(1)*(idy + idx_out_local_yz) &
766  + shape_x(1) * shape_x(2) * idz
767  err = abs(plane_zy(idz,idx,idy) - ieg)
768  if (err > 0.001) then
769  write(*,'(A,6(I6))') "WARNING: confused about k after yz", nid, idx, idy, idz, ieg, int(plane_zy(idz,idx,idy))
770  return
771  endif
772  enddo
773  enddo
774  enddo
775  if (nid == 0) write(*,*) "Passed yz transpose"
776 
777 end subroutine transpose_test
778 
779 subroutine cos_test()
780  use kinds, only : dp
781  use size_m, only : lx1, ly1, lz1, nelv
782  use mesh, only : shape_x, start_x, end_x
783  use parallel, only : nid, lglel
784  use tstep, only : pi
785  use mesh, only : ieg_to_xyz
786  use mesh, only : boundaries
787 
788  use fft, only : p_forward, p_backward
789  use fft, only : w_forward, w_backward
790  use fft, only : fft_r2r, transpose_grid
791 
792  real(DP), allocatable :: rhs_fine(:,:,:,:)
793  real(DP), allocatable :: rhs_coarse(:), soln_coarse(:)
794  integer :: nelm
795  integer :: i
796  integer :: ix(3)
797  real(DP), allocatable :: plane_xy(:,:,:), plane_yx(:,:,:), plane_zy(:,:,:)
798  real(DP) :: rescale
799 
800  ! convert RHS to coarse mesh
801  nelm = nelv
802  allocate(rhs_fine(lx1,ly1,lz1,nelm))
803  allocate(rhs_coarse(nelm))
804  do i = 1, nelm
805  ix = ieg_to_xyz(lglel(i))
806  rhs_coarse(i) = cos(8.*pi * (ix(3) / shape_x(3))) &
807  + cos(2.*pi * (ix(3) / shape_x(3)))
808  enddo
809  if (nid == 0) write(*,*) "COS TEST: rhs ", sqrt(sum(rhs_coarse*rhs_coarse))
810 
811  ! reorder onto sticks
812  allocate(plane_xy(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1) )
813 
814  call mesh_to_grid(rhs_coarse, plane_xy, shape_x)
815 
816  ! forward FFT
817  rescale = 1._dp
818  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), p_forward, rescale)
819 
820  allocate(plane_yx(0:shape_x(2)-1, 0:nout_local_xy-1, 0:nin_local_yz-1) )
821  call transpose_grid(plane_xy, plane_yx, shape_x, 1, 2, comm_xy)
822  deallocate(plane_xy)
823 
824  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), p_forward, rescale)
825 
826  allocate(plane_zy(0:shape_x(3)-1, 0:nout_local_xy-1, 0:nout_local_yz-1) )
827  call transpose_grid(plane_yx, plane_zy, shape_x, 2, 3, comm_yz)
828  deallocate(plane_yx)
829 
830  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), w_forward, rescale)
831 
832  ! Poisson kernel
833  call poisson_kernel(plane_zy, shape_x, start_x, end_x, boundaries)
834 
835  ! reverse FFT
836  call fft_r2r(plane_zy, shape_x(3), int(nout_local_xy * nout_local_yz), w_backward, rescale)
837 
838  allocate(plane_yx(0:shape_x(2)-1, 0:nout_local_xy-1, 0:nin_local_yz-1) )
839  call transpose_grid(plane_zy, plane_yx, shape_x, 3, 2, comm_yz)
840  deallocate(plane_zy)
841 
842  call fft_r2r(plane_yx, shape_x(2), int(nout_local_xy * nin_local_yz), p_backward, rescale)
843 
844  allocate(plane_xy(0:shape_x(1)-1, 0:nin_local_xy-1, 0:nin_local_yz-1))
845  call transpose_grid(plane_yx, plane_xy, shape_x, 2, 1, comm_xy)
846  deallocate(plane_yx)
847 
848  call fft_r2r(plane_xy, shape_x(1), int(nin_local_xy * nin_local_yz), p_backward, rescale)
849  plane_xy = plane_xy * (1._dp/ rescale)
850 
851 
852  ! reorder to local elements
853  allocate(soln_coarse(nelm)); soln_coarse = 0._dp
854  call grid_to_mesh(plane_xy, soln_coarse, shape_x)
855  if (nid == 0) write(*,*) "COS TEST: u_c ", &
856  sqrt(sum(soln_coarse*soln_coarse)) * (2._dp * pi /(end_x(1)-start_x(1)))**2._dp
857 
858  return
859 
860 end subroutine cos_test
861 
862 
863 end module poisson
integer function gllel(ieg)
static double sum(struct xxt *data, double v, uint n, uint tag)
Definition: xxt.c:400
subroutine dssum(u)
Direct stiffness sum.
Definition: dssum.F90:54
cleaned
Definition: tstep_mod.F90:2
subroutine transpose_test()
Input parameters from preprocessors.
Definition: input_mod.F90:11
subroutine poisson_kernel(grid, shape_x, start_x, end_x, boundaries)
Definition: soln_mod.F90:1
subroutine mpi_wait(irequest, istatus, ierror)
Definition: mpi_dummy.F90:941
subroutine shuffle_test()
subroutine init_mesh_to_grid(nelm, shape_x)
subroutine grid_to_mesh(grid, mesh, shape_x)
integer function, dimension(3) ieg_to_xyz(ieg)
Definition: mesh_mod.F90:32
subroutine mpi_comm_split(comm, icolor, ikey, comm_new, ierror)
Definition: mpi_dummy.F90:430
subroutine mpi_irecv(data, n, datatype, iproc, itag, comm, irequest, ierror)
Definition: mpi_dummy.F90:593
cleaned
Definition: mesh_mod.F90:2
real(dp) function dnekclock()
Definition: ctimer_mod.F90:103
integer function lglel(iel)
subroutine cos_test()
subroutine, public transpose_grid(grid, grid_t, shape_x, idx, idx_t, comm)
Global (parallel) transform of grid data.
p
Definition: xxt_test2.m:1
integer function gllnid(ieg)
subroutine init_comm_infrastructure(comm_world, shape_x)
one-time setup of communication infrastructure for poisson_mod
integer function xyz_to_pid(ix, iy, iz, shape_x, shape_p)
subroutine mesh_to_grid(mesh, grid, shape_x)
subroutine, public spectral_solve(u, rhs)
Definition: poisson_mod.F90:48
cleaned
Definition: parallel_mod.F90:2
subroutine mpi_comm_rank(comm, me, ierror)
Definition: mpi_dummy.F90:388
subroutine, public fft_r2r(u, length, num, kind, rescale)
Compute a number of 1D FFTs or DCTs.
subroutine nekgsync()
Definition: comm_mpi.F90:318
Geometry arrays.
Definition: geom_mod.F90:2
Definition: mpif.F90:1
subroutine mpi_comm_free(comm, ierror)
Definition: mpi_dummy.F90:369
subroutine mpi_comm_size(comm, nprocs, ierror)
Definition: mpi_dummy.F90:409
real(dp) function, public wavenumber(i, N, L, kind)
Get the wavenumber of the ith mode.
subroutine mpi_isend(data, n, datatype, iproc, itag, comm, request, ierror)
Definition: mpi_dummy.F90:624