Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
findpts.c
Go to the documentation of this file.
1 #include <stddef.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <limits.h>
5 #include <math.h>
6 #include "c99.h"
7 #include "name.h"
8 #include "types.h"
9 #include "fail.h"
10 #include "mem.h"
11 #include "poly.h"
12 #include "obbox.h"
13 #include "findpts_el.h"
14 #include "findpts_local.h"
15 #include "gs_defs.h"
16 #include "comm.h"
17 #include "crystal.h"
18 #include "sarray_transfer.h"
19 #include "sort.h"
20 #include "sarray_sort.h"
21 /*
22 #define DIAGNOSTICS
23 */
24 #ifdef DIAGNOSTICS
25 #include <stdio.h>
26 #endif
27 
28 #define CODE_INTERNAL 0
29 #define CODE_BORDER 1
30 #define CODE_NOT_FOUND 2
31 
32 struct ulong_range { ulong min, max; };
33 struct proc_index { uint proc, index; };
34 
35 static slong lfloor(double x) { return floor(x); }
36 static slong lceil (double x) { return ceil (x); }
37 
38 static ulong hash_index_aux(double low, double fac, ulong n, double x)
39 {
40  const slong i = lfloor((x-low)*fac);
41  return i<0 ? 0 : (n-1<(ulong)i ? n-1 : (ulong)i);
42 }
43 
44 static void set_bit(unsigned char *const p, const uint i)
45 {
46  const uint byte = i/CHAR_BIT;
47  const unsigned bit = i%CHAR_BIT;
48  p[byte] |= 1u<<bit;
49 }
50 
51 static unsigned get_bit(const unsigned char *const p, const uint i)
52 {
53  const uint byte = i/CHAR_BIT;
54  const unsigned bit = i%CHAR_BIT;
55  return p[byte]>>bit & 1u;
56 }
57 
58 static unsigned byte_bits(const unsigned char x)
59 {
60  unsigned bit, sum=0;
61  for(bit=0;bit<CHAR_BIT;++bit) sum += x>>bit & 1u;
62  return sum;
63 }
64 
65 static uint count_bits(unsigned char *p, uint n)
66 {
67  uint sum=0;
68  for(;n;--n) sum+=byte_bits(*p++);
69  return sum;
70 }
71 
72 #define D 2
73 #define WHEN_3D(a)
74 #include "findpts_imp.h"
75 #undef WHEN_3D
76 #undef D
77 
78 #define D 3
79 #define WHEN_3D(a) a
80 #include "findpts_imp.h"
81 #undef WHEN_3D
82 #undef D
83 
84 /*--------------------------------------------------------------------------
85 
86  FORTRAN Interface
87 
88  --------------------------------------------------------------------------
89  call findpts_setup(h, comm,np, ndim, xm,ym,zm, nr,ns,nt,nel,
90  mr,ms,mt, bbox_tol, loc_hash_size, gbl_hash_size,
91  npt_max, newt_tol)
92 
93  (zm,nt,mt all ignored when ndim==2)
94 
95  h: (output) handle
96  comm,np: MPI communicator and # of procs (checked against MPI_Comm_size)
97  ndim: 2 or 3
98  xm,ym,zm: element geometry (nodal x,y,z values)
99  nr,ns,nt,nel: element dimensions --- e.g., xm(nr,ns,nt,nel)
100 
101  mr,ms,mt: finer mesh size for bounding box computation;
102  must be larger than nr,ns,nt for correctness,
103  recommend at least 2*nr,2*ns,2*nt
104  bbox_tol: e.g., 0.01 - relative size to expand bounding boxes by;
105  prevents points from falling through "cracks",
106  and prevents "not found" failures for points just outside mesh
107  (returning instead the closest point inside the mesh)
108 
109  loc_hash_size: e.g., nr*ns*nt*nel
110  maximum number of integers to use for local geom hash table;
111  minimum is nel+2 for the trivial table with one cell
112 
113  gbl_hash_size: e.g., nr*ns*nt*nel
114  approx number of cells per proc for the distributed
115  global geometric hash table
116  NOTE: gbl_hash_size*np needs to fit in a "global" integer
117  (controlled by -DGLOBAL_LONG or -DGLOBAL_LONG_LONG;
118  see "types.h")
119  actual number of cells per proc will be greater by
120  ~ 3 gbl_hash_size^(2/3) / np^(1/3)
121 
122  npt_max: e.g., 256
123  number of points to iterate on simultaneously
124  enables dominant complexity to be matrix-matrix products
125  (there is a sweet spot --- too high and the cache runs out)
126  the memory allocation term dependent on npt_max is
127  (12 + 2*(nr+ns+nt+nr*ns)) * npt_max doubles
128 
129  newt_tol: e.g., 1024*DBL_EPSILON
130  the iteration stops for a point when
131  the 1-norm of the step in (r,s,t) is smaller than newt_tol
132  or the objective (dist^2) increases while the predicted (model)
133  decrease is smaller than newt_tol * (the objective)
134 
135  --------------------------------------------------------------------------
136  call findpts_free(h)
137 
138  --------------------------------------------------------------------------
139  call findpts(h, code_base, code_stride,
140  proc_base, proc_stride,
141  el_base, el_stride,
142  r_base, r_stride,
143  dist2_base, dist2_stride,
144  x_base, x_stride,
145  y_base, y_stride,
146  z_base, z_stride, npt)
147 
148  (z_base, z_stride ignored when ndim==2)
149 
150  conceptually, locates npt points;
151  data for each point is:
152  ouput:
153  code: 0 - inside an element
154  1 - closest point on a border
155  (perhaps exactly, or maybe just near --- check dist2)
156  2 - not found (bbox_tol controls cut-off between code 1 and 2)
157  proc: remote processor on which the point was found
158  el: element on remote processor in which the point was found
159  r(ndim): parametric coordinates for point
160  dist2: distance squared from found to sought point (in xyz space)
161  input:
162  x, y, z: coordinates of sought point
163 
164  the *_base arguments point to the data for the first point,
165  each is advanced by the corresponding *_stride argument for the next point
166  this allows fairly arbitrary data layout,
167  but note the r,s,t coordinates for each point must be packed together
168  (consequently, r_stride must be at least ndim)
169 
170 
171  --------------------------------------------------------------------------
172  call findpts_eval(h, out_base, out_stride,
173  code_base, code_stride,
174  proc_base, proc_stride,
175  el_base, el_stride,
176  r_base, r_stride, npt,
177  input_field)
178 
179  may be called immediately after findpts (or any other time)
180  to evaluate input_field at the given points ---
181  these specified by code,proc,el,r(ndim) and possibly remote
182  --- storing the interpolated values in out
183  [that is, at out_base(1+out_stride*(point_index-1)) ]
184 
185  for example, following a call to findpts, a call to findpts_eval with
186  input_field = xm, will ideally result in out = x(1) for each point,
187  or x(2) for ym, x(3) for zm
188 
189 
190  --------------------------------------------------------------------------*/
191 
192 #define ffindpts_setup FORTRAN_NAME(findpts_setup,FINDPTS_SETUP)
193 #define ffindpts_free FORTRAN_NAME(findpts_free ,FINDPTS_FREE )
194 #define ffindpts FORTRAN_NAME(findpts ,FINDPTS )
195 #define ffindpts_eval FORTRAN_NAME(findpts_eval ,FINDPTS_EVAL )
196 
197 struct handle { void *data; unsigned ndim; };
198 static struct handle *handle_array = 0;
199 static int handle_max = 0;
200 static int handle_n = 0;
201 
203  const MPI_Fint *const comm, const sint *const np,
204  const sint *ndim,
205  const double *const xm, const double *const ym, const double *const zm,
206  const sint *const nr, const sint *const ns, const sint *const nt,
207  const sint *const nel,
208  const sint *const mr, const sint *const ms, const sint *const mt,
209  const double *const bbox_tol,
210  const sint *const loc_hash_size, const sint *const gbl_hash_size,
211  const sint *const npt_max,
212  const double *const newt_tol)
213 {
214  struct handle *h;
215  if(handle_n==handle_max)
216  handle_max+=handle_max/2+1,
217  handle_array=trealloc(struct handle,handle_array,handle_max);
218  h = &handle_array[handle_n];
219  h->ndim = *ndim;
220  if(h->ndim==2) {
221  struct findpts_data_2 *const fd = tmalloc(struct findpts_data_2,1);
222  const double *elx[2];
223  uint n[2], m[2];
224  elx[0]=xm,elx[1]=ym;
225  n[0]=*nr,n[1]=*ns;
226  m[0]=*mr,m[1]=*ms;
227  h->data = fd;
228  comm_init_check(&fd->cr.comm, *comm, *np);
229  buffer_init(&fd->cr.data,1000);
230  buffer_init(&fd->cr.work,1000);
231  setup_aux_2(fd, elx,n,*nel,m,*bbox_tol,
232  *loc_hash_size,*gbl_hash_size, *npt_max, *newt_tol);
233  } else if(h->ndim==3) {
234  struct findpts_data_3 *const fd = tmalloc(struct findpts_data_3,1);
235  const double *elx[3];
236  uint n[3], m[3];
237  elx[0]=xm,elx[1]=ym,elx[2]=zm;
238  n[0]=*nr,n[1]=*ns,n[2]=*nt;
239  m[0]=*mr,m[1]=*ms,m[2]=*mt;
240  h->data = fd;
241  comm_init_check(&fd->cr.comm, *comm, *np);
242  buffer_init(&fd->cr.data,1000);
243  buffer_init(&fd->cr.work,1000);
244  setup_aux_3(fd, elx,n,*nel,m,*bbox_tol,
245  *loc_hash_size,*gbl_hash_size, *npt_max, *newt_tol);
246  } else
247  fail(1,__FILE__,__LINE__,
248  "findpts_setup: ndim must be 2 or 3; given ndim=%u",(unsigned)h->ndim);
249  *handle = handle_n++;
250 }
251 
252 #define CHECK_HANDLE(func) \
253  struct handle *h; \
254  if(*handle<0 || *handle>=handle_n || !(h=&handle_array[*handle])->data) \
255  fail(1,__FILE__,__LINE__,func ": invalid handle")
256 
257 void ffindpts_free(const sint *const handle)
258 {
259  CHECK_HANDLE("findpts_free");
260  if(h->ndim==2)
261  PREFIXED_NAME(findpts_free_2)(h->data);
262  else
263  PREFIXED_NAME(findpts_free_3)(h->data);
264  h->data = 0;
265 }
266 
267 void ffindpts(const sint *const handle,
268  sint *const code_base, const sint *const code_stride,
269  sint *const proc_base, const sint *const proc_stride,
270  sint *const el_base, const sint *const el_stride,
271  double *const r_base, const sint *const r_stride,
272  double *const dist2_base, const sint *const dist2_stride,
273  const double *const x_base, const sint *const x_stride,
274  const double *const y_base, const sint *const y_stride,
275  const double *const z_base, const sint *const z_stride,
276  const sint *const npt)
277 {
278  CHECK_HANDLE("findpts");
279  if(h->ndim==2) {
280  const double *xv_base[2];
281  unsigned xv_stride[2];
282  xv_base[0]=x_base, xv_base[1]=y_base;
283  xv_stride[0] = *x_stride*sizeof(double),
284  xv_stride[1] = *y_stride*sizeof(double);
286  (uint*)code_base,(* code_stride)*sizeof(sint ),
287  (uint*)proc_base,(* proc_stride)*sizeof(sint ),
288  (uint*) el_base,(* el_stride)*sizeof(sint ),
289  r_base,(* r_stride)*sizeof(double),
290  dist2_base,(*dist2_stride)*sizeof(double),
291  xv_base, xv_stride,
292  *npt, h->data);
293  } else {
294  const double *xv_base[3];
295  unsigned xv_stride[3];
296  xv_base[0]=x_base, xv_base[1]=y_base, xv_base[2]=z_base;
297  xv_stride[0] = *x_stride*sizeof(double),
298  xv_stride[1] = *y_stride*sizeof(double),
299  xv_stride[2] = *z_stride*sizeof(double);
301  (uint*)code_base,(* code_stride)*sizeof(sint ),
302  (uint*)proc_base,(* proc_stride)*sizeof(sint ),
303  (uint*) el_base,(* el_stride)*sizeof(sint ),
304  r_base,(* r_stride)*sizeof(double),
305  dist2_base,(*dist2_stride)*sizeof(double),
306  xv_base, xv_stride,
307  *npt, h->data);
308  }
309 }
310 
311 void ffindpts_eval(const sint *const handle,
312  double *const out_base, const sint *const out_stride,
313  const sint *const code_base, const sint *const code_stride,
314  const sint *const proc_base, const sint *const proc_stride,
315  const sint *const el_base, const sint *const el_stride,
316  const double *const r_base, const sint *const r_stride,
317  const sint *const npt, const double *const in)
318 {
319  CHECK_HANDLE("findpts_eval");
320  if(h->ndim==2)
322  out_base,(* out_stride)*sizeof(double),
323  (uint*)code_base,(*code_stride)*sizeof(sint ),
324  (uint*)proc_base,(*proc_stride)*sizeof(sint ),
325  (uint*) el_base,(* el_stride)*sizeof(sint ),
326  r_base,(* r_stride)*sizeof(double),
327  *npt, in, h->data);
328  else
330  out_base,(* out_stride)*sizeof(double),
331  (uint*)code_base,(*code_stride)*sizeof(sint ),
332  (uint*)proc_base,(*proc_stride)*sizeof(sint ),
333  (uint*) el_base,(* el_stride)*sizeof(sint ),
334  r_base,(* r_stride)*sizeof(double),
335  *npt, in, h->data);
336 }
static unsigned byte_bits(const unsigned char x)
Definition: findpts.c:58
#define slong
Definition: types.h:74
#define uint
Definition: types.h:70
static double sum(struct xxt *data, double v, uint n, uint tag)
Definition: xxt.c:400
uint proc
Definition: findpts.c:33
static uint count_bits(unsigned char *p, uint n)
Definition: findpts.c:65
#define tmalloc(type, count)
Definition: mem.h:91
static const unsigned mr[D]
static slong lfloor(double x)
Definition: findpts.c:35
#define sint
Definition: types.h:69
unsigned ndim
Definition: findpts.c:197
ulong min
Definition: findpts.c:32
void findpts_3(uint *const code_base, const unsigned code_stride, uint *const proc_base, const unsigned proc_stride, uint *const el_base, const unsigned el_stride, double *const r_base, const unsigned r_stride, double *const dist2_base, const unsigned dist2_stride, const double *const x_base[3], const unsigned x_stride[3], const uint npt, struct findpts_data_3 *const fd)
n
Definition: xxt_test.m:73
#define x
#define trealloc(type, ptr, count)
Definition: mem.h:95
static void set_bit(unsigned char *const p, const uint i)
Definition: findpts.c:44
void findpts_2(uint *const code_base, const unsigned code_stride, uint *const proc_base, const unsigned proc_stride, uint *const el_base, const unsigned el_stride, double *const r_base, const unsigned r_stride, double *const dist2_base, const unsigned dist2_stride, const double *const x_base[2], const unsigned x_stride[2], const uint npt, struct findpts_data_2 *const fd)
Definition: comm.h:85
ns
Definition: xxt_test.m:43
#define ffindpts_eval
Definition: findpts.c:195
p
Definition: xxt_test2.m:1
#define comm_init_check(c, ce, np)
Definition: comm.h:161
void findpts_free_2(struct findpts_data_2 *fd)
static unsigned get_bit(const unsigned char *const p, const uint i)
Definition: findpts.c:51
#define ffindpts_free
Definition: findpts.c:193
void * data
Definition: findpts.c:197
void findpts_eval_2(double *const out_base, const unsigned out_stride, const uint *const code_base, const unsigned code_stride, const uint *const proc_base, const unsigned proc_stride, const uint *const el_base, const unsigned el_stride, const double *const r_base, const unsigned r_stride, const uint npt, const double *const in, struct findpts_data_2 *const fd)
#define ulong
Definition: types.h:75
static int handle_max
Definition: findpts.c:199
ulong max
Definition: findpts.c:32
uint index
Definition: findpts.c:33
static double elx[NR *NS]
static ulong hash_index_aux(double low, double fac, ulong n, double x)
Definition: findpts.c:38
void findpts_eval_3(double *const out_base, const unsigned out_stride, const uint *const code_base, const unsigned code_stride, const uint *const proc_base, const unsigned proc_stride, const uint *const el_base, const unsigned el_stride, const double *const r_base, const unsigned r_stride, const uint npt, const double *const in, struct findpts_data_3 *const fd)
for i
Definition: xxt_test.m:74
static struct handle * handle_array
Definition: findpts.c:198
#define ffindpts_setup
Definition: findpts.c:192
#define buffer_init(b, max)
Definition: mem.h:155
#define ffindpts
Definition: findpts.c:194
void findpts_free_3(struct findpts_data_3 *fd)
int MPI_Fint
Definition: comm.h:71
#define CHECK_HANDLE(func)
Definition: findpts.c:252
static const unsigned nr[3]
establishes some macros to establish naming conventions
static uint np
Definition: findpts_test.c:63
static int handle_n
Definition: findpts.c:200
static slong lceil(double x)
Definition: findpts.c:36
#define PREFIXED_NAME(x)
Definition: name.h:25
void fail(int status, const char *file, unsigned line, const char *fmt,...)
Definition: fail.c:47