Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
findpts_el_2.c
Go to the documentation of this file.
1 #include <stdio.h>
2 
3 #include <stddef.h>
4 #include <stdlib.h>
5 #include <string.h>
6 #include <math.h>
7 #include <float.h>
8 #include "c99.h"
9 #include "name.h"
10 #include "fail.h"
11 #include "types.h"
12 #include "mem.h"
13 #include "tensor.h"
14 #include "poly.h"
15 
16 #define findpts_el_setup_2 PREFIXED_NAME(findpts_el_setup_2)
17 #define findpts_el_free_2 PREFIXED_NAME(findpts_el_free_2 )
18 #define findpts_el_2 PREFIXED_NAME(findpts_el_2 )
19 #define findpts_el_eval_2 PREFIXED_NAME(findpts_el_eval_2 )
20 /*
21 #define DIAGNOSTICS_1
22 #define DIAGNOSTICS_2
23 */
24 #define DIAGNOSTICS_ITERATIONS 0
25 
26 #if defined(DIAGNOSTICS_1) || defined(DIAGNOSTICS_2) \
27  || DIAGNOSTICS_ITERATIONS > 0
28 #include <stdio.h>
29 #endif
30 
31 /* A is row-major */
32 static void lin_solve_2(double x[2], const double A[4], const double y[2])
33 {
34  const double idet = 1/(A[0]*A[3] - A[1]*A[2]);
35  x[0] = idet*(A[3]*y[0] - A[1]*y[1]);
36  x[1] = idet*(A[0]*y[1] - A[2]*y[0]);
37 }
38 
39 struct findpts_el_pt_2 {
40  double x[2],r[2],oldr[2],dist2,dist2p,tr;
41  unsigned index,flags;
42 };
43 
44 /* the bit structure of flags is CSSRR
45  the C bit --- 1<<4 --- is set when the point is converged
46  RR is 0 = 00b if r is unconstrained,
47  1 = 01b if r is constrained at -1
48  2 = 10b if r is constrained at +1
49  SS is similarly for s constraints
50 */
51 
52 #define CONVERGED_FLAG (1u<<4)
53 #define FLAG_MASK 0x1fu
54 
55 static unsigned num_constrained(const unsigned flags)
56 {
57  const unsigned y = flags | flags>>1;
58  return (y&1u) + (y>>2 & 1u);
59 }
60 
61 static unsigned pt_flags_to_bin_noC(const unsigned flags)
62 {
63  return (flags>>2 & 3u)*3 + (flags & 3u);
64 }
65 
66 /* map flags to 9 if the C bit is set,
67  else to [0,8] --- the 9 valid configs of SSRR */
68 static unsigned pt_flags_to_bin(const unsigned flags)
69 {
70  const unsigned mask = 0u - (flags>>4); /* 0 or 0xfff... when converged */
71  return (mask & 9u) | (~mask & pt_flags_to_bin_noC(flags));
72 }
73 
74 /* assumes x = 0, or 1 */
75 static unsigned plus_1_mod_2(const unsigned x) { return x^1u; }
76 
77 /* assumes x = 1 << i, with i < 4, returns i+1 */
78 static unsigned which_bit(const unsigned x)
79 {
80  const unsigned y = x&7u;
81  return (y-(y>>2)) | ((x-1)&4u);
82 }
83 
84 static unsigned edge_index(const unsigned x) { return which_bit(x)-1; }
85 
86 static unsigned point_index(const unsigned x)
87 {
88  return ((x>>1)&1u) | ((x>>2)&2u);
89 }
90 
91 /* extra data
92 
93  we need x, dx/dn for each edge
94  r: x at 0, nrs - nr,
95  4*nr extra for dx/dn
96  s: 8*ns extra
97 
98 */
99 
100 struct findpts_el_gedge_2 { const double *x[2], *dxdn[2]; };
101 struct findpts_el_gpt_2 { double x[2], jac[4], hes[4]; };
102 
103 struct findpts_el_data_2 {
104  unsigned npt_max;
105  struct findpts_el_pt_2 *p;
106 
107  unsigned n[2];
108  double *z[2];
109  lagrange_fun *lag[2];
110  double *lag_data[2];
111  double *wtend[2];
112 
113  const double *x[2];
114 
115  unsigned side_init;
116  double *sides;
117  struct findpts_el_gedge_2 edge[4]; /* R=-1 S; R=1 S; ... */
118  struct findpts_el_gpt_2 pt[4];
119 
120  double *work;
121 };
122 
123 /* work[2*(nr+ns)] */
124 /* work[4*(nr+ns)] */
125 /* work[6*(nr+6)] */
126 /* work[(6+2*(2*nr+ns))*pn] */
127 /* work[(10+3*n)*pn] */
128 static unsigned work_size(
129  const unsigned nr, const unsigned ns, const unsigned npt_max)
130 {
131  const unsigned n = ns>nr?ns:nr;
132  unsigned wsize;
133  #define DO_MAX(x) do { const unsigned temp=(x); \
134  wsize=temp>wsize?temp:wsize; } while(0)
135  wsize = (6 + 2*(2*nr+ns)) * npt_max;
136  DO_MAX(4*(nr+ns));
137  DO_MAX(6*(nr+6));
138  DO_MAX(npt_max*(10+3*n));
139  #undef DO_MAX
140  return wsize;
141 }
142 
143 void findpts_el_setup_2(struct findpts_el_data_2 *const fd,
144  const unsigned n[2],
145  const unsigned npt_max)
146 {
147  const unsigned nr=n[0], ns=n[1];
148  const unsigned tot = 8*ns + 4*nr;
149  unsigned d,i, lag_size[2];
150 
151  fd->npt_max = npt_max;
152  fd->p = tmalloc(struct findpts_el_pt_2, npt_max*2);
153 
154  fd->n[0]=nr, fd->n[1]=ns;
155  for(d=0;d<2;++d) lag_size[d] = gll_lag_size(fd->n[d]);
156 
157  fd->z[0] = tmalloc(double,lag_size[0]+lag_size[1]
158  +7*(nr+ns) + tot +
159  work_size(nr,ns,npt_max));
160  fd->z[1] = fd->z[0]+nr;
161  fd->lag_data[0] = fd->z[1]+ns;
162  fd->lag_data[1] = fd->lag_data[0]+lag_size[0];
163  fd->wtend[0] = fd->lag_data[1]+lag_size[1];
164  fd->wtend[1] = fd->wtend[0]+6*nr;
165  fd->sides = fd->wtend[1]+6*ns;
166  fd->work = fd->sides + tot;
167 
168  fd->side_init = 0;
169 
170  for(d=0;d<2;++d) {
171  double *wt=fd->wtend[d]; unsigned n=fd->n[d];
172  lobatto_nodes(fd->z[d],n);
173  fd->lag[d] = gll_lag_setup(fd->lag_data[d],n);
174  fd->lag[d](wt , fd->lag_data[d],n,2,-1);
175  fd->lag[d](wt+3*n, fd->lag_data[d],n,2, 1);
176 
177  wt[0]=1; for(i=1;i<n;++i) wt[i]=0;
178  wt+=3*n; { for(i=0;i<n-1;++i) wt[i]=0; } wt[i]=1;
179  }
180 
181  for(d=0;d<2;++d)
182  fd->edge[0].x[d] = fd->sides + d *ns, \
183  fd->edge[0].dxdn[d] = fd->sides + (2+d)*ns, \
184  fd->edge[1].x[d] = fd->sides + (4+d)*ns, \
185  fd->edge[1].dxdn[d] = fd->sides + (6+d)*ns; \
186 
187  for(d=0;d<2;++d)
188  fd->edge[2].x[d] = 0, /* will point to user data */
189  fd->edge[2].dxdn[d] = fd->sides + 8*ns + d *nr,
190  fd->edge[3].x[d] = 0, /* will point to user data */
191  fd->edge[3].dxdn[d] = fd->sides + 8*ns + (2+d)*nr;
192 }
193 
194 void findpts_el_free_2(struct findpts_el_data_2 *const fd)
195 {
196  free(fd->p);
197  free(fd->z[0]);
198 }
199 
200 typedef void compute_edge_data_fun(struct findpts_el_data_2 *fd);
201 
202 /* work[2*(nr+ns)] */
203 static void compute_edge_data_r(struct findpts_el_data_2 *fd)
204 {
205  const unsigned nr = fd->n[0], ns=fd->n[1], nrsm1 = nr*(ns-1);
206  unsigned d;
207  double *work = fd->work, *out = fd->sides + 8*ns;
208  memcpy(work , fd->wtend[1]+ ns, ns*sizeof(double));
209  memcpy(work+ns, fd->wtend[1]+4*ns, ns*sizeof(double));
210  for(d=0;d<2;++d) {
211  tensor_mxm(work+2*ns,nr, fd->x[d],ns, work,2);
212  memcpy(out+ d *nr, work+2*ns , nr*sizeof(double));
213  memcpy(out+(2+d)*nr, work+2*ns+nr , nr*sizeof(double));
214  fd->edge[2].x[d] = fd->x[d];
215  fd->edge[3].x[d] = fd->x[d] + nrsm1;
216  }
217 }
218 
219 /* work[4*(nr+ns)] */
220 static void compute_edge_data_s(struct findpts_el_data_2 *fd)
221 {
222  const unsigned nr = fd->n[0], ns=fd->n[1];
223  unsigned d;
224  double *work = fd->work, *out = fd->sides;
225  memcpy(work , fd->wtend[0] , 2*nr*sizeof(double));
226  memcpy(work+2*nr, fd->wtend[0]+3*nr, 2*nr*sizeof(double));
227  for(d=0;d<2;++d) {
228  tensor_mtxm(work+4*nr,ns, fd->x[d],nr, work,4);
229  memcpy(out+ d *ns, work+4*nr , ns*sizeof(double));
230  memcpy(out+(2+d)*ns, work+4*nr+ ns, ns*sizeof(double));
231  memcpy(out+(4+d)*ns, work+4*nr+2*ns, ns*sizeof(double));
232  memcpy(out+(6+d)*ns, work+4*nr+3*ns, ns*sizeof(double));
233  }
234 }
235 
236 static const struct findpts_el_gedge_2 *get_edge(
237  struct findpts_el_data_2 *fd, unsigned ei)
238 {
239  const unsigned mask = 1u<<(ei/2);
240  if((fd->side_init&mask)==0) {
241  compute_edge_data_fun *const fun[2] = {
244  };
245  fun[ei/2](fd);
246  fd->side_init |= mask;
247  }
248  return &fd->edge[ei];
249 }
250 
251 /* work[6*(nr+6)] */
252 static void compute_pt_data(struct findpts_el_data_2 *fd)
253 {
254  const unsigned nr = fd->n[0], ns = fd->n[1];
255  double *work = fd->work, *work2 = work+6*nr;
256  unsigned d,i,j;
257  for(d=0;d<2;++d) {
258  tensor_mxm(work,nr, fd->x[d],ns, fd->wtend[1],6);
259  tensor_mtxm(work2,6, fd->wtend[0],nr, work,6);
260  for(j=0;j<2;++j) for(i=0;i<2;++i) {
261  fd->pt[2*j+i].x[d] = work2[6*(3*j+0)+(3*i+0)];
262  fd->pt[2*j+i].jac[2*d+0] = work2[6*(3*j+0)+(3*i+1)];
263  fd->pt[2*j+i].jac[2*d+1] = work2[6*(3*j+1)+(3*i+0)];
264  fd->pt[2*j+i].hes[2*d+0] = work2[6*(3*j+0)+(3*i+2)];
265  fd->pt[2*j+i].hes[2*d+1] = work2[6*(3*j+2)+(3*i+0)];
266  }
267  }
268 }
269 
270 static const struct findpts_el_gpt_2 *get_pt(
271  struct findpts_el_data_2 *fd, unsigned pi)
272 {
273  if((fd->side_init&4u)==0)
274  compute_pt_data(fd), fd->side_init |= 4u;
275  return &fd->pt[pi];
276 }
277 
278 /* check reduction in objective against prediction, and adjust
279  trust region radius (p->tr) accordingly;
280  may reject the prior step, returning 1; otherwise returns 0
281  sets out->dist2, out->index, out->x, out->oldr in any event,
282  leaving out->r, out->dr, out->flags to be set when returning 0 */
283 static int reject_prior_step_q(struct findpts_el_pt_2 *const out,
284  const double resid[2],
285  const struct findpts_el_pt_2 *const p,
286  const double tol)
287 {
288  const double old_dist2 = p->dist2;
289  const double dist2 = resid[0]*resid[0]+resid[1]*resid[1];
290  const double decr = old_dist2-dist2;
291  const double pred = p->dist2p;
292  out->x[0]=p->x[0],out->x[1]=p->x[1];
293  out->oldr[0]=p->r[0],out->oldr[1]=p->r[1];
294  out->index=p->index;
295  out->dist2=dist2;
296 #ifdef DIAGNOSTICS_2
297  printf("Checking prior step:\n"
298  " old r = (%.17g,%.17g), old flags = %x\n"
299  " old_dist2 = %.17g\n"
300  " r = (%.17g,%.17g), flags = %x\n"
301  " dist2 = %.17g\n"
302  " difference = %.17g\n"
303  " predicted = %.17g\n"
304  " rho = %.17g\n",
305  p->oldr[0],p->oldr[1],(p->flags>>5)&FLAG_MASK,old_dist2,
306  p->r[0],p->r[1],p->flags&FLAG_MASK,dist2,
307  decr, pred, decr/pred);
308 #endif
309  if(decr>= 0.01 * pred) {
310  if(decr>= 0.9 * pred) {
311  out->tr = p->tr*2;
312 #ifdef DIAGNOSTICS_2
313  printf(" very good iteration; tr -> %g\n", out->tr);
314 #endif
315  } else {
316 #ifdef DIAGNOSTICS_2
317  printf(" good iteration; tr = %g\n", p->tr);
318 #endif
319  out->tr = p->tr;
320  }
321  return 0;
322  } else {
323  /* reject step; note: the point will pass through this routine
324  again, and we set things up here so it gets classed as a
325  "very good iteration" --- this doubles the trust radius,
326  which is why we divide by 4 below */
327  double v0 = fabs(p->r[0]-p->oldr[0]),
328  v1 = fabs(p->r[1]-p->oldr[1]);
329  out->tr = (v0>v1?v0:v1)/4;
330 #ifdef DIAGNOSTICS_2
331  printf(" bad iteration; tr -> %g\n", out->tr);
332 #endif
333  out->dist2=old_dist2;
334  out->r[0]=p->oldr[0],out->r[1]=p->oldr[1];
335  out->flags=p->flags>>5;
336  out->dist2p=-DBL_MAX;
337  if(pred < dist2*tol) out->flags|=CONVERGED_FLAG;
338  return 1;
339  }
340 }
341 
342 /* minimize ||resid - jac * dr||_2, with |dr| <= tr, |r0+dr|<=1
343  (exact solution of trust region problem) */
344 static void newton_area(struct findpts_el_pt_2 *const out,
345  const double jac[4], const double resid[2],
346  const struct findpts_el_pt_2 *const p, const double tol)
347 {
348  const double tr = p->tr;
349  double bnd[4] = { -1,1, -1,1 };
350  double r0[2];
351  double dr[2], fac;
352  unsigned d, mask, flags;
353 
354  r0[0] = p->r[0], r0[1] = p->r[1];
355 
356 #ifdef DIAGNOSTICS_1
357  printf("newton_area:\n");
358  printf(" resid = (%g,%g); r^T r / 2 = %g\n",resid[0],resid[1],
359  (resid[0]*resid[0]+resid[1]*resid[1])/2);
360  printf(" jac = %g\t%g\n"
361  " %g\t%g\n",
362  jac[0],jac[1],jac[2],jac[3]);
363  printf(" r = (%.17g,%.17g)\n",r0[0],r0[1]);
364 #endif
365 
366  mask = 0xfu;
367  for(d=0;d<2;++d) {
368  if(r0[d]-tr>-1) bnd[2*d ]=r0[d]-tr, mask^=1u<<(2*d);
369  if(r0[d]+tr< 1) bnd[2*d+1]=r0[d]+tr, mask^=2u<<(2*d);
370  }
371 
372  lin_solve_2(dr, jac,resid);
373 
374 #ifdef DIAGNOSTICS_1
375  printf(" min at r = (%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1]);
376 #endif
377 
378  fac = 1, flags = 0;
379  for(d=0;d<2;++d) {
380  double nr = r0[d]+dr[d];
381  if((nr-bnd[2*d])*(bnd[2*d+1]-nr)>=0) continue;
382  if(nr<bnd[2*d]) {
383  double f = (bnd[2*d ]-r0[d])/dr[d];
384  if(f<fac) fac=f, flags = 1u<<(2*d);
385  } else {
386  double f = (bnd[2*d+1]-r0[d])/dr[d];
387  if(f<fac) fac=f, flags = 2u<<(2*d);
388  }
389  }
390 
391 #ifdef DIAGNOSTICS_1
392  printf(" flags = %x, fac = %.15g\n",flags,fac);
393 #endif
394 
395  if(flags==0) goto newton_area_fin;
396 
397  for(d=0;d<2;++d) dr[d]*=fac;
398 
399  newton_area_edge: {
400  const unsigned ei = edge_index(flags);
401  const unsigned dn = ei>>1, de = plus_1_mod_2(dn);
402  const double res0 = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]),
403  res1 = resid[1]-(jac[2]*dr[0]+jac[3]*dr[1]);
404  /* y = J_u^T res */
405  const double y = jac[de]*res0+jac[2+de]*res1;
406  /* JtJ = J_u^T J_u */
407  const double JtJ = jac[ de]*jac[ de]
408  +jac[2+de]*jac[2+de];
409  const double drc = y/JtJ;
410  double fac = 1;
411  unsigned new_flags = 0;
412 #ifdef DIAGNOSTICS_1
413  printf(" edge %u, de=%u\n",ei,de);
414  printf(" r=(%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1]);
415  printf(" resid = (%g,%g); r^T r / 2 = %g\n",res0,res1,
416  (res[0]*res[0]+res[1]*res[1])/2);
417  printf(" min at %.17g\n", r0[de]+dr[de]+drc);
418 #endif
419  {
420  const double rz = r0[de]+dr[de], lb=bnd[2*de],ub=bnd[2*de+1];
421  const double nr = r0[de]+(dr[de]+drc);
422  if((nr-lb)*(ub-nr)<0) {
423  if(nr<lb) {
424  double f = (lb-rz)/drc;
425  if(f<fac) fac=f, new_flags = 1u<<(2*de);
426  } else {
427  double f = (ub-rz)/drc;
428  if(f<fac) fac=f, new_flags = 2u<<(2*de);
429  }
430  }
431  }
432 #ifdef DIAGNOSTICS_1
433  printf(" new_flags = %x, fac = %.17g\n",new_flags,fac);
434 #endif
435  dr[de] += fac*drc;
436  flags |= new_flags;
437  goto newton_area_relax;
438  }
439 
440  /* check and possibly relax constraints */
441  newton_area_relax: {
442  const unsigned old_flags = flags;
443  /* res := res_0 - J dr */
444  const double res0 = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]),
445  res1 = resid[1]-(jac[2]*dr[0]+jac[3]*dr[1]);
446  /* y := J^T res */
447  double y[2]; y[0] = jac[0]*res0+jac[2]*res1,
448  y[1] = jac[1]*res0+jac[3]*res1;
449  #define SETDR(d) do { \
450  unsigned f = flags>>(2*d) & 3u; \
451  if(f) dr[d] = bnd[2*d+(f-1)] - r0[d]; \
452  } while(0)
453  SETDR(0); SETDR(1);
454  #undef SETDR
455  for(d=0;d<2;++d) {
456  unsigned c = flags>>(2*d) & 3u;
457  if(c==0) continue;
458  else if(dr[d]*y[d]<0) flags &= ~(3u<<(2*d));
459 #ifdef DIAGNOSTICS_1
460  if( (c==1&&dr[d]>0) || (c==2&&dr[d]<0) )
461  printf("FAIL! c=%u, dr[d]=%g\n",c,dr[d]);
462 #endif
463  }
464 #ifdef DIAGNOSTICS_1
465  printf(" checking constraints (%x)\n",old_flags);
466  printf(" r=(%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1]);
467  printf(" resid = (%g,%g); r^T r / 2 = %g\n",res[0],res[1],
468  (res[0]*res[0]+res[1]*res[1])/2);
469  printf(" relaxed %x -> %x\n",old_flags,flags);
470 #endif
471  if(flags==old_flags) goto newton_area_fin;
472  switch(num_constrained(flags)) {
473  case 1: goto newton_area_edge;
474  }
475  }
476 
477 newton_area_fin:
478 #ifdef DIAGNOSTICS_1
479  {
480  const double res[2]={ resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]),
481  resid[1]-(jac[2]*dr[0]+jac[3]*dr[1]) };
482  printf(" r=(%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1]);
483  printf(" resid = (%g,%g); r^T r / 2 = %g\n",res[0],res[1],
484  (res[0]*res[0]+res[1]*res[1])/2);
485  }
486 #endif
487  flags &= mask;
488  if(fabs(dr[0])+fabs(dr[1]) < tol) flags |= CONVERGED_FLAG;
489  {
490  const double res0 = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]),
491  res1 = resid[1]-(jac[2]*dr[0]+jac[3]*dr[1]);
492  out->dist2p=resid[0]*resid[0]+resid[1]*resid[1]
493  -(res0*res0+res1*res1);
494  }
495  #define SETR(d) do { \
496  unsigned f = flags>>(2*d) & 3u; \
497  out->r[d] = f==0 ? r0[d]+dr[d] : ( f==1 ? -1 : 1 ); \
498  } while(0)
499  SETR(0); SETR(1);
500  #undef SETR
501  out->flags = flags | (p->flags<<5);
502 }
503 
504 static void newton_edge(struct findpts_el_pt_2 *const out,
505  const double jac[4], const double rhes, const double resid[2],
506  const unsigned de, const unsigned dn,
507  unsigned flags,
508  const struct findpts_el_pt_2 *const p, const double tol)
509 {
510  const double tr = p->tr;
511  /* A = J^T J - resid_d H_d */
512  const double A = jac[ de]*jac[ de]
513  +jac[2+de]*jac[2+de] - rhes;
514  /* y = J^T r */
515  const double y = jac[ de]*resid[0]
516  +jac[2+de]*resid[1];
517 
518  const double oldr = p->r[de];
519  double dr,nr,tdr,tnr;
520  double v,tv; unsigned new_flags=0, tnew_flags=0;
521 
522 #ifdef DIAGNOSTICS_1
523  printf("Newton edge %u (dn=%u) flags=%x\n",de,dn,flags);
524  printf(" A=%g, y=%g\n",A,y);
525  if(A<=0) printf(" A not positive\n");
526  printf(" r=(%.17g,%.17g)\n",p->r[0],p->r[1]);
527 #endif
528 
529  #define EVAL(dr) (dr*A-2*y)*dr
530 
531  /* if A is not SPD, quadratic model has no minimum */
532  if(A>0) {
533  dr = y/A, nr = oldr+dr;
534  if(fabs(dr)<tr && fabs(nr)<1) { v=EVAL(dr); goto newton_edge_fin; }
535  }
536 
537  if(( nr=oldr-tr)>-1) dr=-tr;
538  else nr=-1, dr=-1-oldr, new_flags = flags | 1u<<(2*de);
539  v =EVAL( dr);
540 
541  if((tnr=oldr+tr)< 1) tdr=tr;
542  else tnr= 1, tdr= 1-oldr, tnew_flags = flags | 2u<<(2*de);
543  tv=EVAL(tdr);
544 
545  if(tv<v) nr=tnr, dr=tdr, v=tv, new_flags=tnew_flags;
546 
547 newton_edge_fin:
548  /* check convergence */
549  if(fabs(dr) < tol) new_flags |= CONVERGED_FLAG;
550  out->r[de]=nr;
551  out->r[dn]=p->r[dn];
552  out->dist2p = -v;
553  out->flags = flags | new_flags | (p->flags<<5);
554 #ifdef DIAGNOSTICS_1
555  printf(" new r = (%.17g,%.17g)\n",out->r[0],out->r[1]);
556 #endif
557 }
558 
559 typedef void findpt_fun(
560  struct findpts_el_pt_2 *const out,
561  struct findpts_el_data_2 *const fd,
562  const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol);
563 
564 /* work[(6+2*(2*nr+ns))*pn] */
565 static void findpt_area(
566  struct findpts_el_pt_2 *const out,
567  struct findpts_el_data_2 *const fd,
568  const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol)
569 {
570  const unsigned nr=fd->n[0],ns=fd->n[1];
571  double *const resid = fd->work, *const jac = resid + 2*pn,
572  *const wtr = jac+4*pn, *const wts = wtr+2*nr*pn,
573  *const slice = wts+2*ns*pn;
574  unsigned i; unsigned d;
575  /* evaluate x(r) and jacobian */
576  for(i=0;i<pn;++i)
577  fd->lag[0](wtr+2*i*nr, fd->lag_data[0], nr, 1, p[i].r[0]);
578  for(i=0;i<pn;++i)
579  fd->lag[1](wts+2*i*ns, fd->lag_data[1], ns, 1, p[i].r[1]);
580  for(d=0;d<2;++d) {
581  tensor_mxm(slice,nr, fd->x[d],ns, wts,2*pn);
582  for(i=0;i<pn;++i) {
583  const double *const wtr_i = wtr+2*i*nr, *const slice_i = slice+2*i*nr;
584  double *const jac_i = jac+4*i+2*d;
585  resid[2*i+d] = p[i].x[d] - tensor_ig1(jac_i,
586  wtr_i,nr, slice_i);
587  jac_i[1] = tensor_i1(wtr_i,nr, slice_i+nr);
588  }
589  }
590  /* perform Newton step */
591  for(i=0;i<pn;++i) {
592  if(reject_prior_step_q(out+i,resid+2*i,p+i,tol)) continue;
593  else newton_area(out+i, jac+4*i, resid+2*i, p+i, tol);
594  }
595 }
596 
597 /* work[(10+3*n)*pn] */
598 static void findpt_edge(
599  struct findpts_el_pt_2 *const out,
600  struct findpts_el_data_2 *const fd,
601  const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol)
602 {
603  const unsigned pflag = p->flags & FLAG_MASK;
604  const unsigned ei = edge_index(pflag);
605  const unsigned dn = ei>>1, de = plus_1_mod_2(dn);
606  const unsigned n = fd->n[de];
607  double *const resid=fd->work, *const jac=resid+2*pn, *const hes=jac+4*pn,
608  *const wt = hes+pn, *const slice = wt+3*n*pn;
609  const struct findpts_el_gedge_2 *const edge = get_edge(fd,ei);
610  unsigned i; unsigned d;
611 
612 #ifdef DIAGNOSTICS_1
613  printf("Edge %u\n",ei);
614  printf(" pflag = %u\n",pflag);
615  printf(" ei = %u\n",ei);
616  printf(" dn, de = %u, %u\n",dn,de);
617  printf(" n = %u \n", n);
618 #endif
619 
620  /* evaluate x(r), jacobian, hessian */
621  for(i=0;i<pn;++i)
622  fd->lag[de](wt+3*i*n, fd->lag_data[de], n, 2, p[i].r[de]);
623  for(i=0;i<pn;++i) hes[i]=0;
624  for(d=0;d<2;++d) {
625  tensor_mtxv(slice,3*pn, wt, edge->x[d],n);
626  for(i=0;i<pn;++i) {
627  const double *const slice_i = slice+3*i;
628  double r;
629  resid[2*i+d] = r = p[i].x[d] - slice_i[0];
630  jac[4*i+2*d+de] = slice_i[1];
631  hes[i] += r * slice_i[2];
632  }
633  }
634  for(i=1;i<pn;++i) memcpy(wt+i*n, wt+3*i*n, n*sizeof(double));
635  for(d=0;d<2;++d) {
636  tensor_mtxv(slice,pn, wt, edge->dxdn[d],n);
637  for(i=0;i<pn;++i) jac[4*i+2*d+dn] = slice[i];
638  }
639  /* perform Newton step */
640  for(i=0;i<pn;++i) {
641  double *const resid_i=resid+2*i, *const jac_i=jac+4*i, *const hes_i=hes+i;
642  /* check prior step */
643  if(!reject_prior_step_q(out+i,resid_i,p+i,tol)) {
644  /* check constraint */
645  const double steep = resid_i[0] * jac_i[ dn]
646  +resid_i[1] * jac_i[2+dn];
647 #ifdef DIAGNOSTICS_1
648  printf("jacobian = %g\t%g\n"
649  " %g\t%g\n",jac_i[0],jac_i[1],jac_i[2],jac_i[3]);
650  printf("resid_i = (%g,%g)\n", resid_i[0],resid_i[1]);
651  printf("steep = %g (%s)\n", steep, steep * p[i].r[dn] < 0 ? "in" : "out");
652 #endif
653  if(steep * p[i].r[dn] < 0) /* relax constraint */
654  newton_area(out+i, jac_i, resid_i, p+i, tol);
655  else
656  newton_edge(out+i, jac_i, *hes_i, resid_i, de,dn,pflag, p+i, tol);
657  }
658  }
659 }
660 
661 static void findpt_pt(
662  struct findpts_el_pt_2 *const out,
663  struct findpts_el_data_2 *const fd,
664  const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol)
665 {
666  const unsigned pflag = p->flags & FLAG_MASK;
667  const unsigned pi = point_index(pflag);
668  const struct findpts_el_gpt_2 *gpt = get_pt(fd,pi);
669  const double *const x = gpt->x, *const jac = gpt->jac, *const hes = gpt->hes;
670  unsigned i;
671 
672 #ifdef DIAGNOSTICS_1
673  printf("Point %u\n",pi);
674  printf(" pflag = %u\n",pflag);
675  printf(" pi = %u\n",pi);
676 #endif
677 
678  for(i=0;i<pn;++i) {
679  double resid[2], steep[2], sr[2];
680  unsigned dn,de;
681  resid[0] = p[i].x[0]-x[0],
682  resid[1] = p[i].x[1]-x[1];
683  steep[0] = jac[0]*resid[0] + jac[2]*resid[1],
684  steep[1] = jac[1]*resid[0] + jac[3]*resid[1];
685  sr[0] = steep[0]*p[i].r[0],
686  sr[1] = steep[1]*p[i].r[1];
687  /* check prior step */
688  if(reject_prior_step_q(out+i,resid,p+i,tol)) continue;
689  /* check constraints */
690  if(sr[0]<0) {
691  if(sr[1]<0) goto findpt_pt_area;
692  else { de=0,dn=1; goto findpt_pt_edge; }
693  }
694  else if(sr[1]<0) { de=1,dn=0; goto findpt_pt_edge; }
695  out[i].r[0]=p[i].r[0],out[i].r[1]=p[i].r[1];
696  out[i].dist2p=0;
697  out[i].flags = pflag | CONVERGED_FLAG;
698  continue;
699  findpt_pt_area:
700  newton_area(out+i, jac,resid, p+i, tol);
701  continue;
702  findpt_pt_edge: {
703  const double rh = resid[0]*hes[de]+resid[1]*hes[2+de];
704  newton_edge(out+i, jac,rh,resid, de,dn,
705  pflag&(3u<<(2*dn)), p+i, tol);
706  } continue;
707  }
708 }
709 
710 static void seed(struct findpts_el_data_2 *const fd,
711  struct findpts_el_pt_2 *const pt, const unsigned npt)
712 {
713  struct findpts_el_pt_2 *p, *const pe = pt+npt;
714  const unsigned nr=fd->n[0], ns=fd->n[1];
715  unsigned i,j, ii=0;
716  for(p=pt;p!=pe;++p) p->dist2=DBL_MAX;
717  for(j=0;j<ns;++j) {
718  const double zs=fd->z[1][j];
719  for(i=0;i<nr;++i) {
720  const double zr=fd->z[0][i];
721  const double x=fd->x[0][ii], y=fd->x[1][ii];
722  ++ii;
723  for(p=pt;p!=pe;++p) {
724  const double dx=p->x[0]-x,dy=p->x[1]-y;
725  const double dist2 = dx*dx+dy*dy;
726  if(p->dist2<=dist2) continue;
727  p->dist2=dist2;
728  p->r[0]=zr, p->r[1]=zs;
729  }
730  }
731  }
732 }
733 
734 void findpts_el_2(struct findpts_el_data_2 *const fd, const unsigned npt,
735  const double tol)
736 {
737  findpt_fun *const fun[3] =
739  struct findpts_el_pt_2 *const pbuf = fd->p, *const pstart = fd->p + npt;
740  unsigned nconv = npt;
741  unsigned step = 0;
742  unsigned count[9] = { 0,0,0, 0,0,0, 0,0,0 } ;
743  count[0]=npt;
744  seed(fd,pbuf,npt);
745  { unsigned i;
746  for(i=0;i<npt;++i) {
747  pstart[i].x[0]=pbuf[i].x[0];
748  pstart[i].x[1]=pbuf[i].x[1];
749  pstart[i].r[0]=pbuf[i].r[0];
750  pstart[i].r[1]=pbuf[i].r[1];
751  pstart[i].index=i,pstart[i].flags=0;
752  pstart[i].dist2=DBL_MAX,pstart[i].dist2p=0,pstart[i].tr=1;
753  }
754  }
755  while(nconv && step++ < 50) {
756  /* advance each group of points */
757  struct findpts_el_pt_2 *p, *const pe=pstart+nconv, *pout; unsigned pn;
758 
759 #if DIAGNOSTICS_ITERATIONS>1
760  { unsigned i;
761  printf("findpts_el_2 Newton step (%u), %u unconverged:\n ", step,nconv);
762  for(i=0;i<9;++i) printf(" %u",count[i]);
763  printf("\n");
764  }
765 #endif
766 
767  for(p=pstart,pout=pbuf; p!=pe; p+=pn,pout+=pn) {
768  const unsigned pflags = p->flags & FLAG_MASK;
769  pn = count[pt_flags_to_bin_noC(pflags)];
770  fun[num_constrained(pflags)](pout, fd, p,pn, tol);
771  }
772  /* group points by contsraints */
773  {
774  unsigned offset[10] = { 0,0,0, 0,0,0, 0,0,0, 0 };
775  struct findpts_el_pt_2 *const pe = pbuf+nconv;
776  for(pout=pbuf; pout!=pe; ++pout)
777  ++offset[pt_flags_to_bin(pout->flags & FLAG_MASK)];
778  {
779  unsigned i; unsigned sum=0;
780  for(i=0;i<9;++i) {
781  unsigned ci=offset[i]; count[i]=ci, offset[i]=sum, sum+=ci;
782  }
783  nconv = offset[9] = sum; /* last bin is converged; forget it */
784  }
785  for(pout=pbuf; pout!=pe; ++pout)
786  pstart[offset[pt_flags_to_bin(pout->flags & FLAG_MASK)]++] = *pout;
787  }
788  }
789  { struct findpts_el_pt_2 *p, *const pe=pstart+npt;
790  for(p=pstart;p!=pe;++p)
791  pbuf[p->index]=*p, pbuf[p->index].flags&=FLAG_MASK;
792  }
793 #if DIAGNOSTICS_ITERATIONS
794  printf("findpts_el_2 took %u steps\n ", step);
795 #endif
796 }
797 
799  double *const out_base, const unsigned out_stride,
800  const double *const r_base, const unsigned r_stride, const unsigned pn,
801  const double *const in, struct findpts_el_data_2 *const fd)
802 {
803  const unsigned nr=fd->n[0],ns=fd->n[1];
804  double *const wtr = fd->work, *const wts = wtr+nr*pn,
805  *const slice = wts+ns*pn;
806  unsigned i; const double *r; double *out;
807  for(i=0,r=r_base;i<pn;++i) {
808  fd->lag[0](wtr+i*nr, fd->lag_data[0], nr, 0, r[0]);
809  fd->lag[1](wts+i*ns, fd->lag_data[1], ns, 0, r[1]);
810  r = (const double*)((const char*)r + r_stride);
811  }
812 
813  tensor_mxm(slice,nr, in,ns, wts,pn);
814  for(i=0,out=out_base;i<pn;++i) {
815  const double *const wtr_i = wtr+i*nr, *const slice_i = slice+i*nr;
816  *out = tensor_i1(wtr_i,nr, slice_i);
817  out = (double*)((char*)out + out_stride);
818  }
819 }
struct findpts_el_gpt_2 pt[4]
Definition: findpts_el.h:36
lagrange_fun * lag[2]
Definition: findpts_el.h:27
static double sum(struct xxt *data, double v, uint n, uint tag)
Definition: xxt.c:400
#define CONVERGED_FLAG
Definition: findpts_el_2.c:52
#define tmalloc(type, count)
Definition: mem.h:91
#define findpts_el_free_2
Definition: findpts_el_2.c:17
void lagrange_fun(double *restrict p, double *restrict data, unsigned n, int d, double x)
Definition: poly.c:20
static double tensor_ig1(double g[1], const double *wtr, uint nr, const double *u)
Definition: tensor.h:124
#define SETDR(d)
void compute_edge_data_fun(struct findpts_el_data_2 *fd)
Definition: findpts_el_2.c:200
void findpt_fun(struct findpts_el_pt_2 *const out, struct findpts_el_data_2 *const fd, const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol)
Definition: findpts_el_2.c:559
#define SETR(d)
static void tensor_mtxv(double *y, uint ny, const double *A, const double *x, uint nx)
Definition: tensor.h:63
n
Definition: xxt_test.m:73
static void lin_solve_2(double x[2], const double A[4], const double y[2])
Definition: findpts_el_2.c:32
static double zr[NR]
static const struct findpts_el_gpt_2 * get_pt(struct findpts_el_data_2 *fd, unsigned pi)
Definition: findpts_el_2.c:270
static void tensor_mxm(double *C, uint nc, const double *A, uint na, const double *B, uint nb)
Definition: tensor.h:53
static unsigned pt_flags_to_bin_noC(const unsigned flags)
Definition: findpts_el_2.c:61
#define x
ulong A[NUM][SI]
Definition: sort_test.c:17
#define EVAL(dr)
const double * x[2]
Definition: findpts_el.h:31
ns
Definition: xxt_test.m:43
unsigned n[2]
Definition: findpts_el.h:25
static void newton_area(struct findpts_el_pt_2 *const out, const double jac[4], const double resid[2], const struct findpts_el_pt_2 *const p, const double tol)
Definition: findpts_el_2.c:344
static unsigned plus_1_mod_2(const unsigned x)
Definition: findpts_el_2.c:75
double jac[4]
Definition: findpts_el.h:19
static unsigned point_index(const unsigned x)
Definition: findpts_el_2.c:86
double * z[2]
Definition: findpts_el.h:26
double x[2]
Definition: findpts_el.h:19
double * wtend[2]
Definition: findpts_el.h:29
static double tensor_i1(const double *Jr, uint nr, const double *u)
Definition: tensor.h:81
#define gll_lag_setup
Definition: poly.c:18
#define FLAG_MASK
Definition: findpts_el_2.c:53
double * sides
Definition: findpts_el.h:34
#define DO_MAX(x)
double x[2]
Definition: findpts_el.h:14
p
Definition: xxt_test2.m:1
#define gll_lag_size
Definition: poly.c:17
static const struct findpts_el_gedge_2 * get_edge(struct findpts_el_data_2 *fd, unsigned ei)
Definition: findpts_el_2.c:236
static unsigned work_size(const unsigned nr, const unsigned ns, const unsigned npt_max)
Definition: findpts_el_2.c:128
unsigned index
Definition: findpts_el.h:15
static unsigned num_constrained(const unsigned flags)
Definition: findpts_el_2.c:55
#define findpts_el_setup_2
Definition: findpts_el_2.c:16
static void seed(struct findpts_el_data_2 *const fd, struct findpts_el_pt_2 *const pt, const unsigned npt)
Definition: findpts_el_2.c:710
static unsigned pt_flags_to_bin(const unsigned flags)
Definition: findpts_el_2.c:68
static double zs[NS]
double oldr[2]
Definition: findpts_el.h:14
static void compute_edge_data_s(struct findpts_el_data_2 *fd)
Definition: findpts_el_2.c:220
#define tensor_mtxm
Definition: tensor.c:8
static void newton_edge(struct findpts_el_pt_2 *const out, const double jac[4], const double rhes, const double resid[2], const unsigned de, const unsigned dn, unsigned flags, const struct findpts_el_pt_2 *const p, const double tol)
Definition: findpts_el_2.c:504
const double * x[2]
Definition: findpts_el.h:18
static unsigned edge_index(const unsigned x)
Definition: findpts_el_2.c:84
static void compute_pt_data(struct findpts_el_data_2 *fd)
Definition: findpts_el_2.c:252
for i
Definition: xxt_test.m:74
static void findpt_pt(struct findpts_el_pt_2 *const out, struct findpts_el_data_2 *const fd, const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol)
Definition: findpts_el_2.c:661
static unsigned which_bit(const unsigned x)
Definition: findpts_el_2.c:78
unsigned flags
Definition: findpts_el.h:15
double hes[4]
Definition: findpts_el.h:19
struct findpts_el_pt_2 * p
Definition: findpts_el.h:23
static void findpt_edge(struct findpts_el_pt_2 *const out, struct findpts_el_data_2 *const fd, const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol)
Definition: findpts_el_2.c:598
static int reject_prior_step_q(struct findpts_el_pt_2 *const out, const double resid[2], const struct findpts_el_pt_2 *const p, const double tol)
Definition: findpts_el_2.c:283
unsigned npt_max
Definition: findpts_el.h:22
#define findpts_el_2
Definition: findpts_el_2.c:18
static const unsigned nr[3]
double * lag_data[2]
Definition: findpts_el.h:28
#define findpts_el_eval_2
Definition: findpts_el_2.c:19
ulong out[N]
Definition: sort_test2.c:20
static double work[TNR *NS]
#define lobatto_nodes
Definition: poly.c:15
establishes some macros to establish naming conventions
double r[2]
Definition: findpts_el.h:14
static void compute_edge_data_r(struct findpts_el_data_2 *fd)
Definition: findpts_el_2.c:203
static void findpt_area(struct findpts_el_pt_2 *const out, struct findpts_el_data_2 *const fd, const struct findpts_el_pt_2 *const p, const unsigned pn, const double tol)
Definition: findpts_el_2.c:565
static double y[NR *NS *NT *N]
Definition: obbox_test.c:31
const double * dxdn[2]
Definition: findpts_el.h:18
unsigned side_init
Definition: findpts_el.h:33
struct findpts_el_gedge_2 edge[4]
Definition: findpts_el.h:35