Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
findpts_el_3.c
Go to the documentation of this file.
1 #include <stddef.h>
2 #include <stdlib.h>
3 #include <string.h>
4 #include <math.h>
5 #include <float.h>
6 #include "c99.h"
7 #include "name.h"
8 #include "fail.h"
9 #include "types.h"
10 #include "mem.h"
11 #include "tensor.h"
12 #include "poly.h"
13 
14 #define findpts_el_setup_3 PREFIXED_NAME(findpts_el_setup_3)
15 #define findpts_el_free_3 PREFIXED_NAME(findpts_el_free_3 )
16 #define findpts_el_3 PREFIXED_NAME(findpts_el_3 )
17 #define findpts_el_eval_3 PREFIXED_NAME(findpts_el_eval_3 )
18 /*
19 #define DIAGNOSTICS_1
20 #define DIAGNOSTICS_2
21 */
22 #define DIAGNOSTICS_ITERATIONS 0
23 
24 #if defined(DIAGNOSTICS_1) || defined(DIAGNOSTICS_2) \
25  || DIAGNOSTICS_ITERATIONS > 0
26 #include <stdio.h>
27 #endif
28 
29 /* A is row-major */
30 static void lin_solve_3(double x[3], const double A[9], const double y[3])
31 {
32  const double a = A[4]*A[8]-A[5]*A[7],
33  b = A[5]*A[6]-A[3]*A[8],
34  c = A[3]*A[7]-A[4]*A[6],
35  idet = 1/(A[0]*a+A[1]*b+A[2]*c);
36  const double
37  inv0 = a,
38  inv1 = A[2]*A[7]-A[1]*A[8],
39  inv2 = A[1]*A[5]-A[2]*A[4],
40  inv3 = b,
41  inv4 = A[0]*A[8]-A[2]*A[6],
42  inv5 = A[2]*A[3]-A[0]*A[5],
43  inv6 = c,
44  inv7 = A[1]*A[6]-A[0]*A[7],
45  inv8 = A[0]*A[4]-A[1]*A[3];
46  x[0] = idet*(inv0*y[0] + inv1*y[1] + inv2*y[2]);
47  x[1] = idet*(inv3*y[0] + inv4*y[1] + inv5*y[2]);
48  x[2] = idet*(inv6*y[0] + inv7*y[1] + inv8*y[2]);
49 }
50 
51 static void lin_solve_sym_2(double x[2], const double A[3], const double y[2])
52 {
53  const double idet = 1/(A[0]*A[2] - A[1]*A[1]);
54  x[0] = idet*(A[2]*y[0] - A[1]*y[1]);
55  x[1] = idet*(A[0]*y[1] - A[1]*y[0]);
56 }
57 
58 
59 struct findpts_el_pt_3 {
60  double x[3],r[3],oldr[3],dist2,dist2p,tr;
61  unsigned index,flags;
62 };
63 
64 /* the bit structure of flags is CTTSSRR
65  the C bit --- 1<<6 --- is set when the point is converged
66  RR is 0 = 00b if r is unconstrained,
67  1 = 01b if r is constrained at -1
68  2 = 10b if r is constrained at +1
69  SS, TT are similarly for s and t constraints
70 */
71 
72 #define CONVERGED_FLAG (1u<<6)
73 #define FLAG_MASK 0x7fu
74 
75 static unsigned num_constrained(const unsigned flags)
76 {
77  const unsigned y = flags | flags>>1;
78  return (y&1u) + (y>>2 & 1u) + (y>>4 & 1u);
79 }
80 
81 static unsigned pt_flags_to_bin_noC(const unsigned flags)
82 {
83  return ((flags>>4 & 3u)*3 + (flags>>2 & 3u))*3 + (flags & 3u);
84 }
85 
86 /* map flags to 27 if the C bit is set,
87  else to [0,26] --- the 27 valid configs of TTSSRR */
88 static unsigned pt_flags_to_bin(const unsigned flags)
89 {
90  const unsigned mask = 0u - (flags>>6); /* 0 or 0xfff... when converged */
91  return (mask & 27u) | (~mask & pt_flags_to_bin_noC(flags));
92 }
93 
94 /* assumes x = 0, 1, or 2 */
95 static unsigned plus_1_mod_3(const unsigned x) { return ((x | x>>1)+1) & 3u; }
96 static unsigned plus_2_mod_3(const unsigned x)
97 {
98  const unsigned y = (x-1) & 3u;
99  return y ^ (y>>1);
100 }
101 
102 /* assumes x = 1 << i, with i < 6, returns i+1 */
103 static unsigned which_bit(const unsigned x)
104 {
105  const unsigned y = x&7u;
106  return (y-(y>>2)) | ((x-1)&4u) | (x>>4);
107 }
108 
109 static unsigned face_index(const unsigned x) { return which_bit(x)-1; }
110 
111 static unsigned edge_index(const unsigned x)
112 {
113  const unsigned y = ~((x>>1) | x);
114  const unsigned RTSR = ((x>>1)&1u) | ((x>>2)&2u) | ((x>>3)&4u) | ((x<<2)&8u);
115  const unsigned re = RTSR>>1;
116  const unsigned se = 4u | RTSR>>2;
117  const unsigned te = 8u | (RTSR&3u);
118  return ( (0u - ( y &1u)) & re )
119  | ( (0u - ((y>>2)&1u)) & se )
120  | ( (0u - ((y>>4)&1u)) & te );
121 }
122 
123 static unsigned point_index(const unsigned x)
124 {
125  return ((x>>1)&1u) | ((x>>2)&2u) | ((x>>3)&4u);
126 }
127 
128 /* extra data
129 
130  we need x, dx/dn for each face
131  rs: x at 0, nrst - nrs,
132  6*nrs extra for dx/dn
133  st: 12*nst extra
134  tr: 12*ntr extra
135  (transposed order for embedded t-edges)
136 
137  for each edge,
138  have x, dx/dn2 already as part of face data
139  need dx/dn1 (strided in face data)
140  need d^2x/dn1^2, d^2x/dn2^2 possibly, if constraints relax
141  thats 3*4*(nr+ns+nt) extra
142 
143 */
144 
145 struct findpts_el_gface_3 { const double *x[3], *dxdn[3]; };
146 struct findpts_el_gedge_3 { const double *x[3], *dxdn1[3], *dxdn2[3],
147  *d2xdn1[3], *d2xdn2[3]; };
148 struct findpts_el_gpt_3 { double x[3], jac[9], hes[18]; };
149 
150 struct findpts_el_data_3 {
151  unsigned npt_max;
152  struct findpts_el_pt_3 *p;
153 
154  unsigned n[3];
155  double *z[3];
156  lagrange_fun *lag[3];
157  double *lag_data[3];
158  double *wtend[3];
159 
160  const double *x[3];
161 
162  unsigned side_init;
163  double *sides;
164  struct findpts_el_gface_3 face[6]; /* ST R=-1,R=+1; TR S=-1,S=+1; ... */
165  struct findpts_el_gedge_3 edge[12]; /* R S=-1,T=-1; R S=1,T=-1; ... */
166  struct findpts_el_gpt_3 pt[8];
167 
168  double *work;
169 };
170 
171 /* work[2*nt+2*nrs] */
172 /* work[4*nr+4*nst] */
173 /* work[4*ns+4*nr] */
174 /* work[4*n1+4*n], work[2*n2+2*n] */
175 /* work[4*nr+4], work[2*nt+2] */
176 /* work[(3+9+2*(nr+ns+nt+nrs))*pn + max(2*nr,ns) ] */
177 /* work[(3+9+3+3*(n1+n2+n1))*pn ] */
178 /* work[ 3*n ] */
179 static unsigned work_size(
180  const unsigned nr, const unsigned ns, const unsigned nt,
181  const unsigned npt_max)
182 {
183  unsigned n1, n2, wsize;
184  if(nr>ns) {
185  if(nr>nt) n1=nr, n2 = (ns>nt ? ns : nt);
186  else n1=nt, n2 = nr;
187  } else {
188  if(ns>nt) n1=ns, n2 = (nr>nt ? nr : nt);
189  else n1=nt, n2 = ns;
190  }
191  #define DO_MAX(x) do { const unsigned temp=(x); \
192  wsize=temp>wsize?temp:wsize; } while(0)
193  wsize = (12 + 2*(nr+ns+nt+nr*ns)) * npt_max + (2*nr>ns?2*nr:ns);
194  DO_MAX(2*(nt+nr*ns));
195  DO_MAX(4*(nr+ns*nt));
196  DO_MAX(4*(n1+n2));
197  DO_MAX(npt_max*(15+3*(2*n1+n2)));
198  #undef DO_MAX
199  return wsize;
200 }
201 
202 void findpts_el_setup_3(struct findpts_el_data_3 *const fd,
203  const unsigned n[3],
204  const unsigned npt_max)
205 {
206  const unsigned nr=n[0], ns=n[1], nt=n[2];
207  const unsigned nrs = nr*ns, nst=ns*nt, ntr=nt*nr;
208  const unsigned face_size = 12*nst + 12*ntr + 6*nrs;
209  const unsigned off_es = face_size + 36*nr, off_et = off_es + 36*ns,
210  tot = off_et + 36*nt;
211  unsigned d,i, lag_size[3];
212 
213  fd->npt_max = npt_max;
214  fd->p = tmalloc(struct findpts_el_pt_3, npt_max*2);
215 
216  fd->n[0]=nr, fd->n[1]=ns, fd->n[2]=nt;
217  for(d=0;d<3;++d) lag_size[d] = gll_lag_size(fd->n[d]);
218 
219  fd->z[0] = tmalloc(double,lag_size[0]+lag_size[1]+lag_size[2]
220  +7*(nr+ns+nt) + tot +
221  work_size(nr,ns,nt,npt_max));
222  fd->z[1] = fd->z[0]+nr;
223  fd->z[2] = fd->z[1]+ns;
224  fd->lag_data[0] = fd->z[2]+nt;
225  fd->lag_data[1] = fd->lag_data[0]+lag_size[0];
226  fd->lag_data[2] = fd->lag_data[1]+lag_size[1];
227  fd->wtend[0] = fd->lag_data[2]+lag_size[2];
228  fd->wtend[1] = fd->wtend[0]+6*nr;
229  fd->wtend[2] = fd->wtend[1]+6*ns;
230  fd->sides = fd->wtend[2]+6*nt;
231  fd->work = fd->sides + tot;
232 
233  fd->side_init = 0;
234 
235  for(d=0;d<3;++d) {
236  double *wt=fd->wtend[d]; unsigned n=fd->n[d];
237  lobatto_nodes(fd->z[d],n);
238  fd->lag[d] = gll_lag_setup(fd->lag_data[d],n);
239  fd->lag[d](wt , fd->lag_data[d],n,2,-1);
240  fd->lag[d](wt+3*n, fd->lag_data[d],n,2, 1);
241 
242  wt[0]=1; for(i=1;i<n;++i) wt[i]=0;
243  wt+=3*n; { for(i=0;i<n-1;++i) wt[i]=0; } wt[i]=1;
244  }
245 
246  #define SET_FACE(i,base,n) do { for(d=0;d<3;++d) \
247  fd->face[2*i ].x[d] = fd->sides + base + d *n, \
248  fd->face[2*i ].dxdn[d] = fd->sides + base + (3+d)*n, \
249  fd->face[2*i+1].x[d] = fd->sides + base + (6+d)*n, \
250  fd->face[2*i+1].dxdn[d] = fd->sides + base + (9+d)*n; \
251  } while(0)
252  SET_FACE(0,0,nst);
253  SET_FACE(1,12*nst,ntr);
254  #undef SET_FACE
255 
256  for(d=0;d<3;++d)
257  fd->face[4].x[d] = 0, /* will point to user data */
258  fd->face[4].dxdn[d] = fd->sides + 12*(nst+ntr) + d*nrs,
259  fd->face[5].x[d] = 0, /* will point to user data */
260  fd->face[5].dxdn[d] = fd->sides + 12*(nst+ntr) + (3+d)*nrs;
261 
262  #define SET_EDGE1(j,k,d,rd,rn,base) \
263  for(i=0;i<2;++i) \
264  fd->edge[4*j+2*i+0].dxdn2[d] = fd->face[2*k+i].dxdn[d], \
265  fd->edge[4*j+2*i+1].dxdn2[d] = fd->face[2*k+i].dxdn[d]+n##rd##rn-n##rd;
266  #define SET_EDGE2(j,d,rd,rn,base) \
267  for(i=0;i<4;++i) \
268  fd->edge[4*j+i].dxdn1 [d] = fd->sides + base + (9*i +d)*n##rd, \
269  fd->edge[4*j+i].d2xdn1[d] = fd->sides + base + (9*i+3+d)*n##rd, \
270  fd->edge[4*j+i].d2xdn2[d] = fd->sides + base + (9*i+6+d)*n##rd;
271  #define SET_EDGE(j,rd,rn,base) do { \
272  for(d=0;d<3;++d) { SET_EDGE1(j,plus_2_mod_3(j),d,rd,rn,base); \
273  SET_EDGE2(j,d,rd,rn,base); } \
274  } while(0)
275  SET_EDGE(0,r,s,face_size);
276  SET_EDGE(1,s,t,off_es);
277  SET_EDGE(2,t,r,off_et);
278  #undef SET_EDGE
279  #undef SET_EDGE2
280  #undef SET_EDGE1
281 }
282 
283 void findpts_el_free_3(struct findpts_el_data_3 *const fd)
284 {
285  free(fd->p);
286  free(fd->z[0]);
287 }
288 
289 typedef void compute_face_data_fun(struct findpts_el_data_3 *fd);
290 
291 /* work[2*nt+2*nrs] */
293 {
294  const unsigned nr = fd->n[0], ns=fd->n[1], nt=fd->n[2],
295  nrs = nr*ns, nst=ns*nt, ntr = nt*nr, nrstm1 = nrs*(nt-1);
296  unsigned d;
297  double *work = fd->work, *out = fd->sides + 12*(nst+ntr);
298  memcpy(work , fd->wtend[2]+ nt, nt*sizeof(double));
299  memcpy(work+nt, fd->wtend[2]+4*nt, nt*sizeof(double));
300  for(d=0;d<3;++d) {
301  tensor_mxm(work+2*nt,nrs, fd->x[d],nt, work,2);
302  memcpy(out+ d *nrs, work+2*nt , nrs*sizeof(double));
303  memcpy(out+(3+d)*nrs, work+2*nt+nrs , nrs*sizeof(double));
304  fd->face[4].x[d] = fd->x[d];
305  fd->face[5].x[d] = fd->x[d] + nrstm1;
306  }
307 }
308 
309 /* work[4*nr+4*nst] */
311 {
312  const unsigned nr = fd->n[0], ns=fd->n[1], nt=fd->n[2], nst=ns*nt;
313  unsigned i;
314  double *work = fd->work, *out = fd->sides;
315  memcpy(work , fd->wtend[0] , 2*nr*sizeof(double));
316  memcpy(work+2*nr, fd->wtend[0]+3*nr, 2*nr*sizeof(double));
317  for(i=0;i<3;++i) {
318  tensor_mtxm(work+4*nr,nst, fd->x[i],nr, work,4);
319  memcpy(out+ i *nst, work+4*nr , nst*sizeof(double));
320  memcpy(out+(3+i)*nst, work+4*nr+ nst, nst*sizeof(double));
321  memcpy(out+(6+i)*nst, work+4*nr+2*nst, nst*sizeof(double));
322  memcpy(out+(9+i)*nst, work+4*nr+3*nst, nst*sizeof(double));
323  }
324 }
325 
326 /* work[4*ns+4*nr] */
328 {
329  const unsigned nr = fd->n[0], ns=fd->n[1], nt=fd->n[2],
330  nrs = nr*ns, nst=ns*nt, ntr=nt*nr;
331  unsigned i,k,d;
332  double *work = fd->work, *out = fd->sides + 12*nst;
333  memcpy(work , fd->wtend[1] , 2*ns*sizeof(double));
334  memcpy(work+2*ns, fd->wtend[1]+3*ns, 2*ns*sizeof(double));
335  for(d=0;d<3;++d) {
336  for(k=0;k<nt;++k) {
337  double *outk; double *in = work+4*ns;
338  tensor_mxm(in,nr, fd->x[d]+k*nrs,ns, work,4);
339  for(outk=out+ d *ntr+k,i=0;i<nr;++i,outk+=nt) *outk=*in++;
340  for(outk=out+(3+d)*ntr+k,i=0;i<nr;++i,outk+=nt) *outk=*in++;
341  for(outk=out+(6+d)*ntr+k,i=0;i<nr;++i,outk+=nt) *outk=*in++;
342  for(outk=out+(9+d)*ntr+k,i=0;i<nr;++i,outk+=nt) *outk=*in++;
343  }
344  }
345 }
346 
347 static const struct findpts_el_gface_3 *get_face(
348  struct findpts_el_data_3 *fd, unsigned fi)
349 {
350  const unsigned mask = 1u<<(fi/2);
351  if((fd->side_init&mask)==0) {
352  compute_face_data_fun *const fun[3] = {
356  };
357  fun[fi/2](fd);
358  fd->side_init |= mask;
359  }
360  return &fd->face[fi];
361 }
362 
363 /* work[4*n1+4*n], work[2*n2+2*n] */
364 static void compute_edge_data(struct findpts_el_data_3 *fd, unsigned d)
365 {
366  const unsigned dn1 = plus_1_mod_3(d), dn2 = plus_2_mod_3(d);
367  const unsigned n = fd->n[d], n1 = fd->n[dn1], n2 = fd->n[dn2];
368  const unsigned nr=fd->n[0],ns=fd->n[1],nt=fd->n[2],
369  nrs=nr*ns,nst=ns*nt,ntr=nt*nr;
370  const unsigned base = 6*nrs + 12*nst + 12*ntr
371  + (d>0 ? 36*nr : 0) + (d>1 ? 36*ns : 0);
372  #define DXDN1(i,d) (fd->sides+base+(9*(i) +(d))*n)
373  #define D2XDN1(i,d) (fd->sides+base+(9*(i)+3+(d))*n)
374  #define D2XDN2(i,d) (fd->sides+base+(9*(i)+6+(d))*n)
375  const struct findpts_el_gface_3 *face_d_n1 = get_face(fd,2*dn2),
376  *face_n2_d = get_face(fd,2*dn1);
377  struct findpts_el_gedge_3 *e = fd->edge + 4*d;
378  unsigned i,xd;
379  double *work = fd->work;
380  for(xd=0;xd<3;++xd) for(i=0;i<2;++i)
381  e[2*i ].x[xd] = face_d_n1[i].x[xd],
382  e[2*i+1].x[xd] = face_d_n1[i].x[xd]+n*(n1-1);
383  memcpy(work , fd->wtend[dn1]+ n1,2*n1*sizeof(double));
384  memcpy(work+2*n1, fd->wtend[dn1]+4*n1,2*n1*sizeof(double));
385  for(i=0;i<2;++i) for(xd=0;xd<3;++xd) {
386  tensor_mxm(work+4*n1,n, face_d_n1[i].x[xd],n1, work,4);
387  memcpy( DXDN1(2*i+0,xd), work+4*n1 , n*sizeof(double));
388  memcpy(D2XDN1(2*i+0,xd), work+4*n1+ n, n*sizeof(double));
389  memcpy( DXDN1(2*i+1,xd), work+4*n1+2*n, n*sizeof(double));
390  memcpy(D2XDN1(2*i+1,xd), work+4*n1+3*n, n*sizeof(double));
391  }
392  memcpy(work , fd->wtend[dn2]+2*n2,n2*sizeof(double));
393  memcpy(work+n2, fd->wtend[dn2]+5*n2,n2*sizeof(double));
394  for(i=0;i<2;++i) for(xd=0;xd<3;++xd) {
395  tensor_mtxm(work+2*n2,n, face_n2_d[i].x[xd],n2, work,2);
396  memcpy(D2XDN2( i,xd), work+2*n2 , n*sizeof(double));
397  memcpy(D2XDN2(2+i,xd), work+2*n2+n, n*sizeof(double));
398  }
399  #undef D2XDN2
400  #undef D2XDN1
401  #undef DXDN1
402 }
403 
404 static const struct findpts_el_gedge_3 *get_edge(
405  struct findpts_el_data_3 *fd, unsigned ei)
406 {
407  const unsigned mask = 8u<<(ei/4);
408  if((fd->side_init&mask)==0)
409  compute_edge_data(fd,ei/4), fd->side_init |= mask;
410  return &fd->edge[ei];
411 }
412 
413 /* work[4*nr+4], work[2*nt+2] */
414 static void compute_pt_data(struct findpts_el_data_3 *fd)
415 {
416  const unsigned nr = fd->n[0], nt = fd->n[2];
417  const struct findpts_el_gedge_3 *e = get_edge(fd,0);
418  unsigned d,i;
419  double *work = fd->work;
420  for(i=0;i<4;++i) for(d=0;d<3;++d)
421  fd->pt[2*i ].x[d] = e[i].x[d][0],
422  fd->pt[2*i ].jac[3*d+1] = e[i].dxdn1[d][0],
423  fd->pt[2*i ].jac[3*d+2] = e[i].dxdn2[d][0],
424  fd->pt[2*i ].hes[6*d+3] = e[i].d2xdn1[d][0],
425  fd->pt[2*i ].hes[6*d+5] = e[i].d2xdn2[d][0],
426  fd->pt[2*i+1].x[d] = e[i].x[d][nr-1],
427  fd->pt[2*i+1].jac[3*d+1] = e[i].dxdn1[d][nr-1],
428  fd->pt[2*i+1].jac[3*d+2] = e[i].dxdn2[d][nr-1],
429  fd->pt[2*i+1].hes[6*d+3] = e[i].d2xdn1[d][nr-1],
430  fd->pt[2*i+1].hes[6*d+5] = e[i].d2xdn2[d][nr-1];
431  memcpy(work , fd->wtend[0]+ nr, 2*nr*sizeof(double));
432  memcpy(work+2*nr, fd->wtend[0]+4*nr, 2*nr*sizeof(double));
433  for(i=0;i<4;++i) for(d=0;d<3;++d) {
434  tensor_mtxv(work+4*nr,4, work, e[i].x[d],nr);
435  fd->pt[2*i ].jac[3*d ] = work[4*nr ];
436  fd->pt[2*i ].hes[6*d ] = work[4*nr+1];
437  fd->pt[2*i+1].jac[3*d ] = work[4*nr+2];
438  fd->pt[2*i+1].hes[6*d ] = work[4*nr+3];
439  }
440  memcpy(work+nr,work+2*nr,nr*sizeof(double));
441  for(i=0;i<4;++i) for(d=0;d<3;++d) {
442  tensor_mtxv(work+2*nr,2, work, e[i].dxdn1[d],nr);
443  fd->pt[2*i ].hes[6*d+1] = work[2*nr ];
444  fd->pt[2*i+1].hes[6*d+1] = work[2*nr+1];
445  tensor_mtxv(work+2*nr,2, work, e[i].dxdn2[d],nr);
446  fd->pt[2*i ].hes[6*d+2] = work[2*nr ];
447  fd->pt[2*i+1].hes[6*d+2] = work[2*nr+1];
448  }
449  e = get_edge(fd,8);
450  memcpy(work , fd->wtend[2]+ nt, nt*sizeof(double));
451  memcpy(work+nt, fd->wtend[2]+4*nt, nt*sizeof(double));
452  for(i=0;i<4;++i) for(d=0;d<3;++d) {
453  tensor_mtxv(work+2*nt,2, work, e[i].dxdn2[d],nt);
454  fd->pt[ i].hes[6*d+4] = work[2*nt ];
455  fd->pt[4+i].hes[6*d+4] = work[2*nt+1];
456  }
457 }
458 
459 static const struct findpts_el_gpt_3 *get_pt(
460  struct findpts_el_data_3 *fd, unsigned pi)
461 {
462  if((fd->side_init&0x40u)==0)
463  compute_pt_data(fd), fd->side_init |= 0x40u;
464  return &fd->pt[pi];
465 }
466 
467 /* check reduction in objective against prediction, and adjust
468  trust region radius (p->tr) accordingly;
469  may reject the prior step, returning 1; otherwise returns 0
470  sets out->dist2, out->index, out->x, out->oldr in any event,
471  leaving out->r, out->dr, out->flags to be set when returning 0 */
472 static int reject_prior_step_q(struct findpts_el_pt_3 *const out,
473  const double resid[3],
474  const struct findpts_el_pt_3 *const p,
475  const double tol)
476 {
477  const double old_dist2 = p->dist2;
478  const double dist2 = resid[0]*resid[0]+resid[1]*resid[1]+resid[2]*resid[2];
479  const double decr = old_dist2-dist2;
480  const double pred = p->dist2p;
481  out->x[0]=p->x[0],out->x[1]=p->x[1],out->x[2]=p->x[2];
482  out->oldr[0]=p->r[0],out->oldr[1]=p->r[1],out->oldr[2]=p->r[2];
483  out->index=p->index;
484  out->dist2=dist2;
485 #ifdef DIAGNOSTICS_2
486  printf("Checking prior step:\n"
487  " old r = (%.17g,%.17g,%.17g), old flags = %x\n"
488  " old_dist2 = %.17g\n"
489  " r = (%.17g,%.17g,%.17g), flags = %x\n"
490  " dist2 = %.17g\n"
491  " difference = %.17g\n"
492  " predicted = %.17g\n"
493  " rho = %.17g\n",
494  p->oldr[0],p->oldr[1],p->oldr[2],(p->flags>>7)&FLAG_MASK,old_dist2,
495  p->r[0],p->r[1],p->r[2],p->flags&FLAG_MASK,dist2,
496  decr, pred, decr/pred);
497 #endif
498  if(decr>= 0.01 * pred) {
499  if(decr>= 0.9 * pred) {
500  out->tr = p->tr*2;
501 #ifdef DIAGNOSTICS_2
502  printf(" very good iteration; tr -> %g\n", out->tr);
503 #endif
504  } else {
505 #ifdef DIAGNOSTICS_2
506  printf(" good iteration; tr = %g\n", p->tr);
507 #endif
508  out->tr = p->tr;
509  }
510  return 0;
511  } else {
512  /* reject step; note: the point will pass through this routine
513  again, and we set things up here so it gets classed as a
514  "very good iteration" --- this doubles the trust radius,
515  which is why we divide by 4 below */
516  double v0 = fabs(p->r[0]-p->oldr[0]),
517  v1 = fabs(p->r[1]-p->oldr[1]),
518  v2 = fabs(p->r[2]-p->oldr[2]);
519  out->tr = (v1>v2?(v0>v1?v0:v1):(v0>v2?v0:v2))/4;
520 #ifdef DIAGNOSTICS_2
521  printf(" bad iteration; tr -> %g\n", out->tr);
522 #endif
523  out->dist2=old_dist2;
524  out->r[0]=p->oldr[0],out->r[1]=p->oldr[1],out->r[2]=p->oldr[2];
525  out->flags=p->flags>>7;
526  out->dist2p=-DBL_MAX;
527  if(pred < dist2*tol) out->flags|=CONVERGED_FLAG;
528  return 1;
529  }
530 }
531 
532 /* minimize ||resid - jac * dr||_2, with |dr| <= tr, |r0+dr|<=1
533  (exact solution of trust region problem) */
534 static void newton_vol(struct findpts_el_pt_3 *const out,
535  const double jac[9], const double resid[3],
536  const struct findpts_el_pt_3 *const p, const double tol)
537 {
538  const double tr = p->tr;
539  double bnd[6] = { -1,1, -1,1, -1,1 };
540  double r0[3];
541  double dr[3], fac;
542  unsigned d, mask, flags;
543  r0[0]=p->r[0],r0[1]=p->r[1],r0[2]=p->r[2];
544 #ifdef DIAGNOSTICS_1
545  printf("newton_vol:\n");
546  printf(" resid = (%g,%g,%g); r^T r / 2 = %g\n",resid[0],resid[1],resid[2],
547  (resid[0]*resid[0]+resid[1]*resid[1]+resid[2]*resid[2])/2);
548  printf(" jac = %g\t%g\t%g\n"
549  " %g\t%g\t%g\n"
550  " %g\t%g\t%g\n",
551  jac[0],jac[1],jac[2],jac[3],jac[4],jac[5],jac[6],jac[7],jac[8]);
552  printf(" r = (%.15g,%.15g,%.15g)\n",r0[0],r0[1],r0[2]);
553 #endif
554 
555  mask = 0x3fu;
556  for(d=0;d<3;++d) {
557  if(r0[d]-tr>-1) bnd[2*d ]=r0[d]-tr, mask^=1u<<(2*d);
558  if(r0[d]+tr< 1) bnd[2*d+1]=r0[d]+tr, mask^=2u<<(2*d);
559  }
560 
561  lin_solve_3(dr, jac,resid);
562 
563 #ifdef DIAGNOSTICS_1
564  printf(" min at r = (%.17g,%.17g,%.17g)\n",
565  r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
566 #endif
567 
568  fac = 1, flags = 0;
569  for(d=0;d<3;++d) {
570  double nr = r0[d]+dr[d];
571  if((nr-bnd[2*d])*(bnd[2*d+1]-nr)>=0) continue;
572  if(nr<bnd[2*d]) {
573  double f = (bnd[2*d ]-r0[d])/dr[d];
574  if(f<fac) fac=f, flags = 1u<<(2*d);
575  } else {
576  double f = (bnd[2*d+1]-r0[d])/dr[d];
577  if(f<fac) fac=f, flags = 2u<<(2*d);
578  }
579  }
580 
581 #ifdef DIAGNOSTICS_1
582  printf(" flags = %x, fac = %.15g\n",flags,fac);
583 #endif
584 
585  if(flags==0) goto newton_vol_fin;
586 
587  for(d=0;d<3;++d) dr[d]*=fac;
588 
589  newton_vol_face: {
590  const unsigned fi = face_index(flags);
591  const unsigned dn = fi>>1, d1 = plus_1_mod_3(dn), d2 = plus_2_mod_3(dn);
592  double drc[2], fac=1;
593  unsigned new_flags=0;
594  double res[3], y[2], JtJ[3];
595  res[0] = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
596  res[1] = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
597  res[2] = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
598  /* y = J_u^T res */
599  y[0] = jac[d1]*res[0]+jac[3+d1]*res[1]+jac[6+d1]*res[2],
600  y[1] = jac[d2]*res[0]+jac[3+d2]*res[1]+jac[6+d2]*res[2];
601  /* JtJ = J_u^T J_u */
602  JtJ[0] = jac[ d1]*jac[ d1]
603  +jac[3+d1]*jac[3+d1]
604  +jac[6+d1]*jac[6+d1],
605  JtJ[1] = jac[ d1]*jac[ d2]
606  +jac[3+d1]*jac[3+d2]
607  +jac[6+d1]*jac[6+d2],
608  JtJ[2] = jac[ d2]*jac[ d2]
609  +jac[3+d2]*jac[3+d2]
610  +jac[6+d2]*jac[6+d2];
611  lin_solve_sym_2(drc, JtJ,y);
612 #ifdef DIAGNOSTICS_1
613  printf(" face %u, dn=%u, (d1,d2)=(%u,%u)\n",fi,dn,d1,d2);
614  printf(" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
615  printf(" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
616  (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
617  printf(" min at (%.17g,%.17g)\n",
618  r0[d1]+dr[d1]+drc[0],r0[d2]+dr[d2]+drc[1]);
619 #endif
620  #define CHECK_CONSTRAINT(drcd,d3) do { \
621  const double rz = r0[d3]+dr[d3], lb=bnd[2*d3],ub=bnd[2*d3+1]; \
622  const double delta=drcd, nr = r0[d3]+(dr[d3]+delta); \
623  if((nr-lb)*(ub-nr)<0) { \
624  if(nr<lb) { \
625  double f = (lb-rz)/delta; \
626  if(f<fac) fac=f, new_flags = 1u<<(2*d3); \
627  } else { \
628  double f = (ub-rz)/delta; \
629  if(f<fac) fac=f, new_flags = 2u<<(2*d3); \
630  } \
631  } \
632  } while(0)
633  CHECK_CONSTRAINT(drc[0],d1); CHECK_CONSTRAINT(drc[1],d2);
634 #ifdef DIAGNOSTICS_1
635  printf(" new_flags = %x, fac = %.17g\n",new_flags,fac);
636 #endif
637  dr[d1] += fac*drc[0], dr[d2] += fac*drc[1];
638  if(new_flags==0) goto newton_vol_fin;
639  flags |= new_flags;
640  }
641 
642  newton_vol_edge: {
643  const unsigned ei = edge_index(flags);
644  const unsigned de = ei>>2;
645  double fac = 1;
646  unsigned new_flags = 0;
647  double res[3],y,JtJ,drc;
648  res[0] = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
649  res[1] = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
650  res[2] = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
651  /* y = J_u^T res */
652  y = jac[de]*res[0]+jac[3+de]*res[1]+jac[6+de]*res[2];
653  /* JtJ = J_u^T J_u */
654  JtJ = jac[ de]*jac[ de]
655  +jac[3+de]*jac[3+de]
656  +jac[6+de]*jac[6+de];
657  drc = y/JtJ;
658 #ifdef DIAGNOSTICS_1
659  printf(" edge %u, de=%u\n",ei,de);
660  printf(" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
661  printf(" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
662  (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
663  printf(" min at %.17g\n", r0[de]+dr[de]+drc);
664 #endif
665  CHECK_CONSTRAINT(drc,de);
666  #undef CHECK_CONSTRAINT
667 #ifdef DIAGNOSTICS_1
668  printf(" new_flags = %x, fac = %.17g\n",new_flags,fac);
669 #endif
670  dr[de] += fac*drc;
671  flags |= new_flags;
672  goto newton_vol_relax;
673  }
674 
675  /* check and possibly relax constraints */
676  newton_vol_relax: {
677  const unsigned old_flags = flags;
678  double res[3], y[3];
679  /* res := res_0 - J dr */
680  res[0] = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
681  res[1] = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
682  res[2] = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
683  /* y := J^T res */
684  y[0] = jac[0]*res[0]+jac[3]*res[1]+jac[6]*res[2],
685  y[1] = jac[1]*res[0]+jac[4]*res[1]+jac[7]*res[2],
686  y[2] = jac[2]*res[0]+jac[5]*res[1]+jac[8]*res[2];
687  #define SETDR(d) do { \
688  unsigned f = flags>>(2*d) & 3u; \
689  if(f) dr[d] = bnd[2*d+(f-1)] - r0[d]; \
690  } while(0)
691  SETDR(0); SETDR(1); SETDR(2);
692  #undef SETDR
693  for(d=0;d<3;++d) {
694  unsigned c = flags>>(2*d) & 3u;
695  if(c==0) continue;
696  else if(dr[d]*y[d]<0) flags &= ~(3u<<(2*d));
697 #ifdef DIAGNOSTICS_1
698  if( (c==1&&dr[d]>0) || (c==2&&dr[d]<0) )
699  printf("FAIL! c=%u, dr[d]=%g\n",c,dr[d]);
700 #endif
701  }
702 #ifdef DIAGNOSTICS_1
703  printf(" checking constraints (%x)\n",old_flags);
704  printf(" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
705  printf(" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
706  (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
707  printf(" relaxed %x -> %x\n",old_flags,flags);
708 #endif
709  if(flags==old_flags) goto newton_vol_fin;
710  switch(num_constrained(flags)) {
711  case 1: goto newton_vol_face;
712  case 2: goto newton_vol_edge;
713  }
714  }
715 
716 newton_vol_fin:
717 #ifdef DIAGNOSTICS_1
718  {
719  const double res[3]={ resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
720  resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
721  resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]) };
722  printf(" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
723  printf(" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
724  (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
725  }
726 #endif
727  flags &= mask;
728  if(fabs(dr[0])+fabs(dr[1])+fabs(dr[2]) < tol) flags |= CONVERGED_FLAG;
729  {
730  const double res0 = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
731  res1 = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
732  res2 = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
733  out->dist2p=resid[0]*resid[0]+resid[1]*resid[1]+resid[2]*resid[2]
734  -(res0*res0+res1*res1+res2*res2);
735  }
736  #define SETR(d) do { \
737  unsigned f = flags>>(2*d) & 3u; \
738  out->r[d] = f==0 ? r0[d]+dr[d] : ( f==1 ? -1 : 1 ); \
739  } while(0)
740  SETR(0); SETR(1); SETR(2);
741  #undef SETR
742  out->flags = flags | (p->flags<<7);
743 }
744 
745 static void newton_face(struct findpts_el_pt_3 *const out,
746  const double jac[9], const double rhes[3],
747  const double resid[3],
748  const unsigned d1, const unsigned d2, const unsigned dn,
749  const unsigned flags,
750  const struct findpts_el_pt_3 *const p, const double tol)
751 {
752  const double tr = p->tr;
753  double bnd[4];
754  double r[2], dr[2]={0,0};
755  unsigned mask, new_flags;
756  double v, tv; unsigned i;
757  double A[3], y[2], r0[2];
758  /* A = J^T J - resid_d H_d */
759  A[0] = jac[ d1]*jac[ d1]
760  +jac[3+d1]*jac[3+d1]
761  +jac[6+d1]*jac[6+d1] - rhes[0],
762  A[1] = jac[ d1]*jac[ d2]
763  +jac[3+d1]*jac[3+d2]
764  +jac[6+d1]*jac[6+d2] - rhes[1],
765  A[2] = jac[ d2]*jac[ d2]
766  +jac[3+d2]*jac[3+d2]
767  +jac[6+d2]*jac[6+d2] - rhes[2];
768  /* y = J^T r */
769  y[0] = jac[ d1]*resid[0]
770  +jac[3+d1]*resid[1]
771  +jac[6+d1]*resid[2],
772  y[1] = jac[ d2]*resid[0]
773  +jac[3+d2]*resid[1]
774  +jac[6+d2]*resid[2];
775  r0[0] = p->r[d1], r0[1] = p->r[d2];
776 
777 #ifdef DIAGNOSTICS_1
778  printf("newton_face, dn=%u, (d1,d2)=%u,%u:\n", dn,d1,d2);
779  printf(" J^T r = (%g,%g)\n", y[0],y[1]);
780  printf(" A = %g\t%g\n"
781  " %g\t%g\n", A[0],A[1],A[1],A[2]);
782  printf(" r = (%.15g,%.15g)\n", r0[0],r0[1]);
783 #endif
784 
785  new_flags=flags;
786  mask=0x3fu;
787  if(r0[0]-tr>-1) bnd[0]=-tr, mask^=1u; else bnd[0]=-1-r0[0];
788  if(r0[0]+tr< 1) bnd[1]= tr, mask^=2u; else bnd[1]= 1-r0[0];
789  if(r0[1]-tr>-1) bnd[2]=-tr, mask^=1u<<2; else bnd[2]=-1-r0[1];
790  if(r0[1]+tr< 1) bnd[3]= tr, mask^=2u<<2; else bnd[3]= 1-r0[1];
791 
792 #ifdef DIAGNOSTICS_1
793  printf(" bounds = ([%.15g,%.15g],[%.15g,%.15g])\n",
794  r0[0]+bnd[0],r0[0]+bnd[1],r0[1]+bnd[2],r0[1]+bnd[3]);
795 #endif
796 
797  if(A[0]+A[2]<=0 || A[0]*A[2]<=A[1]*A[1]) goto newton_face_constrained;
798  lin_solve_sym_2(dr, A,y);
799 
800 #ifdef DIAGNOSTICS_1
801  printf(" min at r = (%.15g,%.15g)\n", r0[0]+dr[0],r0[1]+dr[1]);
802 #endif
803 
804  #define EVAL(r,s) -(y[0]*r+y[1]*s)+(r*A[0]*r+(2*r*A[1]+s*A[2])*s)/2
805  if( (dr[0]-bnd[0])*(bnd[1]-dr[0])>=0
806  && (dr[1]-bnd[2])*(bnd[3]-dr[1])>=0) {
807  r[0] = r0[0]+dr[0], r[1] = r0[1]+dr[1];
808  v = EVAL(dr[0],dr[1]);
809  goto newton_face_fin;
810  }
811 newton_face_constrained:
812  v = EVAL(bnd[0],bnd[2]); i=1u|(1u<<2);
813  tv = EVAL(bnd[1],bnd[2]); if(tv<v) v=tv, i=2u|(1u<<2);
814  tv = EVAL(bnd[0],bnd[3]); if(tv<v) v=tv, i=1u|(2u<<2);
815  tv = EVAL(bnd[1],bnd[3]); if(tv<v) v=tv, i=2u|(2u<<2);
816  if(A[0]>0) {
817  double drc;
818  drc = (y[0] - A[1]*bnd[2])/A[0];
819  if((drc-bnd[0])*(bnd[1]-drc)>=0 && (tv=EVAL(drc,bnd[2]))<v)
820  v=tv,i=1u<<2,dr[0]=drc;
821  drc = (y[0] - A[1]*bnd[3])/A[0];
822  if((drc-bnd[0])*(bnd[1]-drc)>=0 && (tv=EVAL(drc,bnd[3]))<v)
823  v=tv,i=2u<<2,dr[0]=drc;
824  }
825  if(A[2]>0) {
826  double drc;
827  drc = (y[1] - A[1]*bnd[0])/A[2];
828  if((drc-bnd[2])*(bnd[3]-drc)>=0 && (tv=EVAL(bnd[0],drc))<v)
829  v=tv,i=1u,dr[1]=drc;
830  drc = (y[1] - A[1]*bnd[1])/A[2];
831  if((drc-bnd[2])*(bnd[3]-drc)>=0 && (tv=EVAL(bnd[1],drc))<v)
832  v=tv,i=2u,dr[1]=drc;
833  }
834  #undef EVAL
835 
836  #define SETR(d,d3) do { \
837  const unsigned f = i>>(2*d) & 3u; \
838  if(f==0) r[d]=r0[d]+dr[d]; \
839  else { \
840  if((f&(mask>>(2*d)))==0) r[d]=r0[d]+(f==1?-tr:tr); \
841  else r[d]=(f==1?-1:1), new_flags |= f<<(2*d3); \
842  } \
843  } while(0)
844  SETR(0,d1); SETR(1,d2);
845 #ifdef DIAGNOSTICS_1
846  printf(" constrained min at r = (%.15g,%.15g)\n", r[0],r[1]);
847 #endif
848 newton_face_fin:
849  out->dist2p = -2*v;
850  dr[0]=r[0]-p->r[d1];
851  dr[1]=r[1]-p->r[d2];
852  if(fabs(dr[0])+fabs(dr[1]) < tol) new_flags |= CONVERGED_FLAG;
853  out->r[dn]=p->r[dn], out->r[d1]=r[0],out->r[d2]=r[1];
854  out->flags = new_flags | (p->flags<<7);
855 }
856 
857 static void newton_edge(struct findpts_el_pt_3 *const out,
858  const double jac[9], const double rhes, const double resid[3],
859  const unsigned de, const unsigned dn1, const unsigned dn2,
860  unsigned flags,
861  const struct findpts_el_pt_3 *const p, const double tol)
862 {
863  const double tr = p->tr;
864  /* A = J^T J - resid_d H_d */
865  const double A = jac[ de]*jac[ de]
866  +jac[3+de]*jac[3+de]
867  +jac[6+de]*jac[6+de] - rhes;
868  /* y = J^T r */
869  const double y = jac[ de]*resid[0]
870  +jac[3+de]*resid[1]
871  +jac[6+de]*resid[2];
872 
873  const double oldr = p->r[de];
874  double dr,nr,tdr,tnr;
875  double v,tv; unsigned new_flags=0, tnew_flags=0;
876 
877 #ifdef DIAGNOSTICS_1
878  printf("Newton edge %u (dn1=%u,dn2=%u) flags=%x\n",de,dn1,dn2,flags);
879  printf(" A=%g, y=%g\n",A,y);
880  if(A<=0) printf(" A not positive\n");
881  printf(" r=(%g,%g,%g)\n",p->r[0],p->r[1],p->r[2]);
882 #endif
883 
884  #define EVAL(dr) (dr*A-2*y)*dr
885 
886  /* if A is not SPD, quadratic model has no minimum */
887  if(A>0) {
888  dr = y/A, nr = oldr+dr;
889  if(fabs(dr)<tr && fabs(nr)<1) { v=EVAL(dr); goto newton_edge_fin; }
890  }
891 
892  if(( nr=oldr-tr)>-1) dr=-tr;
893  else nr=-1, dr=-1-oldr, new_flags = flags | 1u<<(2*de);
894  v =EVAL( dr);
895 
896  if((tnr=oldr+tr)< 1) tdr=tr;
897  else tnr= 1, tdr= 1-oldr, tnew_flags = flags | 2u<<(2*de);
898  tv=EVAL(tdr);
899 
900  if(tv<v) nr=tnr, dr=tdr, v=tv, new_flags=tnew_flags;
901 
902 newton_edge_fin:
903  /* check convergence */
904  if(fabs(dr) < tol) new_flags |= CONVERGED_FLAG;
905  out->r[de]=nr;
906  out->r[dn1]=p->r[dn1];
907  out->r[dn2]=p->r[dn2];
908  out->dist2p = -v;
909  out->flags = flags | new_flags | (p->flags<<7);
910 #ifdef DIAGNOSTICS_1
911  printf(" new r = (%g,%g,%g)\n",out->r[0],out->r[1],out->r[2]);
912 #endif
913 }
914 
915 typedef void findpt_fun(
916  struct findpts_el_pt_3 *const out,
917  struct findpts_el_data_3 *const fd,
918  const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol);
919 
920 /* work[(3+9+2*(nr+ns+nt+nrs))*pn + max(2*nr,ns) ] */
921 static void findpt_vol(
922  struct findpts_el_pt_3 *const out,
923  struct findpts_el_data_3 *const fd,
924  const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
925 {
926  const unsigned nr=fd->n[0],ns=fd->n[1],nt=fd->n[2],
927  nrs=nr*ns;
928  double *const resid = fd->work, *const jac = resid + 3*pn,
929  *const wtrs = jac+9*pn, *const wtt = wtrs+2*(nr+ns)*pn,
930  *const slice = wtt+2*nt*pn, *const temp = slice + 2*pn*nrs;
931  unsigned i; unsigned d;
932  /* evaluate x(r) and jacobian */
933  for(i=0;i<pn;++i)
934  fd->lag[0](wtrs+2*i*(nr+ns) , fd->lag_data[0], nr, 1, p[i].r[0]);
935  for(i=0;i<pn;++i)
936  fd->lag[1](wtrs+2*i*(nr+ns)+2*nr, fd->lag_data[1], ns, 1, p[i].r[1]);
937  for(i=0;i<pn;++i)
938  fd->lag[2](wtt+2*i*nt , fd->lag_data[2], nt, 1, p[i].r[2]);
939  for(d=0;d<3;++d) {
940  tensor_mxm(slice,nrs, fd->x[d],nt, wtt,2*pn);
941  for(i=0;i<pn;++i) {
942  const double *const wtrs_i = wtrs+2*i*(nr+ns),
943  *const slice_i = slice+2*i*nrs;
944  double *const jac_i = jac+9*i+3*d;
945  resid[3*i+d] = p[i].x[d] - tensor_ig2(jac_i,
946  wtrs_i,nr, wtrs_i+2*nr,ns, slice_i, temp);
947  jac_i[2] = tensor_i2(wtrs_i,nr, wtrs_i+2*nr,ns, slice_i+nrs, temp);
948  }
949  }
950  /* perform Newton step */
951  for(i=0;i<pn;++i) {
952  if(reject_prior_step_q(out+i,resid+3*i,p+i,tol)) continue;
953  else newton_vol(out+i, jac+9*i, resid+3*i, p+i, tol);
954  }
955 }
956 
957 /* work[(3+9+3+3*(n1+n2+n1))*pn ] */
958 static void findpt_face(
959  struct findpts_el_pt_3 *const out,
960  struct findpts_el_data_3 *const fd,
961  const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
962 {
963  const unsigned pflag = p->flags & FLAG_MASK;
964  const unsigned fi = face_index(pflag);
965  const unsigned dn = fi>>1, d1 = plus_1_mod_3(dn), d2 = plus_2_mod_3(dn);
966  const unsigned n1 = fd->n[d1], n2 = fd->n[d2];
967  double *const resid=fd->work, *const jac=resid+3*pn, *const hes=jac+9*pn,
968  *const wt1 = hes+3*pn, *const wt2 = wt1+3*n1*pn,
969  *const slice = wt2+3*n2*pn;
970  const struct findpts_el_gface_3 *const face = get_face(fd,fi);
971  unsigned i; unsigned d;
972 
973 #ifdef DIAGNOSTICS_1
974  printf("Face %u\n",fi);
975  printf(" pflag = %u\n",pflag);
976  printf(" fi = %u\n",fi);
977  printf(" dn, d1, d2 = %u, %u, %u\n",dn,d1,d2);
978  printf(" n1, n2 = %u, %u \n", n1,n2);
979 #endif
980 
981  /* evaluate x(r), jacobian, hessian */
982  for(i=0;i<pn;++i)
983  fd->lag[d1](wt1+3*i*n1, fd->lag_data[d1], n1, 2, p[i].r[d1]);
984  for(i=0;i<pn;++i)
985  fd->lag[d2](wt2+3*i*n2, fd->lag_data[d2], n2, 2, p[i].r[d2]);
986  for(i=0;i<3*pn;++i) hes[i]=0;
987  for(d=0;d<3;++d) {
988  tensor_mxm(slice,n1, face->x[d],n2, wt2,3*pn);
989  for(i=0;i<pn;++i) {
990  const double *const wt1_i = wt1+3*i*n1, *const slice_i = slice+3*i*n1;
991  double v[9], r;
992  tensor_mtxm(v,3, wt1_i,n1, slice_i,3);
993  /* v[3*j + i] = d^i/dr1^i d^j/dr2^j x_d */
994  resid[3*i+d] = r = p[i].x[d] - v[0];
995  jac[9*i+3*d+d1] = v[1];
996  jac[9*i+3*d+d2] = v[3];
997  hes[3*i ] += r * v[2];
998  hes[3*i+1] += r * v[4];
999  hes[3*i+2] += r * v[6];
1000  }
1001  }
1002  for(i=1;i<pn;++i) memcpy(wt2+i*n2, wt2+3*i*n2, n2*sizeof(double));
1003  for(d=0;d<3;++d) {
1004  tensor_mxm(slice,n1, face->dxdn[d],n2, wt2,pn);
1005  for(i=0;i<pn;++i)
1006  jac[9*i+3*d+dn] = tensor_dot(wt1+3*i*n1, slice+i*n1, n1);
1007  }
1008  /* perform Newton step */
1009  for(i=0;i<pn;++i) {
1010  double *const resid_i=resid+3*i, *const jac_i=jac+9*i, *const hes_i=hes+3*i;
1011  /* check prior step */
1012  if(!reject_prior_step_q(out+i,resid_i,p+i,tol)) {
1013  /* check constraint */
1014  const double steep = resid_i[0] * jac_i[ dn]
1015  +resid_i[1] * jac_i[3+dn]
1016  +resid_i[2] * jac_i[6+dn];
1017 #ifdef DIAGNOSTICS_1
1018  printf("jacobian = %g\t%g\t%g\n"
1019  " %g\t%g\t%g\n"
1020  " %g\t%g\t%g\n",jac_i[0],jac_i[1],jac_i[2],
1021  jac_i[3],jac_i[4],jac_i[5],jac_i[6],jac_i[7],jac_i[8]);
1022  printf("resid_i = (%g,%g,%g)\n", resid_i[0],resid_i[1],resid_i[2]);
1023  printf("steep = %g (%s)\n", steep, steep * p[i].r[dn] < 0 ? "in" : "out");
1024 #endif
1025  if(steep * p[i].r[dn] < 0) /* relax constraint */
1026  newton_vol(out+i, jac_i, resid_i, p+i, tol);
1027  else
1028  newton_face(out+i, jac_i, hes_i, resid_i, d1,d2,dn,pflag, p+i, tol);
1029  }
1030  }
1031 }
1032 
1033 /* work[ 3*n ] */
1034 static void findpt_edge(
1035  struct findpts_el_pt_3 *const out,
1036  struct findpts_el_data_3 *const fd,
1037  const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
1038 {
1039  const unsigned pflag = p->flags & FLAG_MASK;
1040  const unsigned ei = edge_index(pflag);
1041  const unsigned de = ei>>2, dn1 = plus_1_mod_3(de), dn2 = plus_2_mod_3(de);
1042  const unsigned n = fd->n[de];
1043  double *wt = fd->work;
1044  const struct findpts_el_gedge_3 *edge = get_edge(fd,ei);
1045  unsigned i; unsigned d;
1046 
1047 #ifdef DIAGNOSTICS_1
1048  printf("Edge %u\n",ei);
1049  printf(" pflag = %u\n",pflag);
1050  printf(" ei = %u\n",ei);
1051  printf(" de, dn1, dn2 = %u, %u, %u\n",de,dn1,dn2);
1052  printf(" n = %u \n", n);
1053 #endif
1054 
1055  for(i=0;i<pn;++i) {
1056  double dxi[3], resid[3], jac[9];
1057  double hes[5] = {0,0,0,0,0};
1058  /* evaluate x(r), jacobian, hessian */
1059  fd->lag[de](wt, fd->lag_data[de], n, 2, p[i].r[de]);
1060  for(d=0;d<3;++d) {
1061  double r;
1062  tensor_mtxv(dxi,3, wt, edge->x[d],n);
1063  resid[d] = r = p[i].x[d] - dxi[0];
1064  jac[3*d+de] = dxi[1];
1065  hes[0] += r * dxi[2];
1066  tensor_mtxv(dxi,2, wt, edge->dxdn1[d],n);
1067  jac[3*d+dn1] = dxi[0];
1068  hes[1] += r * dxi[1];
1069  tensor_mtxv(dxi,2, wt, edge->dxdn2[d],n);
1070  jac[3*d+dn2] = dxi[0];
1071  hes[2] += r * dxi[1];
1072  hes[3] += r * tensor_dot(wt, edge->d2xdn1[d], n);
1073  hes[4] += r * tensor_dot(wt, edge->d2xdn2[d], n);
1074  }
1075  /* check prior step */
1076  if(reject_prior_step_q(out+i,resid,p+i,tol)) continue;
1077  /* check constraint */
1078  {
1079  double steep[3], sr1, sr2;
1080  steep[0] = jac[0]*resid[0] + jac[3]*resid[1] + jac[6]*resid[2],
1081  steep[1] = jac[1]*resid[0] + jac[4]*resid[1] + jac[7]*resid[2],
1082  steep[2] = jac[2]*resid[0] + jac[5]*resid[1] + jac[8]*resid[2];
1083  sr1 = steep[dn1]*p[i].r[dn1],
1084  sr2 = steep[dn2]*p[i].r[dn2];
1085 #ifdef DIAGNOSTICS_1
1086  printf("jacobian = %g\t%g\t%g\n"
1087  " %g\t%g\t%g\n"
1088  " %g\t%g\t%g\n",jac[0],jac[1],jac[2],
1089  jac[3],jac[4],jac[5],jac[6],jac[7],jac[8]);
1090  printf("hessian = %g\t%g\t%g\n"
1091  " \t%g \n"
1092  " \t \t%g\n", hes[0],hes[1],hes[2],hes[3],hes[4]);
1093  printf("resid = (%g,%g,%g)\n", resid[0],resid[1],resid[2]);
1094  printf("steep1 = %g (%s)\n", steep[dn1], sr1 < 0 ? "in" : "out");
1095  printf("steep2 = %g (%s)\n", steep[dn2], sr2 < 0 ? "in" : "out");
1096 #endif
1097  if(sr1<0) {
1098  if(sr2<0)
1099  newton_vol(out+i, jac,resid, p+i, tol);
1100  else {
1101  double rh[3]; rh[0]=hes[0], rh[1]=hes[1], rh[2]=hes[3];
1102  newton_face(out+i, jac,rh,resid, de,dn1,dn2,
1103  pflag & (3u<<(dn2*2)), p+i, tol);
1104  }
1105  } else if(sr2<0) {
1106  double rh[3]; rh[0]=hes[4], rh[1]=hes[2], rh[2]=hes[0];
1107  newton_face(out+i, jac,rh,resid, dn2,de,dn1,
1108  pflag & (3u<<(dn1*2)), p+i, tol);
1109  } else
1110  newton_edge(out+i, jac,hes[0],resid, de,dn1,dn2, pflag, p+i, tol);
1111  }
1112  }
1113 }
1114 
1115 static void findpt_pt(
1116  struct findpts_el_pt_3 *const out,
1117  struct findpts_el_data_3 *const fd,
1118  const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
1119 {
1120  const unsigned pflag = p->flags & FLAG_MASK;
1121  const unsigned pi = point_index(pflag);
1122  const struct findpts_el_gpt_3 *gpt = get_pt(fd,pi);
1123  const double *const x = gpt->x, *const jac = gpt->jac, *const hes = gpt->hes;
1124  unsigned i;
1125 
1126 #ifdef DIAGNOSTICS_1
1127  printf("Point %u\n",pi);
1128  printf(" pflag = %u\n",pflag);
1129  printf(" pi = %u\n",pi);
1130 #endif
1131 
1132  for(i=0;i<pn;++i) {
1133  unsigned d1,d2,dn, de,dn1,dn2, hi0,hi1,hi2;
1134  double resid[3], steep[3], sr[3];
1135  resid[0] = p[i].x[0]-x[0],
1136  resid[1] = p[i].x[1]-x[1],
1137  resid[2] = p[i].x[2]-x[2];
1138  steep[0] = jac[0]*resid[0] + jac[3]*resid[1] + jac[6]*resid[2],
1139  steep[1] = jac[1]*resid[0] + jac[4]*resid[1] + jac[7]*resid[2],
1140  steep[2] = jac[2]*resid[0] + jac[5]*resid[1] + jac[8]*resid[2];
1141  sr[0] = steep[0]*p[i].r[0],
1142  sr[1] = steep[1]*p[i].r[1],
1143  sr[2] = steep[2]*p[i].r[2];
1144  /* check prior step */
1145  if(reject_prior_step_q(out+i,resid,p+i,tol)) continue;
1146  /* check constraints */
1147  if(sr[0]<0) {
1148  if(sr[1]<0) {
1149  if(sr[2]<0) goto findpt_pt_vol;
1150  else { d1=0,d2=1,dn=2, hi0=0,hi1=1,hi2=3; goto findpt_pt_face; }
1151  }
1152  else if(sr[2]<0) {d1=2,d2=0,dn=1, hi0=5,hi1=2,hi2=0; goto findpt_pt_face;}
1153  else { de=0,dn1=1,dn2=2, hi0=0; goto findpt_pt_edge; }
1154  }
1155  else if(sr[1]<0) {
1156  if(sr[2]<0) { d1=1,d2=2,dn=0, hi0=3,hi1=4,hi2=5; goto findpt_pt_face; }
1157  else { de=1,dn1=2,dn2=0, hi0=3; goto findpt_pt_edge; }
1158  }
1159  else if(sr[2]<0) { de=2,dn1=0,dn2=1, hi0=5; goto findpt_pt_edge; }
1160  out[i].r[0]=p[i].r[0],out[i].r[1]=p[i].r[1],out[i].r[2]=p[i].r[2];
1161  out[i].dist2p=0;
1162  out[i].flags = pflag | CONVERGED_FLAG;
1163  continue;
1164  findpt_pt_vol:
1165  newton_vol(out+i, jac,resid, p+i, tol);
1166  continue;
1167  findpt_pt_face: {
1168  double rh[3];
1169  rh[0] = resid[0]*hes[hi0]+resid[1]*hes[6+hi0]+resid[2]*hes[12+hi0],
1170  rh[1] = resid[0]*hes[hi1]+resid[1]*hes[6+hi1]+resid[2]*hes[12+hi1],
1171  rh[2] = resid[0]*hes[hi2]+resid[1]*hes[6+hi2]+resid[2]*hes[12+hi2];
1172  newton_face(out+i, jac,rh,resid, d1,d2,dn,
1173  pflag&(3u<<(2*dn)), p+i, tol);
1174  } continue;
1175  findpt_pt_edge: {
1176  const double rh =
1177  resid[0]*hes[hi0]+resid[1]*hes[6+hi0]+resid[2]*hes[12+hi0];
1178  newton_edge(out+i, jac,rh,resid, de,dn1,dn2,
1179  pflag&~(3u<<(2*de)), p+i, tol);
1180  } continue;
1181  }
1182 }
1183 
1184 static void seed(struct findpts_el_data_3 *const fd,
1185  struct findpts_el_pt_3 *const pt, const unsigned npt)
1186 {
1187  struct findpts_el_pt_3 *p, *const pe = pt+npt;
1188  const unsigned nr=fd->n[0], ns=fd->n[1], nt=fd->n[2];
1189  unsigned i,j,k, ii=0;
1190  for(p=pt;p!=pe;++p) p->dist2=DBL_MAX;
1191  for(k=0;k<nt;++k) {
1192  const double zt=fd->z[2][k];
1193  for(j=0;j<ns;++j) {
1194  const double zs=fd->z[1][j];
1195  for(i=0;i<nr;++i) {
1196  const double zr=fd->z[0][i];
1197  const double x=fd->x[0][ii], y=fd->x[1][ii], z=fd->x[2][ii];
1198  ++ii;
1199  for(p=pt;p!=pe;++p) {
1200  const double dx=p->x[0]-x,dy=p->x[1]-y,dz=p->x[2]-z;
1201  const double dist2 = dx*dx+dy*dy+dz*dz;
1202  if(p->dist2<=dist2) continue;
1203  p->dist2=dist2;
1204  p->r[0]=zr, p->r[1]=zs, p->r[2]=zt;
1205  }
1206  }
1207  }
1208  }
1209 }
1210 
1211 void findpts_el_3(struct findpts_el_data_3 *const fd, const unsigned npt,
1212  const double tol)
1213 {
1214  findpt_fun *const fun[4] =
1216  struct findpts_el_pt_3 *const pbuf = fd->p, *const pstart = fd->p + npt;
1217  unsigned nconv = npt;
1218  unsigned step = 0;
1219  unsigned count[27] = { 0,0,0, 0,0,0, 0,0,0,
1220  0,0,0, 0,0,0, 0,0,0,
1221  0,0,0, 0,0,0, 0,0,0 } ;
1222  count[0] = npt;
1223  seed(fd,pbuf,npt);
1224  { unsigned i;
1225  for(i=0;i<npt;++i) {
1226  pstart[i].x[0]=pbuf[i].x[0];
1227  pstart[i].x[1]=pbuf[i].x[1];
1228  pstart[i].x[2]=pbuf[i].x[2];
1229  pstart[i].r[0]=pbuf[i].r[0];
1230  pstart[i].r[1]=pbuf[i].r[1];
1231  pstart[i].r[2]=pbuf[i].r[2];
1232  pstart[i].index=i,pstart[i].flags=0;
1233  pstart[i].dist2=DBL_MAX,pstart[i].dist2p=0,pstart[i].tr=1;
1234  }
1235  }
1236  while(nconv && step++ < 50) {
1237  /* advance each group of points */
1238  struct findpts_el_pt_3 *p, *const pe=pstart+nconv, *pout; unsigned pn;
1239 
1240 #if DIAGNOSTICS_ITERATIONS>1
1241  { unsigned i;
1242  printf("findpts_el_3 Newton step (%u), %u unconverged:\n ", step,nconv);
1243  for(i=0;i<27;++i) printf(" %u",count[i]);
1244  printf("\n");
1245  }
1246 #endif
1247 #ifdef DIAGNOSTICS_3
1248  if(step==50) {
1249  unsigned d, i, n=fd->n[0]*fd->n[1]*fd->n[2];
1250  printf("geometry:\n{\n");
1251  for(d=0;d<3;++d) {
1252  printf(" {\n");
1253  for(i=0;i<n;++i)
1254  printf(" %.15g%s\n",fd->x[d][i],i==n-1?"":",");
1255  printf(" }%s\n",d==3-1?"":",");
1256  }
1257  printf("}\n");
1258  }
1259 #endif
1260 
1261  for(p=pstart,pout=pbuf; p!=pe; p+=pn,pout+=pn) {
1262  const unsigned pflags = p->flags & FLAG_MASK;
1263  pn = count[pt_flags_to_bin_noC(pflags)];
1264  fun[num_constrained(pflags)](pout, fd, p,pn, tol);
1265  }
1266  /* group points by contsraints */
1267  {
1268  unsigned offset[28] = { 0,0,0, 0,0,0, 0,0,0,
1269  0,0,0, 0,0,0, 0,0,0,
1270  0,0,0, 0,0,0, 0,0,0, 0 };
1271  struct findpts_el_pt_3 *const pe = pbuf+nconv;
1272  for(pout=pbuf; pout!=pe; ++pout)
1273  ++offset[pt_flags_to_bin(pout->flags & FLAG_MASK)];
1274  {
1275  unsigned i; unsigned sum=0;
1276  for(i=0;i<27;++i) {
1277  unsigned ci=offset[i]; count[i]=ci, offset[i]=sum, sum+=ci;
1278  }
1279  nconv = offset[27] = sum; /* last bin is converged; forget it */
1280  }
1281  for(pout=pbuf; pout!=pe; ++pout)
1282  pstart[offset[pt_flags_to_bin(pout->flags & FLAG_MASK)]++] = *pout;
1283  }
1284  }
1285  { struct findpts_el_pt_3 *p, *const pe=pstart+npt;
1286  for(p=pstart;p!=pe;++p)
1287  pbuf[p->index]=*p, pbuf[p->index].flags&=FLAG_MASK;
1288  }
1289 #if DIAGNOSTICS_ITERATIONS
1290  printf("findpts_el_3 took %u steps\n ", step);
1291 #endif
1292 }
1293 
1295  double *const out_base, const unsigned out_stride,
1296  const double *const r_base, const unsigned r_stride, const unsigned pn,
1297  const double *const in, struct findpts_el_data_3 *const fd)
1298 {
1299  const unsigned nr=fd->n[0],ns=fd->n[1],nt=fd->n[2],
1300  nrs=nr*ns;
1301  double *const wtrs = fd->work, *const wtt = wtrs+(nr+ns)*pn,
1302  *const slice = wtt+nt*pn, *const temp = slice + pn*nrs;
1303  unsigned i; const double *r; double *out;
1304  for(i=0,r=r_base;i<pn;++i) {
1305  fd->lag[0](wtrs+i*(nr+ns) , fd->lag_data[0], nr, 0, r[0]);
1306  fd->lag[1](wtrs+i*(nr+ns)+nr, fd->lag_data[1], ns, 0, r[1]);
1307  fd->lag[2](wtt +i*nt , fd->lag_data[2], nt, 0, r[2]);
1308  r = (const double*)((const char*)r + r_stride);
1309  }
1310 
1311  tensor_mxm(slice,nrs, in,nt, wtt,pn);
1312  for(i=0,out=out_base;i<pn;++i) {
1313  const double *const wtrs_i = wtrs+i*(nr+ns), *const slice_i = slice+i*nrs;
1314  *out = tensor_i2(wtrs_i,nr, wtrs_i+nr,ns, slice_i, temp);
1315  out = (double*)((char*)out + out_stride);
1316  }
1317 }
1318 
static double tensor_ig2(double g[2], const double *wtr, uint nr, const double *wts, uint ns, const double *u, double *work)
Definition: tensor.h:133
void findpt_fun(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
Definition: findpts_el_3.c:915
double oldr[3]
Definition: findpts_el.h:69
double r[3]
Definition: findpts_el.h:69
static double sum(struct xxt *data, double v, uint n, uint tag)
Definition: xxt.c:400
static unsigned pt_flags_to_bin(const unsigned flags)
Definition: findpts_el_3.c:88
static double tensor_i2(const double *Jr, uint nr, const double *Js, uint ns, const double *u, double *work)
Definition: tensor.h:87
#define tmalloc(type, count)
Definition: mem.h:91
static double zt[NT]
lagrange_fun * lag[3]
Definition: findpts_el.h:84
#define D2XDN1(i, d)
void lagrange_fun(double *restrict p, double *restrict data, unsigned n, int d, double x)
Definition: poly.c:20
static unsigned pt_flags_to_bin_noC(const unsigned flags)
Definition: findpts_el_3.c:81
#define findpts_el_free_3
Definition: findpts_el_3.c:15
#define CONVERGED_FLAG
Definition: findpts_el_3.c:72
const double * x[3]
Definition: findpts_el.h:74
static void lin_solve_3(double x[3], const double A[9], const double y[3])
Definition: findpts_el_3.c:30
static void findpt_vol(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
Definition: findpts_el_3.c:921
static void compute_face_data_tr(struct findpts_el_data_3 *fd)
Definition: findpts_el_3.c:327
static void tensor_mtxv(double *y, uint ny, const double *A, const double *x, uint nx)
Definition: tensor.h:63
double x[3]
Definition: findpts_el.h:76
n
Definition: xxt_test.m:73
#define DXDN1(i, d)
#define SETR(d)
#define DO_MAX(x)
static double zr[NR]
#define CHECK_CONSTRAINT(drcd, d3)
static unsigned edge_index(const unsigned x)
Definition: findpts_el_3.c:111
#define findpts_el_3
Definition: findpts_el_3.c:16
static void tensor_mxm(double *C, uint nc, const double *A, uint na, const double *B, uint nb)
Definition: tensor.h:53
#define x
ulong A[NUM][SI]
Definition: sort_test.c:17
static unsigned work_size(const unsigned nr, const unsigned ns, const unsigned nt, const unsigned npt_max)
Definition: findpts_el_3.c:179
static void compute_face_data_st(struct findpts_el_data_3 *fd)
Definition: findpts_el_3.c:310
double * wtend[3]
Definition: findpts_el.h:86
ns
Definition: xxt_test.m:43
#define findpts_el_setup_3
Definition: findpts_el_3.c:14
struct findpts_el_gpt_3 pt[8]
Definition: findpts_el.h:94
static void newton_face(struct findpts_el_pt_3 *const out, const double jac[9], const double rhes[3], const double resid[3], const unsigned d1, const unsigned d2, const unsigned dn, const unsigned flags, const struct findpts_el_pt_3 *const p, const double tol)
Definition: findpts_el_3.c:745
static void seed(struct findpts_el_data_3 *const fd, struct findpts_el_pt_3 *const pt, const unsigned npt)
double jac[9]
Definition: findpts_el.h:76
#define SET_EDGE(j, rd, rn, base)
const double * dxdn[3]
Definition: findpts_el.h:73
static void findpt_face(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
Definition: findpts_el_3.c:958
static void compute_pt_data(struct findpts_el_data_3 *fd)
Definition: findpts_el_3.c:414
#define tensor_dot
Definition: tensor.c:7
unsigned npt_max
Definition: findpts_el.h:79
const double * dxdn2[3]
Definition: findpts_el.h:74
#define gll_lag_setup
Definition: poly.c:18
struct findpts_el_gface_3 face[6]
Definition: findpts_el.h:92
static unsigned plus_2_mod_3(const unsigned x)
Definition: findpts_el_3.c:96
static void findpt_edge(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
unsigned n[3]
Definition: findpts_el.h:82
#define FLAG_MASK
Definition: findpts_el_3.c:73
p
Definition: xxt_test2.m:1
#define gll_lag_size
Definition: poly.c:17
#define SETDR(d)
static void compute_edge_data(struct findpts_el_data_3 *fd, unsigned d)
Definition: findpts_el_3.c:364
static void lin_solve_sym_2(double x[2], const double A[3], const double y[2])
Definition: findpts_el_3.c:51
#define EVAL(r, s)
static unsigned point_index(const unsigned x)
Definition: findpts_el_3.c:123
const double * x[3]
Definition: findpts_el.h:88
static unsigned plus_1_mod_3(const unsigned x)
Definition: findpts_el_3.c:95
#define SET_FACE(i, base, n)
#define D2XDN2(i, d)
static double zs[NS]
static void newton_edge(struct findpts_el_pt_3 *const out, const double jac[9], const double rhes, const double resid[3], const unsigned de, const unsigned dn1, const unsigned dn2, unsigned flags, const struct findpts_el_pt_3 *const p, const double tol)
Definition: findpts_el_3.c:857
double * lag_data[3]
Definition: findpts_el.h:85
#define tensor_mtxm
Definition: tensor.c:8
const double * d2xdn2[3]
Definition: findpts_el.h:74
static unsigned which_bit(const unsigned x)
Definition: findpts_el_3.c:103
for i
Definition: xxt_test.m:74
unsigned flags
Definition: findpts_el.h:70
double * z[3]
Definition: findpts_el.h:83
static const struct findpts_el_gpt_3 * get_pt(struct findpts_el_data_3 *fd, unsigned pi)
Definition: findpts_el_3.c:459
const double * dxdn1[3]
Definition: findpts_el.h:74
static unsigned num_constrained(const unsigned flags)
Definition: findpts_el_3.c:75
static void compute_face_data_rs(struct findpts_el_data_3 *fd)
Definition: findpts_el_3.c:292
static const unsigned nr[3]
static void findpt_pt(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
double * sides
Definition: findpts_el.h:91
void compute_face_data_fun(struct findpts_el_data_3 *fd)
Definition: findpts_el_3.c:289
struct findpts_el_pt_3 * p
Definition: findpts_el.h:80
double hes[18]
Definition: findpts_el.h:76
ulong out[N]
Definition: sort_test2.c:20
static const struct findpts_el_gface_3 * get_face(struct findpts_el_data_3 *fd, unsigned fi)
Definition: findpts_el_3.c:347
double x[3]
Definition: findpts_el.h:69
static double work[TNR *NS]
unsigned side_init
Definition: findpts_el.h:90
se
Definition: xxt_test.m:77
#define lobatto_nodes
Definition: poly.c:15
establishes some macros to establish naming conventions
unsigned index
Definition: findpts_el.h:70
#define findpts_el_eval_3
Definition: findpts_el_3.c:17
const double * x[3]
Definition: findpts_el.h:73
static double y[NR *NS *NT *N]
Definition: obbox_test.c:31
const double * d2xdn1[3]
Definition: findpts_el.h:74
static void newton_vol(struct findpts_el_pt_3 *const out, const double jac[9], const double resid[3], const struct findpts_el_pt_3 *const p, const double tol)
Definition: findpts_el_3.c:534
struct findpts_el_gedge_3 edge[12]
Definition: findpts_el.h:93
static int reject_prior_step_q(struct findpts_el_pt_3 *const out, const double resid[3], const struct findpts_el_pt_3 *const p, const double tol)
Definition: findpts_el_3.c:472
static unsigned face_index(const unsigned x)
Definition: findpts_el_3.c:109
static const struct findpts_el_gedge_3 * get_edge(struct findpts_el_data_3 *fd, unsigned ei)
Definition: findpts_el_3.c:404
static double z[NR *NS *NT *N]
Definition: obbox_test.c:31