14 #define findpts_el_setup_3 PREFIXED_NAME(findpts_el_setup_3)
15 #define findpts_el_free_3 PREFIXED_NAME(findpts_el_free_3 )
16 #define findpts_el_3 PREFIXED_NAME(findpts_el_3 )
17 #define findpts_el_eval_3 PREFIXED_NAME(findpts_el_eval_3 )
22 #define DIAGNOSTICS_ITERATIONS 0
24 #if defined(DIAGNOSTICS_1) || defined(DIAGNOSTICS_2) \
25 || DIAGNOSTICS_ITERATIONS > 0
32 const double a = A[4]*A[8]-A[5]*A[7],
33 b = A[5]*A[6]-A[3]*A[8],
34 c = A[3]*A[7]-A[4]*A[6],
35 idet = 1/(A[0]*a+A[1]*b+A[2]*c);
38 inv1 = A[2]*A[7]-A[1]*A[8],
39 inv2 = A[1]*A[5]-A[2]*A[4],
41 inv4 = A[0]*A[8]-A[2]*A[6],
42 inv5 = A[2]*A[3]-A[0]*A[5],
44 inv7 = A[1]*A[6]-A[0]*A[7],
45 inv8 = A[0]*A[4]-A[1]*A[3];
46 x[0] = idet*(inv0*y[0] + inv1*y[1] + inv2*y[2]);
47 x[1] = idet*(inv3*y[0] + inv4*y[1] + inv5*y[2]);
48 x[2] = idet*(inv6*y[0] + inv7*y[1] + inv8*y[2]);
53 const double idet = 1/(A[0]*A[2] - A[1]*A[1]);
54 x[0] = idet*(A[2]*y[0] - A[1]*y[1]);
55 x[1] = idet*(A[0]*y[1] - A[1]*y[0]);
72 #define CONVERGED_FLAG (1u<<6)
73 #define FLAG_MASK 0x7fu
77 const unsigned y = flags | flags>>1;
78 return (y&1u) + (y>>2 & 1u) + (y>>4 & 1u);
83 return ((flags>>4 & 3u)*3 + (flags>>2 & 3u))*3 + (flags & 3u);
90 const unsigned mask = 0u - (flags>>6);
95 static unsigned plus_1_mod_3(
const unsigned x) {
return ((x | x>>1)+1) & 3u; }
98 const unsigned y = (x-1) & 3u;
105 const unsigned y = x&7u;
106 return (y-(y>>2)) | ((x-1)&4u) | (x>>4);
113 const unsigned y = ~((x>>1) | x);
114 const unsigned RTSR = ((x>>1)&1u) | ((x>>2)&2u) | ((x>>3)&4u) | ((x<<2)&8u);
115 const unsigned re = RTSR>>1;
116 const unsigned se = 4u | RTSR>>2;
117 const unsigned te = 8u | (RTSR&3u);
118 return ( (0u - ( y &1u)) & re )
119 | ( (0u - ((y>>2)&1u)) & se )
120 | ( (0u - ((y>>4)&1u)) & te );
125 return ((x>>1)&1u) | ((x>>2)&2u) | ((x>>3)&4u);
180 const unsigned nr,
const unsigned ns,
const unsigned nt,
181 const unsigned npt_max)
183 unsigned n1, n2, wsize;
185 if(nr>nt) n1=
nr, n2 = (ns>nt ? ns : nt);
188 if(ns>nt) n1=
ns, n2 = (nr>nt ? nr : nt);
191 #define DO_MAX(x) do { const unsigned temp=(x); \
192 wsize=temp>wsize?temp:wsize; } while(0)
193 wsize = (12 + 2*(nr+ns+nt+nr*
ns)) * npt_max + (2*nr>ns?2*nr:
ns);
197 DO_MAX(npt_max*(15+3*(2*n1+n2)));
204 const unsigned npt_max)
206 const unsigned nr=n[0],
ns=n[1], nt=n[2];
207 const unsigned nrs = nr*
ns, nst=ns*nt, ntr=nt*
nr;
208 const unsigned face_size = 12*nst + 12*ntr + 6*nrs;
209 const unsigned off_es = face_size + 36*
nr, off_et = off_es + 36*
ns,
210 tot = off_et + 36*nt;
211 unsigned d,
i, lag_size[3];
216 fd->
n[0]=
nr, fd->
n[1]=
ns, fd->
n[2]=nt;
219 fd->
z[0] =
tmalloc(
double,lag_size[0]+lag_size[1]+lag_size[2]
220 +7*(nr+ns+nt) + tot +
222 fd->
z[1] = fd->
z[0]+
nr;
223 fd->
z[2] = fd->
z[1]+
ns;
236 double *wt=fd->
wtend[d];
unsigned n=fd->
n[d];
242 wt[0]=1;
for(i=1;i<
n;++
i) wt[i]=0;
243 wt+=3*
n; {
for(i=0;i<n-1;++
i) wt[i]=0; } wt[
i]=1;
246 #define SET_FACE(i,base,n) do { for(d=0;d<3;++d) \
247 fd->face[2*i ].x[d] = fd->sides + base + d *n, \
248 fd->face[2*i ].dxdn[d] = fd->sides + base + (3+d)*n, \
249 fd->face[2*i+1].x[d] = fd->sides + base + (6+d)*n, \
250 fd->face[2*i+1].dxdn[d] = fd->sides + base + (9+d)*n; \
257 fd->
face[4].
x[d] = 0,
259 fd->
face[5].
x[d] = 0,
262 #define SET_EDGE1(j,k,d,rd,rn,base) \
264 fd->edge[4*j+2*i+0].dxdn2[d] = fd->face[2*k+i].dxdn[d], \
265 fd->edge[4*j+2*i+1].dxdn2[d] = fd->face[2*k+i].dxdn[d]+n##rd##rn-n##rd;
266 #define SET_EDGE2(j,d,rd,rn,base) \
268 fd->edge[4*j+i].dxdn1 [d] = fd->sides + base + (9*i +d)*n##rd, \
269 fd->edge[4*j+i].d2xdn1[d] = fd->sides + base + (9*i+3+d)*n##rd, \
270 fd->edge[4*j+i].d2xdn2[d] = fd->sides + base + (9*i+6+d)*n##rd;
271 #define SET_EDGE(j,rd,rn,base) do { \
272 for(d=0;d<3;++d) { SET_EDGE1(j,plus_2_mod_3(j),d,rd,rn,base); \
273 SET_EDGE2(j,d,rd,rn,base); } \
294 const unsigned nr = fd->
n[0],
ns=fd->
n[1], nt=fd->
n[2],
295 nrs = nr*
ns, nst=ns*nt, ntr = nt*
nr, nrstm1 = nrs*(nt-1);
298 memcpy(work , fd->
wtend[2]+ nt, nt*
sizeof(
double));
299 memcpy(work+nt, fd->
wtend[2]+4*nt, nt*
sizeof(
double));
302 memcpy(
out+ d *nrs, work+2*nt , nrs*
sizeof(
double));
303 memcpy(
out+(3+d)*nrs, work+2*nt+nrs , nrs*
sizeof(
double));
304 fd->
face[4].
x[d] = fd->
x[d];
305 fd->
face[5].
x[d] = fd->
x[d] + nrstm1;
312 const unsigned nr = fd->
n[0],
ns=fd->
n[1], nt=fd->
n[2], nst=
ns*nt;
315 memcpy(work , fd->
wtend[0] , 2*nr*
sizeof(
double));
316 memcpy(work+2*nr, fd->
wtend[0]+3*nr, 2*nr*
sizeof(
double));
319 memcpy(
out+ i *nst, work+4*nr , nst*
sizeof(
double));
320 memcpy(
out+(3+i)*nst, work+4*nr+ nst, nst*
sizeof(
double));
321 memcpy(
out+(6+i)*nst, work+4*nr+2*nst, nst*
sizeof(
double));
322 memcpy(
out+(9+i)*nst, work+4*nr+3*nst, nst*
sizeof(
double));
329 const unsigned nr = fd->
n[0],
ns=fd->
n[1], nt=fd->
n[2],
330 nrs = nr*
ns, nst=ns*nt, ntr=nt*
nr;
333 memcpy(work , fd->
wtend[1] , 2*ns*
sizeof(
double));
334 memcpy(work+2*ns, fd->
wtend[1]+3*ns, 2*ns*
sizeof(
double));
337 double *outk;
double *in = work+4*
ns;
339 for(outk=
out+ d *ntr+k,i=0;i<
nr;++
i,outk+=nt) *outk=*in++;
340 for(outk=
out+(3+d)*ntr+k,i=0;i<
nr;++
i,outk+=nt) *outk=*in++;
341 for(outk=
out+(6+d)*ntr+k,i=0;i<
nr;++
i,outk+=nt) *outk=*in++;
342 for(outk=
out+(9+d)*ntr+k,i=0;i<
nr;++
i,outk+=nt) *outk=*in++;
350 const unsigned mask = 1u<<(fi/2);
360 return &fd->
face[fi];
367 const unsigned n = fd->
n[d], n1 = fd->
n[dn1], n2 = fd->
n[dn2];
368 const unsigned nr=fd->
n[0],
ns=fd->
n[1],nt=fd->
n[2],
369 nrs=nr*
ns,nst=ns*nt,ntr=nt*
nr;
370 const unsigned base = 6*nrs + 12*nst + 12*ntr
371 + (d>0 ? 36*nr : 0) + (d>1 ? 36*ns : 0);
372 #define DXDN1(i,d) (fd->sides+base+(9*(i) +(d))*n)
373 #define D2XDN1(i,d) (fd->sides+base+(9*(i)+3+(d))*n)
374 #define D2XDN2(i,d) (fd->sides+base+(9*(i)+6+(d))*n)
380 for(xd=0;xd<3;++xd)
for(i=0;i<2;++
i)
381 e[2*i ].
x[xd] = face_d_n1[i].
x[xd],
382 e[2*i+1].
x[xd] = face_d_n1[i].
x[xd]+n*(n1-1);
383 memcpy(work , fd->
wtend[dn1]+ n1,2*n1*
sizeof(
double));
384 memcpy(work+2*n1, fd->
wtend[dn1]+4*n1,2*n1*
sizeof(
double));
385 for(i=0;i<2;++
i)
for(xd=0;xd<3;++xd) {
386 tensor_mxm(work+4*n1,n, face_d_n1[i].
x[xd],n1, work,4);
387 memcpy(
DXDN1(2*i+0,xd), work+4*n1 , n*
sizeof(
double));
388 memcpy(
D2XDN1(2*i+0,xd), work+4*n1+ n, n*
sizeof(
double));
389 memcpy(
DXDN1(2*i+1,xd), work+4*n1+2*n, n*
sizeof(
double));
390 memcpy(
D2XDN1(2*i+1,xd), work+4*n1+3*n, n*
sizeof(
double));
392 memcpy(work , fd->
wtend[dn2]+2*n2,n2*
sizeof(
double));
393 memcpy(work+n2, fd->
wtend[dn2]+5*n2,n2*
sizeof(
double));
394 for(i=0;i<2;++
i)
for(xd=0;xd<3;++xd) {
395 tensor_mtxm(work+2*n2,n, face_n2_d[i].
x[xd],n2, work,2);
396 memcpy(
D2XDN2( i,xd), work+2*n2 , n*
sizeof(
double));
397 memcpy(
D2XDN2(2+i,xd), work+2*n2+n, n*
sizeof(
double));
407 const unsigned mask = 8u<<(ei/4);
410 return &fd->
edge[ei];
416 const unsigned nr = fd->
n[0], nt = fd->
n[2];
420 for(i=0;i<4;++
i)
for(d=0;d<3;++d)
421 fd->
pt[2*i ].
x[d] = e[i].
x[d][0],
426 fd->
pt[2*i+1].
x[d] = e[i].
x[d][nr-1],
427 fd->
pt[2*i+1].
jac[3*d+1] = e[i].
dxdn1[d][nr-1],
428 fd->
pt[2*i+1].
jac[3*d+2] = e[i].
dxdn2[d][nr-1],
431 memcpy(work , fd->
wtend[0]+ nr, 2*nr*
sizeof(
double));
432 memcpy(work+2*nr, fd->
wtend[0]+4*nr, 2*nr*
sizeof(
double));
433 for(i=0;i<4;++
i)
for(d=0;d<3;++d) {
435 fd->
pt[2*
i ].
jac[3*d ] = work[4*
nr ];
436 fd->
pt[2*
i ].
hes[6*d ] = work[4*nr+1];
437 fd->
pt[2*i+1].
jac[3*d ] = work[4*nr+2];
438 fd->
pt[2*i+1].
hes[6*d ] = work[4*nr+3];
440 memcpy(work+nr,work+2*nr,nr*
sizeof(
double));
441 for(i=0;i<4;++
i)
for(d=0;d<3;++d) {
443 fd->
pt[2*
i ].
hes[6*d+1] = work[2*
nr ];
444 fd->
pt[2*i+1].
hes[6*d+1] = work[2*nr+1];
446 fd->
pt[2*
i ].
hes[6*d+2] = work[2*
nr ];
447 fd->
pt[2*i+1].
hes[6*d+2] = work[2*nr+1];
450 memcpy(work , fd->
wtend[2]+ nt, nt*
sizeof(
double));
451 memcpy(work+nt, fd->
wtend[2]+4*nt, nt*
sizeof(
double));
452 for(i=0;i<4;++
i)
for(d=0;d<3;++d) {
454 fd->
pt[
i].
hes[6*d+4] = work[2*nt ];
455 fd->
pt[4+
i].
hes[6*d+4] = work[2*nt+1];
473 const double resid[3],
477 const double old_dist2 = p->
dist2;
478 const double dist2 = resid[0]*resid[0]+resid[1]*resid[1]+resid[2]*resid[2];
479 const double decr = old_dist2-dist2;
480 const double pred = p->
dist2p;
481 out->
x[0]=p->
x[0],out->
x[1]=p->
x[1],out->
x[2]=p->
x[2];
486 printf(
"Checking prior step:\n"
487 " old r = (%.17g,%.17g,%.17g), old flags = %x\n"
488 " old_dist2 = %.17g\n"
489 " r = (%.17g,%.17g,%.17g), flags = %x\n"
491 " difference = %.17g\n"
492 " predicted = %.17g\n"
496 decr, pred, decr/pred);
498 if(decr>= 0.01 * pred) {
499 if(decr>= 0.9 * pred) {
502 printf(
" very good iteration; tr -> %g\n", out->
tr);
506 printf(
" good iteration; tr = %g\n", p->
tr);
516 double v0 = fabs(p->
r[0]-p->
oldr[0]),
517 v1 = fabs(p->
r[1]-p->
oldr[1]),
518 v2 = fabs(p->
r[2]-p->
oldr[2]);
519 out->
tr = (v1>v2?(v0>v1?v0:v1):(v0>v2?v0:v2))/4;
521 printf(
" bad iteration; tr -> %g\n", out->
tr);
523 out->
dist2=old_dist2;
535 const double jac[9],
const double resid[3],
538 const double tr = p->
tr;
539 double bnd[6] = { -1,1, -1,1, -1,1 };
542 unsigned d, mask, flags;
543 r0[0]=p->
r[0],r0[1]=p->
r[1],r0[2]=p->
r[2];
545 printf(
"newton_vol:\n");
546 printf(
" resid = (%g,%g,%g); r^T r / 2 = %g\n",resid[0],resid[1],resid[2],
547 (resid[0]*resid[0]+resid[1]*resid[1]+resid[2]*resid[2])/2);
548 printf(
" jac = %g\t%g\t%g\n"
551 jac[0],jac[1],jac[2],jac[3],jac[4],jac[5],jac[6],jac[7],jac[8]);
552 printf(
" r = (%.15g,%.15g,%.15g)\n",r0[0],r0[1],r0[2]);
557 if(r0[d]-tr>-1) bnd[2*d ]=r0[d]-tr, mask^=1u<<(2*d);
558 if(r0[d]+tr< 1) bnd[2*d+1]=r0[d]+tr, mask^=2u<<(2*d);
564 printf(
" min at r = (%.17g,%.17g,%.17g)\n",
565 r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
570 double nr = r0[d]+dr[d];
571 if((nr-bnd[2*d])*(bnd[2*d+1]-nr)>=0)
continue;
573 double f = (bnd[2*d ]-r0[d])/dr[d];
574 if(f<fac) fac=f, flags = 1u<<(2*d);
576 double f = (bnd[2*d+1]-r0[d])/dr[d];
577 if(f<fac) fac=f, flags = 2u<<(2*d);
582 printf(
" flags = %x, fac = %.15g\n",flags,fac);
585 if(flags==0)
goto newton_vol_fin;
587 for(d=0;d<3;++d) dr[d]*=fac;
592 double drc[2], fac=1;
593 unsigned new_flags=0;
594 double res[3],
y[2], JtJ[3];
595 res[0] = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
596 res[1] = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
597 res[2] = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
599 y[0] = jac[d1]*res[0]+jac[3+d1]*res[1]+jac[6+d1]*res[2],
600 y[1] = jac[d2]*res[0]+jac[3+d2]*res[1]+jac[6+d2]*res[2];
602 JtJ[0] = jac[ d1]*jac[ d1]
604 +jac[6+d1]*jac[6+d1],
605 JtJ[1] = jac[ d1]*jac[ d2]
607 +jac[6+d1]*jac[6+d2],
608 JtJ[2] = jac[ d2]*jac[ d2]
610 +jac[6+d2]*jac[6+d2];
613 printf(
" face %u, dn=%u, (d1,d2)=(%u,%u)\n",fi,dn,d1,d2);
614 printf(
" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
615 printf(
" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
616 (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
617 printf(
" min at (%.17g,%.17g)\n",
618 r0[d1]+dr[d1]+drc[0],r0[d2]+dr[d2]+drc[1]);
620 #define CHECK_CONSTRAINT(drcd,d3) do { \
621 const double rz = r0[d3]+dr[d3], lb=bnd[2*d3],ub=bnd[2*d3+1]; \
622 const double delta=drcd, nr = r0[d3]+(dr[d3]+delta); \
623 if((nr-lb)*(ub-nr)<0) { \
625 double f = (lb-rz)/delta; \
626 if(f<fac) fac=f, new_flags = 1u<<(2*d3); \
628 double f = (ub-rz)/delta; \
629 if(f<fac) fac=f, new_flags = 2u<<(2*d3); \
635 printf(
" new_flags = %x, fac = %.17g\n",new_flags,fac);
637 dr[d1] += fac*drc[0], dr[d2] += fac*drc[1];
638 if(new_flags==0)
goto newton_vol_fin;
644 const unsigned de = ei>>2;
646 unsigned new_flags = 0;
647 double res[3],
y,JtJ,drc;
648 res[0] = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
649 res[1] = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
650 res[2] = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
652 y = jac[de]*res[0]+jac[3+de]*res[1]+jac[6+de]*res[2];
654 JtJ = jac[ de]*jac[ de]
656 +jac[6+de]*jac[6+de];
659 printf(
" edge %u, de=%u\n",ei,de);
660 printf(
" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
661 printf(
" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
662 (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
663 printf(
" min at %.17g\n", r0[de]+dr[de]+drc);
666 #undef CHECK_CONSTRAINT
668 printf(
" new_flags = %x, fac = %.17g\n",new_flags,fac);
672 goto newton_vol_relax;
677 const unsigned old_flags = flags;
680 res[0] = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
681 res[1] = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
682 res[2] = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
684 y[0] = jac[0]*res[0]+jac[3]*res[1]+jac[6]*res[2],
685 y[1] = jac[1]*res[0]+jac[4]*res[1]+jac[7]*res[2],
686 y[2] = jac[2]*res[0]+jac[5]*res[1]+jac[8]*res[2];
687 #define SETDR(d) do { \
688 unsigned f = flags>>(2*d) & 3u; \
689 if(f) dr[d] = bnd[2*d+(f-1)] - r0[d]; \
694 unsigned c = flags>>(2*d) & 3u;
696 else if(dr[d]*y[d]<0) flags &= ~(3u<<(2*d));
698 if( (c==1&&dr[d]>0) || (c==2&&dr[d]<0) )
699 printf(
"FAIL! c=%u, dr[d]=%g\n",c,dr[d]);
703 printf(
" checking constraints (%x)\n",old_flags);
704 printf(
" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
705 printf(
" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
706 (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
707 printf(
" relaxed %x -> %x\n",old_flags,flags);
709 if(flags==old_flags)
goto newton_vol_fin;
711 case 1:
goto newton_vol_face;
712 case 2:
goto newton_vol_edge;
719 const double res[3]={ resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
720 resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
721 resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]) };
722 printf(
" r=(%.17g,%.17g,%.17g)\n", r0[0]+dr[0],r0[1]+dr[1],r0[2]+dr[2]);
723 printf(
" resid = (%g,%g,%g); r^T r / 2 = %g\n",res[0],res[1],res[2],
724 (res[0]*res[0]+res[1]*res[1]+res[2]*res[2])/2);
728 if(fabs(dr[0])+fabs(dr[1])+fabs(dr[2]) < tol) flags |=
CONVERGED_FLAG;
730 const double res0 = resid[0]-(jac[0]*dr[0]+jac[1]*dr[1]+jac[2]*dr[2]),
731 res1 = resid[1]-(jac[3]*dr[0]+jac[4]*dr[1]+jac[5]*dr[2]),
732 res2 = resid[2]-(jac[6]*dr[0]+jac[7]*dr[1]+jac[8]*dr[2]);
733 out->
dist2p=resid[0]*resid[0]+resid[1]*resid[1]+resid[2]*resid[2]
734 -(res0*res0+res1*res1+res2*res2);
736 #define SETR(d) do { \
737 unsigned f = flags>>(2*d) & 3u; \
738 out->r[d] = f==0 ? r0[d]+dr[d] : ( f==1 ? -1 : 1 ); \
746 const double jac[9],
const double rhes[3],
747 const double resid[3],
748 const unsigned d1,
const unsigned d2,
const unsigned dn,
749 const unsigned flags,
752 const double tr = p->
tr;
754 double r[2], dr[2]={0,0};
755 unsigned mask, new_flags;
756 double v, tv;
unsigned i;
757 double A[3],
y[2], r0[2];
759 A[0] = jac[ d1]*jac[ d1]
761 +jac[6+d1]*jac[6+d1] - rhes[0],
762 A[1] = jac[ d1]*jac[ d2]
764 +jac[6+d1]*jac[6+d2] - rhes[1],
765 A[2] = jac[ d2]*jac[ d2]
767 +jac[6+d2]*jac[6+d2] - rhes[2];
769 y[0] = jac[ d1]*resid[0]
772 y[1] = jac[ d2]*resid[0]
775 r0[0] = p->
r[d1], r0[1] = p->
r[d2];
778 printf(
"newton_face, dn=%u, (d1,d2)=%u,%u:\n", dn,d1,d2);
779 printf(
" J^T r = (%g,%g)\n", y[0],y[1]);
780 printf(
" A = %g\t%g\n"
781 " %g\t%g\n", A[0],A[1],A[1],A[2]);
782 printf(
" r = (%.15g,%.15g)\n", r0[0],r0[1]);
787 if(r0[0]-tr>-1) bnd[0]=-tr, mask^=1u;
else bnd[0]=-1-r0[0];
788 if(r0[0]+tr< 1) bnd[1]= tr, mask^=2u;
else bnd[1]= 1-r0[0];
789 if(r0[1]-tr>-1) bnd[2]=-tr, mask^=1u<<2;
else bnd[2]=-1-r0[1];
790 if(r0[1]+tr< 1) bnd[3]= tr, mask^=2u<<2;
else bnd[3]= 1-r0[1];
793 printf(
" bounds = ([%.15g,%.15g],[%.15g,%.15g])\n",
794 r0[0]+bnd[0],r0[0]+bnd[1],r0[1]+bnd[2],r0[1]+bnd[3]);
797 if(A[0]+A[2]<=0 || A[0]*A[2]<=A[1]*A[1])
goto newton_face_constrained;
801 printf(
" min at r = (%.15g,%.15g)\n", r0[0]+dr[0],r0[1]+dr[1]);
804 #define EVAL(r,s) -(y[0]*r+y[1]*s)+(r*A[0]*r+(2*r*A[1]+s*A[2])*s)/2
805 if( (dr[0]-bnd[0])*(bnd[1]-dr[0])>=0
806 && (dr[1]-bnd[2])*(bnd[3]-dr[1])>=0) {
807 r[0] = r0[0]+dr[0], r[1] = r0[1]+dr[1];
808 v =
EVAL(dr[0],dr[1]);
809 goto newton_face_fin;
811 newton_face_constrained:
812 v =
EVAL(bnd[0],bnd[2]); i=1u|(1u<<2);
813 tv =
EVAL(bnd[1],bnd[2]);
if(tv<v) v=tv, i=2u|(1u<<2);
814 tv =
EVAL(bnd[0],bnd[3]);
if(tv<v) v=tv, i=1u|(2u<<2);
815 tv =
EVAL(bnd[1],bnd[3]);
if(tv<v) v=tv, i=2u|(2u<<2);
818 drc = (y[0] - A[1]*bnd[2])/A[0];
819 if((drc-bnd[0])*(bnd[1]-drc)>=0 && (tv=
EVAL(drc,bnd[2]))<v)
820 v=tv,i=1u<<2,dr[0]=drc;
821 drc = (y[0] - A[1]*bnd[3])/A[0];
822 if((drc-bnd[0])*(bnd[1]-drc)>=0 && (tv=
EVAL(drc,bnd[3]))<v)
823 v=tv,i=2u<<2,dr[0]=drc;
827 drc = (y[1] - A[1]*bnd[0])/A[2];
828 if((drc-bnd[2])*(bnd[3]-drc)>=0 && (tv=
EVAL(bnd[0],drc))<v)
830 drc = (y[1] - A[1]*bnd[1])/A[2];
831 if((drc-bnd[2])*(bnd[3]-drc)>=0 && (tv=
EVAL(bnd[1],drc))<v)
836 #define SETR(d,d3) do { \
837 const unsigned f = i>>(2*d) & 3u; \
838 if(f==0) r[d]=r0[d]+dr[d]; \
840 if((f&(mask>>(2*d)))==0) r[d]=r0[d]+(f==1?-tr:tr); \
841 else r[d]=(f==1?-1:1), new_flags |= f<<(2*d3); \
846 printf(
" constrained min at r = (%.15g,%.15g)\n", r[0],r[1]);
853 out->
r[dn]=p->
r[dn], out->
r[d1]=r[0],out->
r[d2]=r[1];
858 const double jac[9],
const double rhes,
const double resid[3],
859 const unsigned de,
const unsigned dn1,
const unsigned dn2,
863 const double tr = p->
tr;
865 const double A = jac[ de]*jac[ de]
867 +jac[6+de]*jac[6+de] - rhes;
869 const double y = jac[ de]*resid[0]
873 const double oldr = p->
r[de];
874 double dr,
nr,tdr,tnr;
875 double v,tv;
unsigned new_flags=0, tnew_flags=0;
878 printf(
"Newton edge %u (dn1=%u,dn2=%u) flags=%x\n",de,dn1,dn2,flags);
879 printf(
" A=%g, y=%g\n",A,y);
880 if(A<=0) printf(
" A not positive\n");
881 printf(
" r=(%g,%g,%g)\n",p->
r[0],p->
r[1],p->
r[2]);
884 #define EVAL(dr) (dr*A-2*y)*dr
888 dr = y/
A, nr = oldr+dr;
889 if(fabs(dr)<tr && fabs(nr)<1) { v=
EVAL(dr);
goto newton_edge_fin; }
892 if(( nr=oldr-tr)>-1) dr=-tr;
893 else nr=-1, dr=-1-oldr, new_flags = flags | 1u<<(2*de);
896 if((tnr=oldr+tr)< 1) tdr=tr;
897 else tnr= 1, tdr= 1-oldr, tnew_flags = flags | 2u<<(2*de);
900 if(tv<v) nr=tnr, dr=tdr, v=tv, new_flags=tnew_flags;
906 out->
r[dn1]=p->
r[dn1];
907 out->
r[dn2]=p->
r[dn2];
909 out->
flags = flags | new_flags | (p->
flags<<7);
911 printf(
" new r = (%g,%g,%g)\n",out->
r[0],out->
r[1],out->
r[2]);
918 const struct findpts_el_pt_3 *
const p,
const unsigned pn,
const double tol);
924 const struct findpts_el_pt_3 *
const p,
const unsigned pn,
const double tol)
926 const unsigned nr=fd->
n[0],
ns=fd->
n[1],nt=fd->
n[2],
928 double *
const resid = fd->
work, *
const jac = resid + 3*pn,
929 *
const wtrs = jac+9*pn, *
const wtt = wtrs+2*(nr+
ns)*pn,
930 *
const slice = wtt+2*nt*pn, *
const temp = slice + 2*pn*nrs;
931 unsigned i;
unsigned d;
938 fd->
lag[2](wtt+2*i*nt , fd->
lag_data[2], nt, 1, p[i].
r[2]);
942 const double *
const wtrs_i = wtrs+2*i*(nr+
ns),
943 *
const slice_i = slice+2*i*nrs;
944 double *
const jac_i = jac+9*i+3*d;
946 wtrs_i,nr, wtrs_i+2*nr,ns, slice_i, temp);
947 jac_i[2] =
tensor_i2(wtrs_i,nr, wtrs_i+2*nr,ns, slice_i+nrs, temp);
953 else newton_vol(out+i, jac+9*i, resid+3*i, p+i, tol);
961 const struct findpts_el_pt_3 *
const p,
const unsigned pn,
const double tol)
966 const unsigned n1 = fd->
n[d1], n2 = fd->
n[d2];
967 double *
const resid=fd->
work, *
const jac=resid+3*pn, *
const hes=jac+9*pn,
968 *
const wt1 = hes+3*pn, *
const wt2 = wt1+3*n1*pn,
969 *
const slice = wt2+3*n2*pn;
971 unsigned i;
unsigned d;
974 printf(
"Face %u\n",fi);
975 printf(
" pflag = %u\n",pflag);
976 printf(
" fi = %u\n",fi);
977 printf(
" dn, d1, d2 = %u, %u, %u\n",dn,d1,d2);
978 printf(
" n1, n2 = %u, %u \n", n1,n2);
983 fd->
lag[d1](wt1+3*i*n1, fd->
lag_data[d1], n1, 2, p[i].
r[d1]);
985 fd->
lag[d2](wt2+3*i*n2, fd->
lag_data[d2], n2, 2, p[i].
r[d2]);
986 for(i=0;i<3*pn;++
i) hes[i]=0;
990 const double *
const wt1_i = wt1+3*i*n1, *
const slice_i = slice+3*i*n1;
994 resid[3*i+d] = r = p[
i].
x[d] - v[0];
995 jac[9*i+3*d+d1] = v[1];
996 jac[9*i+3*d+d2] = v[3];
997 hes[3*
i ] += r * v[2];
998 hes[3*i+1] += r * v[4];
999 hes[3*i+2] += r * v[6];
1002 for(i=1;i<pn;++
i) memcpy(wt2+i*n2, wt2+3*i*n2, n2*
sizeof(
double));
1006 jac[9*i+3*d+dn] =
tensor_dot(wt1+3*i*n1, slice+i*n1, n1);
1010 double *
const resid_i=resid+3*
i, *
const jac_i=jac+9*
i, *
const hes_i=hes+3*
i;
1014 const double steep = resid_i[0] * jac_i[ dn]
1015 +resid_i[1] * jac_i[3+dn]
1016 +resid_i[2] * jac_i[6+dn];
1017 #ifdef DIAGNOSTICS_1
1018 printf(
"jacobian = %g\t%g\t%g\n"
1020 " %g\t%g\t%g\n",jac_i[0],jac_i[1],jac_i[2],
1021 jac_i[3],jac_i[4],jac_i[5],jac_i[6],jac_i[7],jac_i[8]);
1022 printf(
"resid_i = (%g,%g,%g)\n", resid_i[0],resid_i[1],resid_i[2]);
1023 printf(
"steep = %g (%s)\n", steep, steep * p[i].r[dn] < 0 ?
"in" :
"out");
1025 if(steep * p[i].r[dn] < 0)
1028 newton_face(out+i, jac_i, hes_i, resid_i, d1,d2,dn,pflag, p+i, tol);
1037 const struct findpts_el_pt_3 *
const p,
const unsigned pn,
const double tol)
1042 const unsigned n = fd->
n[de];
1043 double *wt = fd->
work;
1045 unsigned i;
unsigned d;
1047 #ifdef DIAGNOSTICS_1
1048 printf(
"Edge %u\n",ei);
1049 printf(
" pflag = %u\n",pflag);
1050 printf(
" ei = %u\n",ei);
1051 printf(
" de, dn1, dn2 = %u, %u, %u\n",de,dn1,dn2);
1052 printf(
" n = %u \n", n);
1056 double dxi[3], resid[3], jac[9];
1057 double hes[5] = {0,0,0,0,0};
1063 resid[d] = r = p[
i].
x[d] - dxi[0];
1064 jac[3*d+de] = dxi[1];
1065 hes[0] += r * dxi[2];
1067 jac[3*d+dn1] = dxi[0];
1068 hes[1] += r * dxi[1];
1070 jac[3*d+dn2] = dxi[0];
1071 hes[2] += r * dxi[1];
1079 double steep[3], sr1, sr2;
1080 steep[0] = jac[0]*resid[0] + jac[3]*resid[1] + jac[6]*resid[2],
1081 steep[1] = jac[1]*resid[0] + jac[4]*resid[1] + jac[7]*resid[2],
1082 steep[2] = jac[2]*resid[0] + jac[5]*resid[1] + jac[8]*resid[2];
1083 sr1 = steep[dn1]*p[
i].
r[dn1],
1084 sr2 = steep[dn2]*p[
i].
r[dn2];
1085 #ifdef DIAGNOSTICS_1
1086 printf(
"jacobian = %g\t%g\t%g\n"
1088 " %g\t%g\t%g\n",jac[0],jac[1],jac[2],
1089 jac[3],jac[4],jac[5],jac[6],jac[7],jac[8]);
1090 printf(
"hessian = %g\t%g\t%g\n"
1092 " \t \t%g\n", hes[0],hes[1],hes[2],hes[3],hes[4]);
1093 printf(
"resid = (%g,%g,%g)\n", resid[0],resid[1],resid[2]);
1094 printf(
"steep1 = %g (%s)\n", steep[dn1], sr1 < 0 ?
"in" :
"out");
1095 printf(
"steep2 = %g (%s)\n", steep[dn2], sr2 < 0 ?
"in" :
"out");
1101 double rh[3]; rh[0]=hes[0], rh[1]=hes[1], rh[2]=hes[3];
1103 pflag & (3u<<(dn2*2)), p+i, tol);
1106 double rh[3]; rh[0]=hes[4], rh[1]=hes[2], rh[2]=hes[0];
1108 pflag & (3u<<(dn1*2)), p+i, tol);
1110 newton_edge(out+i, jac,hes[0],resid, de,dn1,dn2, pflag, p+i, tol);
1118 const struct findpts_el_pt_3 *
const p,
const unsigned pn,
const double tol)
1123 const double *
const x = gpt->
x, *
const jac = gpt->
jac, *
const hes = gpt->
hes;
1126 #ifdef DIAGNOSTICS_1
1127 printf(
"Point %u\n",pi);
1128 printf(
" pflag = %u\n",pflag);
1129 printf(
" pi = %u\n",pi);
1133 unsigned d1,d2,dn, de,dn1,dn2, hi0,hi1,hi2;
1134 double resid[3], steep[3], sr[3];
1135 resid[0] = p[
i].
x[0]-x[0],
1136 resid[1] = p[
i].
x[1]-x[1],
1137 resid[2] = p[
i].
x[2]-x[2];
1138 steep[0] = jac[0]*resid[0] + jac[3]*resid[1] + jac[6]*resid[2],
1139 steep[1] = jac[1]*resid[0] + jac[4]*resid[1] + jac[7]*resid[2],
1140 steep[2] = jac[2]*resid[0] + jac[5]*resid[1] + jac[8]*resid[2];
1141 sr[0] = steep[0]*p[
i].
r[0],
1142 sr[1] = steep[1]*p[
i].
r[1],
1143 sr[2] = steep[2]*p[
i].
r[2];
1149 if(sr[2]<0)
goto findpt_pt_vol;
1150 else { d1=0,d2=1,dn=2, hi0=0,hi1=1,hi2=3;
goto findpt_pt_face; }
1152 else if(sr[2]<0) {d1=2,d2=0,dn=1, hi0=5,hi1=2,hi2=0;
goto findpt_pt_face;}
1153 else { de=0,dn1=1,dn2=2, hi0=0;
goto findpt_pt_edge; }
1156 if(sr[2]<0) { d1=1,d2=2,dn=0, hi0=3,hi1=4,hi2=5;
goto findpt_pt_face; }
1157 else { de=1,dn1=2,dn2=0, hi0=3;
goto findpt_pt_edge; }
1159 else if(sr[2]<0) { de=2,dn1=0,dn2=1, hi0=5;
goto findpt_pt_edge; }
1160 out[
i].
r[0]=p[
i].
r[0],out[
i].
r[1]=p[
i].
r[1],out[
i].
r[2]=p[
i].
r[2];
1169 rh[0] = resid[0]*
hes[hi0]+resid[1]*
hes[6+hi0]+resid[2]*
hes[12+hi0],
1170 rh[1] = resid[0]*hes[hi1]+resid[1]*hes[6+hi1]+resid[2]*hes[12+hi1],
1171 rh[2] = resid[0]*hes[hi2]+resid[1]*hes[6+hi2]+resid[2]*hes[12+hi2];
1173 pflag&(3u<<(2*dn)), p+i, tol);
1177 resid[0]*
hes[hi0]+resid[1]*
hes[6+hi0]+resid[2]*
hes[12+hi0];
1179 pflag&~(3u<<(2*de)), p+i, tol);
1188 const unsigned nr=fd->
n[0],
ns=fd->
n[1], nt=fd->
n[2];
1189 unsigned i,j,k, ii=0;
1190 for(p=pt;p!=pe;++
p) p->
dist2=DBL_MAX;
1192 const double zt=fd->
z[2][k];
1194 const double zs=fd->
z[1][j];
1196 const double zr=fd->
z[0][
i];
1197 const double x=fd->
x[0][ii],
y=fd->
x[1][ii],
z=fd->
x[2][ii];
1199 for(p=pt;p!=pe;++
p) {
1200 const double dx=p->
x[0]-
x,dy=p->
x[1]-
y,dz=p->
x[2]-
z;
1201 const double dist2 = dx*dx+dy*dy+dz*dz;
1202 if(p->
dist2<=dist2)
continue;
1217 unsigned nconv = npt;
1219 unsigned count[27] = { 0,0,0, 0,0,0, 0,0,0,
1220 0,0,0, 0,0,0, 0,0,0,
1221 0,0,0, 0,0,0, 0,0,0 } ;
1225 for(i=0;i<npt;++
i) {
1226 pstart[
i].x[0]=pbuf[
i].
x[0];
1227 pstart[
i].x[1]=pbuf[
i].
x[1];
1228 pstart[
i].x[2]=pbuf[
i].
x[2];
1229 pstart[
i].r[0]=pbuf[
i].
r[0];
1230 pstart[
i].r[1]=pbuf[
i].
r[1];
1231 pstart[
i].r[2]=pbuf[
i].
r[2];
1232 pstart[
i].index=
i,pstart[
i].flags=0;
1233 pstart[
i].dist2=DBL_MAX,pstart[
i].dist2p=0,pstart[
i].tr=1;
1236 while(nconv && step++ < 50) {
1240 #if DIAGNOSTICS_ITERATIONS>1
1242 printf(
"findpts_el_3 Newton step (%u), %u unconverged:\n ", step,nconv);
1243 for(i=0;i<27;++
i) printf(
" %u",count[i]);
1247 #ifdef DIAGNOSTICS_3
1249 unsigned d,
i,
n=fd->
n[0]*fd->
n[1]*fd->
n[2];
1250 printf(
"geometry:\n{\n");
1254 printf(
" %.15g%s\n",fd->
x[d][i],i==n-1?
"":
",");
1255 printf(
" }%s\n",d==3-1?
"":
",");
1261 for(p=pstart,pout=pbuf; p!=pe; p+=pn,pout+=pn) {
1268 unsigned offset[28] = { 0,0,0, 0,0,0, 0,0,0,
1269 0,0,0, 0,0,0, 0,0,0,
1270 0,0,0, 0,0,0, 0,0,0, 0 };
1272 for(pout=pbuf; pout!=pe; ++pout)
1275 unsigned i;
unsigned sum=0;
1277 unsigned ci=offset[
i]; count[
i]=ci, offset[
i]=
sum, sum+=ci;
1279 nconv = offset[27] =
sum;
1281 for(pout=pbuf; pout!=pe; ++pout)
1286 for(p=pstart;p!=pe;++
p)
1289 #if DIAGNOSTICS_ITERATIONS
1290 printf(
"findpts_el_3 took %u steps\n ", step);
1295 double *
const out_base,
const unsigned out_stride,
1296 const double *
const r_base,
const unsigned r_stride,
const unsigned pn,
1299 const unsigned nr=fd->
n[0],
ns=fd->
n[1],nt=fd->
n[2],
1301 double *
const wtrs = fd->
work, *
const wtt = wtrs+(nr+
ns)*pn,
1302 *
const slice = wtt+nt*pn, *
const temp = slice + pn*nrs;
1303 unsigned i;
const double *
r;
double *
out;
1304 for(i=0,r=r_base;i<pn;++
i) {
1306 fd->
lag[1](wtrs+i*(nr+
ns)+nr, fd->
lag_data[1], ns, 0, r[1]);
1307 fd->
lag[2](wtt +i*nt , fd->
lag_data[2], nt, 0, r[2]);
1308 r = (
const double*)((
const char*)r + r_stride);
1312 for(i=0,out=out_base;i<pn;++
i) {
1313 const double *
const wtrs_i = wtrs+i*(nr+
ns), *
const slice_i = slice+i*nrs;
1314 *out =
tensor_i2(wtrs_i,nr, wtrs_i+nr,ns, slice_i, temp);
1315 out = (
double*)((
char*)out + out_stride);
static double tensor_ig2(double g[2], const double *wtr, uint nr, const double *wts, uint ns, const double *u, double *work)
void findpt_fun(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
static double sum(struct xxt *data, double v, uint n, uint tag)
static unsigned pt_flags_to_bin(const unsigned flags)
static double tensor_i2(const double *Jr, uint nr, const double *Js, uint ns, const double *u, double *work)
#define tmalloc(type, count)
void lagrange_fun(double *restrict p, double *restrict data, unsigned n, int d, double x)
static unsigned pt_flags_to_bin_noC(const unsigned flags)
#define findpts_el_free_3
static void lin_solve_3(double x[3], const double A[9], const double y[3])
static void findpt_vol(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
static void compute_face_data_tr(struct findpts_el_data_3 *fd)
static void tensor_mtxv(double *y, uint ny, const double *A, const double *x, uint nx)
#define CHECK_CONSTRAINT(drcd, d3)
static unsigned edge_index(const unsigned x)
static void tensor_mxm(double *C, uint nc, const double *A, uint na, const double *B, uint nb)
static unsigned work_size(const unsigned nr, const unsigned ns, const unsigned nt, const unsigned npt_max)
static void compute_face_data_st(struct findpts_el_data_3 *fd)
#define findpts_el_setup_3
struct findpts_el_gpt_3 pt[8]
static void newton_face(struct findpts_el_pt_3 *const out, const double jac[9], const double rhes[3], const double resid[3], const unsigned d1, const unsigned d2, const unsigned dn, const unsigned flags, const struct findpts_el_pt_3 *const p, const double tol)
static void seed(struct findpts_el_data_3 *const fd, struct findpts_el_pt_3 *const pt, const unsigned npt)
#define SET_EDGE(j, rd, rn, base)
static void findpt_face(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
static void compute_pt_data(struct findpts_el_data_3 *fd)
struct findpts_el_gface_3 face[6]
static unsigned plus_2_mod_3(const unsigned x)
static void findpt_edge(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
static void compute_edge_data(struct findpts_el_data_3 *fd, unsigned d)
static void lin_solve_sym_2(double x[2], const double A[3], const double y[2])
static unsigned point_index(const unsigned x)
static unsigned plus_1_mod_3(const unsigned x)
#define SET_FACE(i, base, n)
static void newton_edge(struct findpts_el_pt_3 *const out, const double jac[9], const double rhes, const double resid[3], const unsigned de, const unsigned dn1, const unsigned dn2, unsigned flags, const struct findpts_el_pt_3 *const p, const double tol)
static unsigned which_bit(const unsigned x)
static const struct findpts_el_gpt_3 * get_pt(struct findpts_el_data_3 *fd, unsigned pi)
static unsigned num_constrained(const unsigned flags)
static void compute_face_data_rs(struct findpts_el_data_3 *fd)
static const unsigned nr[3]
static void findpt_pt(struct findpts_el_pt_3 *const out, struct findpts_el_data_3 *const fd, const struct findpts_el_pt_3 *const p, const unsigned pn, const double tol)
void compute_face_data_fun(struct findpts_el_data_3 *fd)
struct findpts_el_pt_3 * p
static const struct findpts_el_gface_3 * get_face(struct findpts_el_data_3 *fd, unsigned fi)
static double work[TNR *NS]
establishes some macros to establish naming conventions
#define findpts_el_eval_3
static double y[NR *NS *NT *N]
static void newton_vol(struct findpts_el_pt_3 *const out, const double jac[9], const double resid[3], const struct findpts_el_pt_3 *const p, const double tol)
struct findpts_el_gedge_3 edge[12]
static int reject_prior_step_q(struct findpts_el_pt_3 *const out, const double resid[3], const struct findpts_el_pt_3 *const p, const double tol)
static unsigned face_index(const unsigned x)
static const struct findpts_el_gedge_3 * get_edge(struct findpts_el_data_3 *fd, unsigned ei)
static double z[NR *NS *NT *N]