15 #define REPEAT 1000000
17 #define USE_HW_COUNTER 1
24 #define EPS (128*DBL_EPSILON)
27 return fabs(b-a)/(fabs(b)+fabs(a)) > 16*
EPS;
35 #define PI 3.1415926535897932384626433832795028841971693993751058209749445923
44 p[0] = ((2*i+1)*x*p[1]- i *p[0])/(i+1);
45 p[1] = ((2*i+3)*x*p[0]-(i+1)*p[1])/(i+2);
57 p[1] = ((2*i+1)*x*p[0]-(i+1)*p[1])/i;
58 p[0] = ((2*i+3)*x*p[1]-(i+2)*p[0])/(i+1);
70 p[0] = ((2*i+1)*x*p[1]-(i+2)*p[0])/(i-1);
71 p[1] = ((2*i+3)*x*p[0]-(i+3)*p[1])/i;
80 for(i=0; i<=n/2-1; ++
i) {
81 real ox,
x =
cosr( (2*n-2*i-1)*(
PI/2)/n );
89 for(j=(n+1)/2,i=n/2-1; j<
n; ++j,--
i) z[j]=-z[i];
96 for(i=0; i<=n/2-1; ++
i) {
97 real ox,
x =
cosr( (n-i)*
PI/np );
105 for(j=(n+1)/2,i=n/2-1; j<
n; ++j,--
i) z[j]=-z[i];
111 z[0] = -1, z[n-1] = 1;
118 for(i=0; i<=(n-1)/2; ++
i) {
120 w[
i] = 2*(1-z[
i]*z[
i])/(d*d);
122 for(j=(n+1)/2,i=n/2-1; j<
n; ++j,--
i) w[j]=w[i];
128 for(i=0; i<=(n-1)/2; ++
i) {
130 w[
i] = 2/((n-1)*n*d*d);
132 for(j=(n+1)/2,i=n/2-1; j<
n; ++j,--
i) w[j]=w[i];
141 real *
w, *d, *u0, *v0, *u1, *v1, *u2, *v2;
147 for(i=0 ; i<
n ; ++
i) p->
d[i] = x-p->
z[i];
148 for(i=0 ; i<n-1; ++i) p->
u0[i+1] = p->
d[
i]*p->
u0[
i];
149 for(i=n-1;
i ; --
i) p->
v0[i-1] = p->
d[i]*p->
v0[i];
150 for(i=0 ; i<n ; ++i) p->
J[
i] = p->
w[
i]*p->
u0[
i]*p->
v0[
i];
156 for(i=0 ; i<
n ; ++
i) p->
d[i] = x-p->
z[i];
157 for(i=0 ; i<n-1; ++i)
161 p->
v0[i-1] = p->
d[i]*p->
v0[i],
162 p->
v1[i-1] = p->
d[i]*p->
v1[i] + p->
v0[i];
171 for(i=0 ; i<
n ; ++
i) p->
d[i] = x-p->
z[i];
172 for(i=0 ; i<n-1; ++i)
177 p->
v0[i-1]=p->
d[i]*p->
v0[i],
178 p->
v1[i-1]=p->
d[i]*p->
v1[i]+p->
v0[i],
179 p->
v2[i-1]=p->
d[i]*p->
v2[i]+2*p->
v1[i];
183 p->
D2[i]=p->
w[i]*(p->
u2[i]*p->
v0[i]+2*p->
u1[i]*p->
v1[i]+p->
u0[i]*p->
v2[i]);
189 for(i=0 ; i<n-1; ++
i)
190 p->
u2[i+1]=p->
d[i]*p->
u2[i]+2*p->
u1[i];
194 p->
D2[i]=p->
w[i]*(p->
u2[i]*p->
v0[i]+2*p->
u1[i]*p->
v1[i]+p->
u0[i]*p->
v2[i]);
210 real ww = 1, zi = z[
i];
211 for(j=0; j<
i; ++j) ww *= zi-z[j];
212 for(++j; j<
n; ++j) ww *= zi-z[j];
215 p->
u0[0] = p->
v0[n-1] = 1;
216 p->
u1[0] = p->
v1[n-1] = 0;
217 p->
u2[0] = p->
v2[n-1] = 0;
236 unsigned long long tic, toc;
238 #define TIME(t, repeat, what) do { \
239 for(r=repeat;r;--r) { what; } \
241 for(r=repeat;r;--r) { what; } \
246 for(d=0;d<3;++d)
for(n=1;n<
N;++
n) {
247 unsigned rep =
REPEAT/(n+1)/(d+1);
248 unsigned long long tr=0,tt=0;
250 double *
p,
x = rand()/(double)RAND_MAX;
261 TIME(tt,rep,lag(p,p+3*n,n,d,x));
267 printf(
"lagrange_%d_%02d cycles per call: "
268 "ref=%g\timp=%g\tspup=%g\n", d, (
int)n,
269 tr/(
double)rep,tt/(
double)rep,tr/(
double)tt);
275 for(d=0;d<3;++d)
for(n=2;n<
N;++
n) {
276 unsigned rep =
REPEAT/(n+1)/(d+1);
277 unsigned long long tr=0,tt=0;
279 double *
p,
x = rand()/(double)RAND_MAX;
289 TIME(tt,rep,lag(p,p+3*n,n,d,x));
295 printf(
"gll_lag_%d_%02d cycles per call: "
296 "ref=%g\timp=%g\tspup=%g\n", d, (
int)n,
297 tr/(
double)rep,tt/(
double)rep,tr/(
double)tt);
306 double z[
N], w[
N],
zr[
N], wr[
N];
313 printf(
"Gauss quadrature tests: %s\n", n==N?
"passed":
"failed");
316 double z[
N], w[
N],
zr[
N], wr[
N];
323 printf(
"Gauss-Lobatto quadrature tests: %s\n", n==N?
"passed":
"failed");
326 double *
p,
x = rand()/(double)RAND_MAX;
340 for(i=0;i<
n;++
i)
if(
not_same(p[ i],rld.
J [i]))
break;
if(i!=n)
break;
341 for(i=0;i<
n;++
i)
if(
not_same(p[ n+i],rld.
D [i]))
break;
if(i!=n)
break;
342 for(i=0;i<
n;++
i)
if(
not_same(p[2*n+i],rld.
D2[i]))
break;
if(i!=n)
break;
347 printf(
"Gauss nodal Lagrange basis tests: %s\n", n==N?
"passed":
"failed");
350 double *
p,
x = rand()/(double)RAND_MAX;
363 for(i=0;i<
n;++
i)
if(
not_same(p[ i],rld.
J [i]))
break;
if(i!=n)
break;
364 for(i=0;i<
n;++
i)
if(
not_same(p[ n+i],rld.
D [i]))
break;
if(i!=n)
break;
365 for(i=0;i<
n;++
i)
if(
not_same(p[2*n+i],rld.
D2[i]))
break;
if(i!=n)
break;
370 printf(
"Gauss-Lobatto nodal Lagrange basis tests: %s\n",
371 n==N?
"passed":
"failed");
#define tmalloc(type, count)
void lagrange_fun(double *restrict p, double *restrict data, unsigned n, int d, double x)
static void ref_gauss_nodes(real *z, int n)
static void ref_lobatto_nodes(real *z, int n)
static real legendre(int n, real x)
static void ref_lagrange_2u(ref_lagrange_data *p)
static void ref_lagrange_1(ref_lagrange_data *p, real x)
static void ref_gauss_weights(const real *z, real *w, int n)
#define DEFINE_HW_COUNTER()
static void lobatto_nodes_aux(real *z, int n)
void ref_lagrange_free(ref_lagrange_data *p)
static int not_same(double a, double b)
#define TIME(t, repeat, what)
static real legendre_d2(int n, real x)
static void ref_lagrange_2(ref_lagrange_data *p, real x)
static void ref_lagrange_setup(ref_lagrange_data *p, const real *z, unsigned n)
static void ref_lobatto_weights(const real *z, real *w, int n)
static real legendre_d1(int n, real x)
establishes some macros to establish naming conventions
static void ref_lagrange_0(ref_lagrange_data *p, real x)
static double z[NR *NS *NT *N]