Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
poly_test2.c
Go to the documentation of this file.
1 #include <stdio.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 #include <float.h>
6 #include "c99.h"
7 #include "types.h"
8 #include "name.h"
9 #include "fail.h"
10 #include "mem.h"
11 #include "poly.h"
12 #include "rdtsc.h"
13 
14 #define N 32
15 #define REPEAT 1000000
16 
17 #define USE_HW_COUNTER 1
18 
19 #if USE_HW_COUNTER
21 #endif
22 
23 
24 #define EPS (128*DBL_EPSILON)
25 
26 static int not_same(double a, double b) {
27  return fabs(b-a)/(fabs(b)+fabs(a)) > 16*EPS;
28 }
29 
30 /* OLD CODE (reference implemenatoin ) =======================================*/
31 
32 typedef double real;
33 #define cosr cos
34 #define fabsr fabs
35 #define PI 3.1415926535897932384626433832795028841971693993751058209749445923
36 
37 /* precondition: n >= 0 */
38 static real legendre(int n, real x)
39 {
40  real p[2];
41  int i;
42  p[0]=1, p[1]=x;
43  for(i=1; i<n; i+=2) {
44  p[0] = ((2*i+1)*x*p[1]- i *p[0])/(i+1);
45  p[1] = ((2*i+3)*x*p[0]-(i+1)*p[1])/(i+2);
46  }
47  return p[n&1];
48 }
49 
50 /* precondition: n > 0 */
51 static real legendre_d1(int n, real x)
52 {
53  real p[2];
54  int i;
55  p[0]=3*x, p[1]=1;
56  for(i=2; i<n; i+=2) {
57  p[1] = ((2*i+1)*x*p[0]-(i+1)*p[1])/i;
58  p[0] = ((2*i+3)*x*p[1]-(i+2)*p[0])/(i+1);
59  }
60  return p[n&1];
61 }
62 
63 /* precondition: n > 1 */
64 static real legendre_d2(int n, real x)
65 {
66  real p[2];
67  int i;
68  p[0]=3, p[1]=15*x;
69  for(i=3; i<n; i+=2) {
70  p[0] = ((2*i+1)*x*p[1]-(i+2)*p[0])/(i-1);
71  p[1] = ((2*i+3)*x*p[0]-(i+3)*p[1])/i;
72  }
73  return p[n&1];
74 }
75 
76 /* n nodes */
77 static void ref_gauss_nodes(real *z, int n)
78 {
79  int i,j;
80  for(i=0; i<=n/2-1; ++i) {
81  real ox, x = cosr( (2*n-2*i-1)*(PI/2)/n );
82  do {
83  ox = x;
84  x -= legendre(n,x)/legendre_d1(n,x);
85  } while(fabsr(x-ox)>-x*EPS);
86  z[i] = x - legendre(n,x)/legendre_d1(n,x);
87  }
88  if(n&1) z[n/2]=0;
89  for(j=(n+1)/2,i=n/2-1; j<n; ++j,--i) z[j]=-z[i];
90 }
91 
92 /* n inner lobatto nodes (excluding -1,1) */
93 static void lobatto_nodes_aux(real *z, int n)
94 {
95  int i,j,np=n+1;
96  for(i=0; i<=n/2-1; ++i) {
97  real ox, x = cosr( (n-i)*PI/np );
98  do {
99  ox = x;
100  x -= legendre_d1(np,x)/legendre_d2(np,x);
101  } while(fabsr(x-ox)>-x*EPS);
102  z[i] = x - legendre_d1(np,x)/legendre_d2(np,x);
103  }
104  if(n&1) z[n/2]=0;
105  for(j=(n+1)/2,i=n/2-1; j<n; ++j,--i) z[j]=-z[i];
106 }
107 
108 /* n lobatto nodes */
109 static void ref_lobatto_nodes(real *z, int n)
110 {
111  z[0] = -1, z[n-1] = 1;
112  lobatto_nodes_aux(&z[1],n-2);
113 }
114 
115 static void ref_gauss_weights(const real *z, real *w, int n)
116 {
117  int i,j;
118  for(i=0; i<=(n-1)/2; ++i) {
119  real d = (n+1)*legendre(n+1,z[i]);
120  w[i] = 2*(1-z[i]*z[i])/(d*d);
121  }
122  for(j=(n+1)/2,i=n/2-1; j<n; ++j,--i) w[j]=w[i];
123 }
124 
125 static void ref_lobatto_weights(const real *z, real *w, int n)
126 {
127  int i,j;
128  for(i=0; i<=(n-1)/2; ++i) {
129  real d = legendre(n-1,z[i]);
130  w[i] = 2/((n-1)*n*d*d);
131  }
132  for(j=(n+1)/2,i=n/2-1; j<n; ++j,--i) w[j]=w[i];
133 }
134 
135 typedef struct {
136  unsigned n; /* number of Lagrange nodes */
137  const real *z; /* Lagrange nodes (user-supplied) */
138  real *J, *D, *D2; /* weights for 0th,1st,2nd derivatives */
139  real *J_z0, *D_z0, *D2_z0; /* ditto at z[0] (computed at setup) */
140  real *J_zn, *D_zn, *D2_zn; /* ditto at z[n-1] (computed at setup) */
141  real *w, *d, *u0, *v0, *u1, *v1, *u2, *v2; /* work data */
143 
144 static void ref_lagrange_0(ref_lagrange_data *p, real x)
145 {
146  unsigned i, n=p->n;
147  for(i=0 ; i<n ; ++i) p->d[i] = x-p->z[i];
148  for(i=0 ; i<n-1; ++i) p->u0[i+1] = p->d[i]*p->u0[i];
149  for(i=n-1; i ; --i) p->v0[i-1] = p->d[i]*p->v0[i];
150  for(i=0 ; i<n ; ++i) p->J[i] = p->w[i]*p->u0[i]*p->v0[i];
151 }
152 
153 static void ref_lagrange_1(ref_lagrange_data *p, real x)
154 {
155  unsigned i, n=p->n;
156  for(i=0 ; i<n ; ++i) p->d[i] = x-p->z[i];
157  for(i=0 ; i<n-1; ++i)
158  p->u0[i+1] = p->d[i]*p->u0[i],
159  p->u1[i+1] = p->d[i]*p->u1[i] + p->u0[i];
160  for(i=n-1; i ; --i)
161  p->v0[i-1] = p->d[i]*p->v0[i],
162  p->v1[i-1] = p->d[i]*p->v1[i] + p->v0[i];
163  for(i=0 ; i<n ; ++i)
164  p->J[i] = p->w[i]*p->u0[i]*p->v0[i],
165  p->D[i] = p->w[i]*(p->u1[i]*p->v0[i]+p->u0[i]*p->v1[i]);
166 }
167 
168 static void ref_lagrange_2(ref_lagrange_data *p, real x)
169 {
170  unsigned i,n=p->n;
171  for(i=0 ; i<n ; ++i) p->d[i] = x-p->z[i];
172  for(i=0 ; i<n-1; ++i)
173  p->u0[i+1]=p->d[i]*p->u0[i],
174  p->u1[i+1]=p->d[i]*p->u1[i]+p->u0[i],
175  p->u2[i+1]=p->d[i]*p->u2[i]+2*p->u1[i];
176  for(i=n-1; i ; --i)
177  p->v0[i-1]=p->d[i]*p->v0[i],
178  p->v1[i-1]=p->d[i]*p->v1[i]+p->v0[i],
179  p->v2[i-1]=p->d[i]*p->v2[i]+2*p->v1[i];
180  for(i=0 ; i<n ; ++i)
181  p->J [i]=p->w[i]*p->u0[i]*p->v0[i],
182  p->D [i]=p->w[i]*(p->u1[i]*p->v0[i]+p->u0[i]*p->v1[i]),
183  p->D2[i]=p->w[i]*(p->u2[i]*p->v0[i]+2*p->u1[i]*p->v1[i]+p->u0[i]*p->v2[i]);
184 }
185 
187 {
188  unsigned i,n=p->n;
189  for(i=0 ; i<n-1; ++i)
190  p->u2[i+1]=p->d[i]*p->u2[i]+2*p->u1[i];
191  for(i=n-1; i ; --i)
192  p->v2[i-1]=p->d[i]*p->v2[i]+2*p->v1[i];
193  for(i=0 ; i<n ; ++i)
194  p->D2[i]=p->w[i]*(p->u2[i]*p->v0[i]+2*p->u1[i]*p->v1[i]+p->u0[i]*p->v2[i]);
195 }
196 
197 static void ref_lagrange_setup(ref_lagrange_data *p, const real *z, unsigned n)
198 {
199  unsigned i,j;
200  p->n=n, p->z=z;
201  p->w = tmalloc(real, 17*n);
202  p->d = p->w+n;
203  p->J = p->d+n, p->D = p->J+n, p->D2 = p->D+n;
204  p->u0=p->D2+n, p->v0=p->u0+n;
205  p->u1=p->v0+n, p->v1=p->u1+n;
206  p->u2=p->v1+n, p->v2=p->u2+n;
207  p->J_z0=p->v2+n, p->D_z0=p->J_z0+n, p->D2_z0=p->D_z0+n;
208  p->J_zn=p->D2_z0+n, p->D_zn=p->J_zn+n, p->D2_zn=p->D_zn+n;
209  for(i=0; i<n; ++i) {
210  real ww = 1, zi = z[i];
211  for(j=0; j<i; ++j) ww *= zi-z[j];
212  for(++j; j<n; ++j) ww *= zi-z[j];
213  p->w[i] = 1/ww;
214  }
215  p->u0[0] = p->v0[n-1] = 1;
216  p->u1[0] = p->v1[n-1] = 0;
217  p->u2[0] = p->v2[n-1] = 0;
218  ref_lagrange_2(p,z[0 ]); memcpy(p->J_z0,p->J,3*n*sizeof(real));
219  ref_lagrange_2(p,z[n-1]); memcpy(p->J_zn,p->J,3*n*sizeof(real));
220 }
221 
223 {
224  free(p->w);
225 }
226 
227 /* TEST CODE (compare new against reference) =================================*/
228 
229 int main()
230 {
231  uint i,n;
232 
233 #if USE_HW_COUNTER
234  {
235  int d;
236  unsigned long long tic, toc;
237  unsigned r;
238  #define TIME(t, repeat, what) do { \
239  for(r=repeat;r;--r) { what; } \
240  tic = getticks(); \
241  for(r=repeat;r;--r) { what; } \
242  toc = getticks(); \
243  t = toc-tic; \
244  } while(0)
245 
246  for(d=0;d<3;++d) for(n=1;n<N;++n) {
247  unsigned rep = REPEAT/(n+1)/(d+1);
248  unsigned long long tr=0,tt=0;
249 
250  double *p, x = rand()/(double)RAND_MAX;
251  double z[N], zr[N];
252  lagrange_fun *lag;
253  ref_lagrange_data rld;
254  gauss_nodes(z,n);
255  p = tmalloc(double, 3*n+lagrange_size(n));
256  lag = lagrange_setup(p+3*n,z,n);
257 
258  ref_gauss_nodes(zr,n);
259  ref_lagrange_setup(&rld,zr,n);
260 
261  TIME(tt,rep,lag(p,p+3*n,n,d,x));
262  switch(d) {
263  case 0: TIME(tr,rep,ref_lagrange_0(&rld,x)); break;
264  case 1: TIME(tr,rep,ref_lagrange_1(&rld,x)); break;
265  case 2: TIME(tr,rep,(ref_lagrange_1(&rld,x),ref_lagrange_2u(&rld)));
266  }
267  printf("lagrange_%d_%02d cycles per call: "
268  "ref=%g\timp=%g\tspup=%g\n", d, (int)n,
269  tr/(double)rep,tt/(double)rep,tr/(double)tt);
270 
271  ref_lagrange_free(&rld);
272  free(p);
273  }
274 
275  for(d=0;d<3;++d) for(n=2;n<N;++n) {
276  unsigned rep = REPEAT/(n+1)/(d+1);
277  unsigned long long tr=0,tt=0;
278 
279  double *p, x = rand()/(double)RAND_MAX;
280  double zr[N];
281  lagrange_fun *lag;
282  ref_lagrange_data rld;
283  p = tmalloc(double, 3*n+gll_lag_size(n));
284  lag = gll_lag_setup(p+3*n,n);
285 
286  ref_lobatto_nodes(zr,n);
287  ref_lagrange_setup(&rld,zr,n);
288 
289  TIME(tt,rep,lag(p,p+3*n,n,d,x));
290  switch(d) {
291  case 0: TIME(tr,rep,ref_lagrange_0(&rld,x)); break;
292  case 1: TIME(tr,rep,ref_lagrange_1(&rld,x)); break;
293  case 2: TIME(tr,rep,(ref_lagrange_1(&rld,x),ref_lagrange_2u(&rld)));
294  }
295  printf("gll_lag_%d_%02d cycles per call: "
296  "ref=%g\timp=%g\tspup=%g\n", d, (int)n,
297  tr/(double)rep,tt/(double)rep,tr/(double)tt);
298 
299  ref_lagrange_free(&rld);
300  free(p);
301  }
302  }
303 #endif
304 
305  for(n=1;n<N;++n) {
306  double z[N], w[N], zr[N], wr[N];
307  gauss_quad(z,w,n);
308  ref_gauss_nodes(zr,n);
309  ref_gauss_weights(zr,wr,n);
310  for(i=0;i<n;++i) if(not_same(z[i],zr[i]) || not_same(w[i],wr[i])) break;
311  if(i!=n) break;
312  }
313  printf("Gauss quadrature tests: %s\n", n==N?"passed":"failed");
314 
315  for(n=2;n<N;++n) {
316  double z[N], w[N], zr[N], wr[N];
317  lobatto_quad(z,w,n);
318  ref_lobatto_nodes(zr,n);
319  ref_lobatto_weights(zr,wr,n);
320  for(i=0;i<n;++i) if(not_same(z[i],zr[i]) || not_same(w[i],wr[i])) break;
321  if(i!=n) break;
322  }
323  printf("Gauss-Lobatto quadrature tests: %s\n", n==N?"passed":"failed");
324 
325  for(n=1;n<N;++n) {
326  double *p, x = rand()/(double)RAND_MAX;
327  double z[N], zr[N];
328  lagrange_fun *lag;
329  ref_lagrange_data rld;
330  gauss_nodes(z,n);
331  p = tmalloc(double, 3*n+lagrange_size(n));
332  lag = lagrange_setup(p+3*n,z,n);
333  lag(p,p+3*n,n,2,x);
334 
335  ref_gauss_nodes(zr,n);
336  ref_lagrange_setup(&rld,zr,n);
337  ref_lagrange_1(&rld,x);
338  ref_lagrange_2u(&rld);
339 
340  for(i=0;i<n;++i) if(not_same(p[ i],rld.J [i])) break; if(i!=n) break;
341  for(i=0;i<n;++i) if(not_same(p[ n+i],rld.D [i])) break; if(i!=n) break;
342  for(i=0;i<n;++i) if(not_same(p[2*n+i],rld.D2[i])) break; if(i!=n) break;
343 
344  ref_lagrange_free(&rld);
345  free(p);
346  }
347  printf("Gauss nodal Lagrange basis tests: %s\n", n==N?"passed":"failed");
348 
349  for(n=2;n<N;++n) {
350  double *p, x = rand()/(double)RAND_MAX;
351  double zr[N];
352  lagrange_fun *lag;
353  ref_lagrange_data rld;
354  p = tmalloc(double, 3*n+gll_lag_size(n));
355  lag = gll_lag_setup(p+3*n,n);
356  lag(p,p+3*n,n,2,x);
357 
358  ref_lobatto_nodes(zr,n);
359  ref_lagrange_setup(&rld,zr,n);
360  ref_lagrange_1(&rld,x);
361  ref_lagrange_2u(&rld);
362 
363  for(i=0;i<n;++i) if(not_same(p[ i],rld.J [i])) break; if(i!=n) break;
364  for(i=0;i<n;++i) if(not_same(p[ n+i],rld.D [i])) break; if(i!=n) break;
365  for(i=0;i<n;++i) if(not_same(p[2*n+i],rld.D2[i])) break; if(i!=n) break;
366 
367  ref_lagrange_free(&rld);
368  free(p);
369  }
370  printf("Gauss-Lobatto nodal Lagrange basis tests: %s\n",
371  n==N?"passed":"failed");
372 
373  return 0;
374 }
#define uint
Definition: types.h:70
#define tmalloc(type, count)
Definition: mem.h:91
void lagrange_fun(double *restrict p, double *restrict data, unsigned n, int d, double x)
Definition: poly.c:20
#define lobatto_quad
Definition: poly.c:16
n
Definition: xxt_test.m:73
static double zr[NR]
static void ref_gauss_nodes(real *z, int n)
Definition: poly_test2.c:77
#define x
static void ref_lobatto_nodes(real *z, int n)
Definition: poly_test2.c:109
static real legendre(int n, real x)
Definition: poly_test2.c:38
double real
Definition: poly_test2.c:32
#define gauss_quad
Definition: poly.c:14
static void ref_lagrange_2u(ref_lagrange_data *p)
Definition: poly_test2.c:186
static void ref_lagrange_1(ref_lagrange_data *p, real x)
Definition: poly_test2.c:153
#define lagrange_setup
Definition: poly.c:12
#define N
Definition: poly_test2.c:14
#define cosr
Definition: poly_test2.c:33
#define gll_lag_setup
Definition: poly.c:18
p
Definition: xxt_test2.m:1
#define gll_lag_size
Definition: poly.c:17
static void ref_gauss_weights(const real *z, real *w, int n)
Definition: poly_test2.c:115
#define DEFINE_HW_COUNTER()
Definition: rdtsc.h:4
#define lagrange_size
Definition: poly.c:11
static void lobatto_nodes_aux(real *z, int n)
Definition: poly_test2.c:93
void ref_lagrange_free(ref_lagrange_data *p)
Definition: poly_test2.c:222
#define D
Definition: findpts.c:78
static int not_same(double a, double b)
Definition: poly_test2.c:26
const real * z
Definition: poly_test2.c:137
#define PI
Definition: poly_test2.c:35
for i
Definition: xxt_test.m:74
#define TIME(t, repeat, what)
static real legendre_d2(int n, real x)
Definition: poly_test2.c:64
#define fabsr
Definition: poly_test2.c:34
static void ref_lagrange_2(ref_lagrange_data *p, real x)
Definition: poly_test2.c:168
static void ref_lagrange_setup(ref_lagrange_data *p, const real *z, unsigned n)
Definition: poly_test2.c:197
static void ref_lobatto_weights(const real *z, real *w, int n)
Definition: poly_test2.c:125
#define gauss_nodes
Definition: poly.c:13
static real legendre_d1(int n, real x)
Definition: poly_test2.c:51
#define REPEAT
Definition: poly_test2.c:15
establishes some macros to establish naming conventions
static uint np
Definition: findpts_test.c:63
static void ref_lagrange_0(ref_lagrange_data *p, real x)
Definition: poly_test2.c:144
#define EPS
Definition: poly_test2.c:24
int main()
Definition: poly_test2.c:229
static double z[NR *NS *NT *N]
Definition: obbox_test.c:31