Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
sparse_cholesky.c
Go to the documentation of this file.
1 #include <stddef.h>
2 #include <stdlib.h>
3 #include <math.h>
4 #include <string.h>
5 #include "c99.h"
6 #include "name.h"
7 #include "fail.h"
8 #include "types.h"
9 #include "mem.h"
10 #include "sort.h"
11 
12 #define sparse_cholesky_factor PREFIXED_NAME(sparse_cholesky_factor)
13 #define sparse_cholesky_solve PREFIXED_NAME(sparse_cholesky_solve )
14 #define sparse_cholesky_free PREFIXED_NAME(sparse_cholesky_free )
15 
16 /* factors: L is in CSR format
17  D is a diagonal matrix stored as a vector
18  actual factorization is:
19 
20  -1 T
21  A = (I-L) D (I-L)
22 
23  -1 -T -1
24  A = (I-L) D (I-L)
25 
26  (triangular factor is unit diagonal; the diagonal is not stored)
27 */
29  uint n, *Lrp, *Lj;
30  double *L, *D;
31 };
32 
33 /*
34  symbolic factorization: finds the sparsity structure of L
35 
36  uses the concept of elimination tree:
37  the parent of node j is node i when L(i,j) is the first
38  non-zero in column j below the diagonal (i>j)
39  L's structure is discovered row-by-row; the first time
40  an entry in column j is set, it must be the parent
41 
42  the nonzeros in L are the nonzeros in A + paths up the elimination tree
43 
44  linear in the number of nonzeros of L
45 */
46 static void factor_symbolic(uint n, const uint *Arp, const uint *Aj,
47  struct sparse_cholesky *out, buffer *buf)
48 {
49  uint *visit = tmalloc(uint,2*n), *parent = visit+n;
50  uint *Lrp, *Lj;
51  uint i,nz=0;
52 
53  out->n=n;
54 
55  for(i=0;i<n;++i) {
56  uint p=Arp[i], pe=Arp[i+1];
57  visit[i]=i, parent[i]=n;
58  for(;p!=pe;++p) {
59  uint j=Aj[p]; if(j>=i) break;
60  for(;visit[j]!=i;j=parent[j]) {
61  ++nz, visit[j]=i;
62  if(parent[j]==n) { parent[j]=i; break; }
63  }
64  }
65  }
66 
67  Lrp=out->Lrp=tmalloc(uint,n+1+nz);
68  Lj =out->Lj =Lrp+n+1;
69 
70  Lrp[0]=0;
71  for(i=0;i<n;++i) {
72  uint p=Arp[i], pe=Arp[i+1], count=0, *Ljr=&Lj[Lrp[i]];
73  visit[i]=i;
74  for(;p!=pe;++p) {
75  uint j=Aj[p]; if(j>=i) break;
76  for(;visit[j]!=i;j=parent[j]) Ljr[count++]=j, visit[j]=i;
77  }
78  sortv(Ljr, Ljr,count,sizeof(uint), buf);
79  Lrp[i+1]=Lrp[i]+count;
80  }
81  free(visit);
82 }
83 
84 /*
85  numeric factorization:
86 
87  L is built row-by-row, using: ( ' indicates transpose )
88 
89 
90  [ A r ] = [ (I-L) ] [ D^(-1) ] [ (I-L)' -s ]
91  [ r' a ] [ -s' 1 ] [ 1/d ] [ 1 ]
92 
93  = [ A (I-L) D^(-1) (-s) ]
94  [ r' s' D^(-1) s + 1/d ]
95 
96  so, if r' is the next row of A, up to but excluding the diagonal,
97  then the next row of L, s', obeys
98 
99  r = - (I-L) D^(-1) s
100 
101  let y = (I-L)^(-1) (-r)
102  then s = D y, and d = 1/(s' y)
103 
104 */
105 static void factor_numeric(uint n, const uint *Arp, const uint *Aj,
106  const double *A,
107  struct sparse_cholesky *out,
108  uint *visit, double *y)
109 {
110  const uint *Lrp=out->Lrp, *Lj=out->Lj;
111  double *D, *L;
112  uint i;
113 
114  D=out->D=tmalloc(double,n+Lrp[n]);
115  L=out->L=D+n;
116 
117  for(i=0;i<n;++i) {
118  uint p,pe; double a=0;
119  visit[i]=n;
120  for(p=Lrp[i],pe=Lrp[i+1];p!=pe;++p) {
121  uint j=Lj[p]; y[j]=0, visit[j]=i;
122  }
123  for(p=Arp[i],pe=Arp[i+1];p!=pe;++p) {
124  uint j=Aj[p];
125  if(j>=i) { if(j==i) a=A[p]; break; }
126  y[j]=-A[p];
127  }
128  for(p=Lrp[i],pe=Lrp[i+1];p!=pe;++p) {
129  uint q,qe,j=Lj[p]; double lij,yj=y[j];
130  for(q=Lrp[j],qe=Lrp[j+1];q!=qe;++q) {
131  uint k=Lj[q]; if(visit[k]==i) yj+=L[q]*y[k];
132  }
133  y[j]=yj;
134  L[p]=lij=D[j]*yj;
135  a-=yj*lij;
136  }
137  D[i]=1/a;
138  }
139 }
140 
141 /* x = A^(-1) b; works when x and b alias */
143  double *x, const struct sparse_cholesky *fac, double *b)
144 {
145  const uint n=fac->n, *Lrp=fac->Lrp, *Lj=fac->Lj;
146  const double *L=fac->L, *D=fac->D;
147  uint i, p,pe;
148  for(i=0;i<n;++i) {
149  double xi = b[i];
150  for(p=Lrp[i],pe=Lrp[i+1];p!=pe;++p) xi+=L[p]*x[Lj[p]];
151  x[i]=xi;
152  }
153  for(i=0;i<n;++i) x[i]*=D[i];
154  for(i=n;i;) {
155  double xi = x[--i];
156  for(p=Lrp[i],pe=Lrp[i+1];p!=pe;++p) x[Lj[p]]+=L[p]*xi;
157  }
158 }
159 
160 void sparse_cholesky_factor(uint n, const uint *Arp, const uint *Aj,
161  const double *A,
162  struct sparse_cholesky *out, buffer *buf)
163 {
164  const uint n_uints_as_dbls = (n*sizeof(uint)+sizeof(double)-1)/sizeof(double);
165  buffer_reserve(buf,(n_uints_as_dbls+n)*sizeof(double));
166  factor_symbolic(n,Arp,Aj,out,buf);
167  factor_numeric(n,Arp,Aj,A,out,buf->ptr,n_uints_as_dbls+(double*)buf->ptr);
168 }
169 
171 {
172  free(fac->Lrp); fac->Lj=fac->Lrp=0;
173  free(fac->D); fac->L =fac->D =0;
174 }
175 
#define uint
Definition: types.h:70
void sortv(T *out, const T *A, uint n, unsigned stride, buffer *restrict buf)
Definition: sort_imp.h:446
#define tmalloc(type, count)
Definition: mem.h:91
static void factor_numeric(uint n, const uint *Arp, const uint *Aj, const double *A, struct sparse_cholesky *out, uint *visit, double *y)
n
Definition: xxt_test.m:73
#define x
ulong A[NUM][SI]
Definition: sort_test.c:17
#define buffer_reserve(b, max)
Definition: mem.h:157
p
Definition: xxt_test2.m:1
Definition: mem.h:111
#define sparse_cholesky_solve
#define D
Definition: findpts.c:78
static void factor_symbolic(uint n, const uint *Arp, const uint *Aj, struct sparse_cholesky *out, buffer *buf)
for i
Definition: xxt_test.m:74
void * ptr
Definition: mem.h:111
const uint Aj[3][32]
Definition: xxt_test.c:85
#define sparse_cholesky_free
ulong out[N]
Definition: sort_test2.c:20
#define sparse_cholesky_factor
establishes some macros to establish naming conventions
static double y[NR *NS *NT *N]
Definition: obbox_test.c:31
const uint nz[3]
Definition: xxt_test.c:78