21 #define crs_setup PREFIXED_NAME(crs_setup)
22 #define crs_solve PREFIXED_NAME(crs_solve)
23 #define crs_stats PREFIXED_NAME(crs_stats)
24 #define crs_free PREFIXED_NAME(crs_free )
40 #define UINT_BITS (sizeof(uint)*CHAR_BIT)
41 #define BITS(i) ((UINT_BITS+(1<<(i))-1)>>(i))
42 #define MASK(i) ((((uint)1<<BITS(i)) - 1) << BITS(i))
43 #define CHECK(i) if((BITS(i)!=1) && (v&MASK(i))) v>>=BITS(i), r+=BITS(i)
157 if(
id>=base+n) c|=1, base+=
n, n+=(odd&1);
160 data->
nsep = level+1;
164 for(level=0;level<data->
plevels;++level) {
166 uint targ =
id - (n-(odd&1));
172 c>>=1, n=(n<<1)+(odd&1), odd>>=1;
191 unsigned i,lvl;
uint j;
195 v=buf->
ptr, recv=v+
ns;
197 for(i=0;i<
ns;++
i) v[i]=0;
198 for(j=0;j<
n;++j) v[dof[j].
level]+=1/(
float)dof[j].
count;
201 for(lvl=0;lvl<nl;++lvl) {
203 unsigned s = ns-(lvl+1);
208 for(i=lvl+1;i<
ns;++
i) v[i]+=recv[i];
214 unsigned s = ns-(lvl+1);
229 data->
sn=data->
cn-data->
ln;
238 if(Aend==A)
return A;
240 ulong *end = Aend-1, last=*end, *
p=
A,*q=
A,v=0;
248 if(last!=v) *p++=last;
256 const unsigned ns = data->
nsep;
258 ulong *
p=sep_id, *q=other;
262 memcpy(work ,p,size*
sizeof(
ulong));
263 memcpy(work+size,q,size*
sizeof(
ulong));
266 memcpy(p,work,(end-work)*
sizeof(
ulong));
273 const unsigned ns=data->
nsep;
280 for(i=data->
ln;i<n;++i) {
281 unsigned si = dof[
i].
level;
283 memset(xid,0,size*
sizeof(
ulong));
285 if(++s != ns) size=data->
sep_size[s];
287 *xid++ = dof[
i].
id, --size;
290 memset(xid,0,size*
sizeof(
ulong));
292 if(++s != ns) size=data->
sep_size[s];
299 const struct dof *
dof = dofc->
ptr, *dof_end = dof+cn;
302 while(dof!=dof_end) {
304 while(*xid!=v) ++xid, *perm++ = -1;
305 *perm++ = i++, ++dof, ++xid;
307 while(xid!=xid_end) ++xid, *perm++ = -1;
320 size=0;
for(lvl=1;lvl<ns;++lvl) if(sep_size[lvl]>size) size=sep_size[lvl];
321 xid=
tmalloc(
ulong,2*xn+2*size), recv=xid+xn, work=recv+xn;
328 for(lvl=0;lvl<nl;++lvl) {
337 if(ss>=size || lvl==nl-1)
break;
349 p-=ss, size+=ss, --lvl;
362 const unsigned nl=data->
plevels;
363 double *
p=v, *recv=data->
combuf;
364 unsigned lvl, nsend=0;
367 if(n==0 || nl==0)
return;
371 for(lvl=0;lvl<nl;++lvl) {
378 for(i=0;i<size;++
i) p[i]+=recv[i];
381 if(ss>=size || lvl==nl-1)
break;
391 p,size*
sizeof(
double),other ,tag);
395 p-=ss, size+=ss, --lvl;
402 const unsigned nl=data->
plevels;
404 unsigned lvl,nsend=0;
408 if(n==0 || nl==0)
return v;
410 for(lvl=0;lvl<nl;++lvl) {
419 if(ss>=size || lvl==nl-1)
break;
429 &v,
sizeof(
double),other ,tag);
449 if(v!=last) last=v, ++un;
479 for(i=0;i<n;++i) if(perm[i]>=0) bid[perm[i]]=dof[perm[i]].
id=
id[i];
484 for(i=0;i<cn;++
i) v[i]=pcoord;
488 for(i=0;i<cn;++
i) v[i]=1;
490 for(i=0;i<cn;++
i) dof[i].
count=v[i];
497 p =
sortp(buf,0, &dof[0].
level,cn,
sizeof(
struct dof));
498 pi = p+cn;
for(i=0;i<cn;++
i) pi[p[i]]=i;
499 for(i=0;i<n;++i) if(perm[i]>=0) perm[i]=pi[perm[i]];
508 const double *
A = data->
A_sl.
A;
511 for(p=Arp[i],pe=Arp[i+1];p!=pe;++
p)
512 vl[
Aj[p]]+=A[p]*vs[i];
520 const double *
A = data->
A_sl.
A;
523 for(p=Arp[i],pe=Arp[i+1];p!=pe;++
p)
524 vs[i]-=A[p]*vl[
Aj[p]];
533 *Asl_j = data->
A_sl.
Aj, *Ass_j = A_ss->
Aj;
534 const double *Asl = data->
A_sl.
A, *
Ass = A_ss->
A;
536 for(i=0;i<ei;++
i) vs[i]=0;
537 for(p=Ass_rp[ei],pe=Ass_rp[ei+1];p!=pe;++
p) {
542 for(i=0;i<ln;++
i) vl[i]=0;
543 for(p=Asl_rp[ei],pe=Asl_rp[ei+1];p!=pe;++
p) vl[Asl_j[p]]=-Asl[p];
549 struct csr_mat *A_ss,
const double *vs,
double *vl)
552 const uint *Ass_rp = A_ss->
Arp,
554 const double *
Ass = A_ss->
A;
558 for(p=Ass_rp[i],pe=Ass_rp[i+1];p!=pe;++
p) {
565 for(i=0;i<ln;++
i) vl[i]=0;
575 const double *
X = data->
X;
const uint *Xp = data->
Xp;
581 const double *vx,
uint nx)
583 const double *
X = data->
X;
const uint *Xp = data->
Xp;
585 for(i=0;i<
ns;++
i) vs[i]=0;
587 const double v = vx[
i];
588 const double *
x = X+Xp[
i];
uint n=Xp[i+1]-Xp[
i];
589 for(j=0;j<
n;++j) vs[j]+=x[j]*v;
601 if(perm_x2c[i]!=-1) ++h;
602 data->
Xp[i+1]=data->
Xp[
i]+h;
611 double *vl, *vs, *vx, *Svs;
617 vl=buf->
ptr, vs=vl+ln, Svs=vs+sn, vx=Svs+sn;
622 sint ui = perm_x2c[
i];
626 for(j=0;j<
i;++j) vx[j]=0;
635 apply_S(Svs,ns, data,A_ss, vs, vl);
637 ytsy =
sum(data,ytsy,i+1,xn-(i+1));
638 if(ytsy<DBL_EPSILON/128) ytsy=0;
else ytsy = 1/sqrt(ytsy);
639 x=&data->
X[data->
Xp[
i]];
640 for(j=0;j<
ns;++j) x[j]=ytsy*vs[j];
655 for(k=0;k+1<
nz;++k,++
p)
if(p[0].
i==p[1].
i && p[0].
j==p[1].
j)
break;
660 if(i==q->
i&&j==q->
j) p->
v += q->
v, --mat->
n;
661 else ++
p, p->
i=i=q->
i,p->
j=j=q->
j, p->
v=q->
v;
668 out->
Aj = out->
Arp+nr+1;
670 for(k=0;k<
nr;++k) out->
Arp[k]=0;
671 for(p=mat->
ptr,k=0;k<nz;++k,++p)
672 out->
Arp[p->
i]++, out->
Aj[k]=p->
j, out->
A[k]=p->
v;
673 nz=0;
for(k=0;k<=
nr;++k) {
uint t=out->
Arp[k]; out->
Arp[k]=
nz, nz+=t; }
684 struct array mat_ll, mat_sl, mat_ss;
690 sint i=perm[Ai[k]],
j=perm[Aj[k]];
691 if(i<0 || j<0 || A[k]==0)
continue;
694 n=mat_ll.
n++,mll[
n].
i=
i,mll[
n].
j=
j,mll[
n].
v=A[k];
697 n=mat_sl.
n++,msl[
n].
i=i-ln,msl[
n].
j=
j,msl[
n].
v=A[k];
699 n=mat_ss.
n++,mss[
n].
i=i-ln,mss[
n].
j=j-ln,mss[
n].
v=A[k];
735 for(i=0;i<data->
cn;++
i) count+=1/(
double)dof[
i].
count;
736 count=1/
sum(data,count,data->
xn,0);
738 for(i=0;i<data->
cn;++
i)
746 &A_ll,&data->
A_sl,&A_ss,
751 &A_ll,&data->
A_sl,&A_ss,
757 free(A_ll.
Arp); free(A_ll.
A);
760 data->
vc = data->
vl+data->
ln;
761 data->
vx = data->
vc+data->
cn;
765 free(A_ss.
Arp); free(A_ss.
A);
774 uint cn=data->
cn, un=data->
un, ln=data->
ln, sn=data->
sn, xn=data->
xn;
775 double *vl=data->
vl, *vc=data->
vc, *vx=data->
vx;
777 for(i=0;i<cn;++
i) vc[i]=0;
780 if(p>=0) vc[
p]+=b[
i];
788 apply_X(vc+ln,sn, data, vx,xn);
789 for(i=0;i<ln;++
i) vl[i]=0;
792 for(i=0;i<ln;++
i) vc[i]-=vl[i];
796 if(xn==0) vc[ln-1]=0;
797 else if(sn==1) vc[ln]=0;
803 s =
sum(data,s,data->
xn,0);
804 for(i=0;i<cn;++
i) vc[i]-=s;
808 x[
i] = p>=0 ? vc[
p] : 0;
817 printf(
"xxt: separator sizes on %d =",(
int)data->
comm.
id);
818 for(s=0;s<data->
nsep;++s) printf(
" %d",(
int)data->
sep_size[s]);
820 printf(
"xxt: shared dofs on %d = %d\n",(
int)data->
comm.
id,(
int)data->
sn);
824 if(data->
comm.
id==0) printf(
"xxt: max non-shared dofs = %d\n",a);
827 if(data->
comm.
id==0) printf(
"xxt: max shared dofs = %d\n",a);
831 if(data->
comm.
id==0) printf(
"xxt: max X cols = %d\n",a);
832 a=data->
Xp[xcol]*
sizeof(double);
834 if(data->
comm.
id==0) printf(
"xxt: max X size = %d bytes\n",a);
847 free(data->
Xp); free(data->
X);
struct sparse_cholesky fac_A_ll
static double sum(struct xxt *data, double v, uint n, uint tag)
#define tmalloc(type, count)
static void comm_recv(const struct comm *c, void *p, size_t n, uint src, int tag)
static void locate_proc(struct xxt *data)
#define sarray_sort_2(T, A, n, field1, is_long1, field2, is_long2, buf)
static unsigned lg(uint v)
static void comm_wait(comm_req *req, int n)
static void init_sep_ids(struct xxt *data, struct array *dofa, ulong *xid)
static void discover_dofs(struct xxt *data, uint n, const ulong *id, struct array *dofa, buffer *buf, const struct comm *comm)
static void separate_matrix(uint nz, const uint *Ai, const uint *Aj, const double *A, const sint *perm, uint ln, uint sn, struct csr_mat *out_ll, struct csr_mat *out_sl, struct csr_mat *out_ss, buffer *buf)
void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op, void *v, uint vn, void *buf)
static void apply_S_col(double *vs, struct xxt *data, struct csr_mat *A_ss, uint ei, double *vl)
static void apply_Xt(double *vx, uint nx, const struct xxt *data, const double *vs)
static void comm_send(const struct comm *c, void *p, size_t n, uint dst, int tag)
static void apply_m_Asl(double *vs, uint ns, struct xxt *data, const double *vl)
static void comm_free(struct comm *c)
static void apply_QQt(struct xxt *data, double *v, uint n, uint tag)
static void orthogonalize(struct xxt *data, struct csr_mat *A_ss, sint *perm_x2c, buffer *buf)
#define sarray_permute_buf(T, A, n, buf)
#define buffer_reserve(b, max)
static void apply_S(double *Svs, uint ns, struct xxt *data, struct csr_mat *A_ss, const double *vs, double *vl)
static void apply_p_Als(double *vl, struct xxt *data, const double *vs, uint ns)
#define sparse_cholesky_solve
static void allocate_X(struct xxt *data, sint *perm_x2c)
#define array_init(T, a, max)
static void apply_X(double *vs, uint ns, const struct xxt *data, const double *vx, uint nx)
static void find_perm_x2c(uint ln, uint cn, const struct array *dofc, uint xn, const ulong *xid, sint *perm)
static ulong * unique_nonzero(ulong *A, ulong *Aend)
uint * sortp(buffer *restrict buf, int start_perm, const T *restrict A, uint n, unsigned stride)
static void condense_matrix(struct array *mat, uint nr, struct csr_mat *out, buffer *buf)
#define sparse_cholesky_free
#define buffer_init(b, max)
static const unsigned nr[3]
Gather/Scatter Library interface.
static void merge_sep_ids(struct xxt *data, ulong *sep_id, ulong *other, ulong *work, unsigned s0, buffer *buf)
static double work[TNR *NS]
#define sparse_cholesky_factor
establishes some macros to establish naming conventions
static void discover_sep_sizes(struct xxt *data, struct array *dofa, buffer *buf)
static uint unique_ids(uint n, const ulong *id, sint *perm, buffer *buf)
static void comm_isend(comm_req *req, const struct comm *c, void *p, size_t n, uint dst, int tag)
static sint * discover_sep_ids(struct xxt *data, struct array *dofa, buffer *buf)