Nek5000
SEM for Incompressible NS
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Modules Pages
comm.c
Go to the documentation of this file.
1 #include <stddef.h> /* for size_t */
2 #include <stdlib.h> /* for exit */
3 #include <string.h> /* memcpy */
4 #include <limits.h> /* for gs identities */
5 #include <float.h> /* for gs identities */
6 #include "name.h"
7 #include "fail.h"
8 #include "types.h"
9 #include "tensor.h"
10 #include "gs_defs.h"
11 #include "gs_local.h"
12 #include "comm.h"
13 
15 
18 
19 static void scan_imp(void *scan, const struct comm *com, gs_dom dom, gs_op op,
20  const void *v, uint vn, void *buffer)
21 {
22  comm_req req[2];
23  size_t vsize = vn*gs_dom_size[dom];
24  const uint id=com->id, np=com->np;
25  uint n = np, c=1, odd=0, base=0;
26  void *buf[2];
27  void *red = (char*)scan+vsize;
28  buf[0]=buffer,buf[1]=(char*)buffer+vsize;
29  while(n>1) {
30  odd=(odd<<1)|(n&1);
31  c<<=1, n>>=1;
32  if(id>=base+n) c|=1, base+=n, n+=(odd&1);
33  }
34  gs_init_array(scan,vn,dom,op);
35  memcpy(red,v,vsize);
36  while(n<np) {
37  if(c&1) n-=(odd&1), base-=n;
38  c>>=1, n<<=1, n+=(odd&1);
39  odd>>=1;
40  if(base==id) {
41  comm_irecv(&req[0],com, buf[0],vsize, id+n/2,id+n/2);
42  comm_isend(&req[1],com, red ,vsize, id+n/2,id);
43  comm_wait(req,2);
44  gs_gather_array(red,buf[0],vn,dom,op);
45  } else {
46  comm_irecv(&req[0],com, scan,vsize, base,base);
47  comm_isend(&req[1],com, red ,vsize, base,id);
48  comm_wait(req,2);
49  break;
50  }
51  }
52  while(n>1) {
53  if(base==id) {
54  comm_send(com, scan ,2*vsize, id+n/2,id);
55  } else {
56  comm_recv(com, buffer,2*vsize, base,base);
57  gs_gather_array(scan,buf[0],vn,dom,op);
58  memcpy(red,buf[1],vsize);
59  }
60  odd=(odd<<1)|(n&1);
61  c<<=1, n>>=1;
62  if(id>=base+n) c|=1, base+=n, n+=(odd&1);
63  }
64 }
65 
66 
67 static void allreduce_imp(const struct comm *com, gs_dom dom, gs_op op,
68  void *v, uint vn, void *buf)
69 {
70  size_t total_size = vn*gs_dom_size[dom];
71  const uint id=com->id, np=com->np;
72  uint n = np, c=1, odd=0, base=0;
73  while(n>1) {
74  odd=(odd<<1)|(n&1);
75  c<<=1, n>>=1;
76  if(id>=base+n) c|=1, base+=n, n+=(odd&1);
77  }
78  while(n<np) {
79  if(c&1) n-=(odd&1), base-=n;
80  c>>=1, n<<=1, n+=(odd&1);
81  odd>>=1;
82  if(base==id) {
83  comm_recv(com, buf,total_size, id+n/2,id+n/2);
84  gs_gather_array(v,buf,vn, dom,op);
85  } else {
86  comm_send(com, v,total_size, base,id);
87  break;
88  }
89  }
90  while(n>1) {
91  if(base==id)
92  comm_send(com, v,total_size, id+n/2,id);
93  else
94  comm_recv(com, v,total_size, base,base);
95  odd=(odd<<1)|(n&1);
96  c<<=1, n>>=1;
97  if(id>=base+n) c|=1, base+=n, n+=(odd&1);
98  }
99 }
100 
101 void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op,
102  const void *v, uint vn, void *buffer)
103 {
104  scan_imp(scan, com,dom,op, v,vn, buffer);
105 }
106 
107 void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op,
108  void *v, uint vn, void *buf)
109 {
110  if(vn==0) return;
111 #ifdef MPI
112  {
113  MPI_Datatype mpitype;
114  MPI_Op mpiop;
115  #define DOMAIN_SWITCH() do { \
116  switch(dom) { case gs_double: mpitype=MPI_DOUBLE; break; \
117  case gs_float: mpitype=MPI_FLOAT; break; \
118  case gs_int: mpitype=MPI_INT; break; \
119  case gs_long: mpitype=MPI_LONG; break; \
120  WHEN_LONG_LONG(case gs_long_long: mpitype=MPI_LONG_LONG; break;) \
121  default: goto comm_allreduce_byhand; \
122  } \
123  } while(0)
124  DOMAIN_SWITCH();
125  #undef DOMAIN_SWITCH
126  switch(op) { case gs_add: mpiop=MPI_SUM; break;
127  case gs_mul: mpiop=MPI_PROD; break;
128  case gs_min: mpiop=MPI_MIN; break;
129  case gs_max: mpiop=MPI_MAX; break;
130  default: goto comm_allreduce_byhand;
131  }
132  MPI_Allreduce(v,buf,vn,mpitype,mpiop,com->c);
133  memcpy(v,buf,vn*gs_dom_size[dom]);
134  return;
135  }
136 #endif
137 #ifdef MPI
138 comm_allreduce_byhand:
139  allreduce_imp(com,dom,op, v,vn, buf);
140 #endif
141 }
142 
143 double comm_dot(const struct comm *comm, double *v, double *w, uint n)
144 {
145  double s=tensor_dot(v,w,n),b;
146  comm_allreduce(comm,gs_double,gs_add, &s,1, &b);
147  return s;
148 }
149 
/* Template for the typed reduction helpers
 *
 *   T comm_reduce__T(const struct comm *comm, gs_op op, const T *in, uint n)
 *
 * Each generated function folds the n local entries of `in` into an
 * accumulator that starts at gs_identity_T[op], then combines that single
 * value across ranks with comm_allreduce and returns it.
 */

/* Fold every entry of `in` into `accum` with GS_DO_<OP>. The count is only
   tested after an element has been consumed, so n must be non-zero on
   entry (DEFINE_REDUCE checks this). Uses `in`, `n` and `accum` from the
   enclosing function. */
#define WITH_OP(T,OP) \
  do { \
    T term = *in++; \
    GS_DO_##OP(accum,term); \
  } while(--n)

/* One `case` label per operator, expanded through GS_FOR_EACH_OP. */
#define SWITCH_OP_CASE(T,OP) \
  case gs_##OP: WITH_OP(T,OP); break;

/* Dispatch on the run-time operator; gs_op_n is accepted and does nothing. */
#define SWITCH_OP(T,op) \
  do switch(op) { \
    GS_FOR_EACH_OP(T,SWITCH_OP_CASE) \
    case gs_op_n: break; \
  } while(0)

#define DEFINE_REDUCE(T) \
T PREFIXED_NAME(comm_reduce__##T)( \
    const struct comm *comm, gs_op op, const T *in, uint n) \
{ \
  T accum = gs_identity_##T[op]; \
  T buf; \
  if(n) SWITCH_OP(T,op); \
  comm_allreduce(comm,gs_##T,op, &accum,1, &buf); \
  return accum; \
}

/* NOTE(review): no invocation of DEFINE_REDUCE is visible between its
   definition and the #undef below, so as written no comm_reduce__T function
   is generated. Presumably one instantiation per domain belongs here (e.g.
   via GS_FOR_EACH_DOMAIN) -- confirm against the upstream file. */

#undef DEFINE_REDUCE
#undef WITH_OP
#undef SWITCH_OP
#undef SWITCH_OP_CASE
175 
#define uint
Definition: types.h:70
comm_ext c
Definition: comm.h:87
static void allreduce_imp(const struct comm *com, gs_dom dom, gs_op op, void *v, uint vn, void *buf)
Definition: comm.c:67
static void comm_recv(const struct comm *c, void *p, size_t n, uint src, int tag)
Definition: comm.h:199
n
Definition: xxt_test.m:73
static void comm_wait(comm_req *req, int n)
Definition: comm.h:236
#define gs_gather_array
Definition: gs_local.c:8
void comm_allreduce(const struct comm *com, gs_dom dom, gs_op op, void *v, uint vn, void *buf)
Definition: comm.c:107
static void comm_send(const struct comm *c, void *p, size_t n, uint dst, int tag)
Definition: comm.h:212
Definition: comm.h:85
#define DEFINE_REDUCE(T)
Definition: comm.c:159
void comm_scan(void *scan, const struct comm *com, gs_dom dom, gs_op op, const void *v, uint vn, void *buffer)
Definition: comm.c:101
uint comm_gbl_np
Definition: comm.c:14
gs_op
Definition: gs_defs.h:77
int comm_req
Definition: comm.h:70
double comm_dot(const struct comm *comm, double *v, double *w, uint n)
Definition: comm.c:143
gs_dom
Definition: gs_defs.h:61
#define tensor_dot
Definition: tensor.c:7
#define GS_DEFINE_IDENTITIES()
Definition: gs_defs.h:47
const gs_dom dom
Definition: gs_test.c:15
uint np
Definition: comm.h:86
Definition: mem.h:111
static void scan_imp(void *scan, const struct comm *com, gs_dom dom, gs_op op, const void *v, uint vn, void *buffer)
Definition: comm.c:19
struct array buffer
Definition: mem.h:153
static void comm_irecv(comm_req *req, const struct comm *c, void *p, size_t n, uint src, int tag)
Definition: comm.h:220
uint comm_gbl_id
Definition: comm.c:14
uint id
Definition: comm.h:86
#define GS_FOR_EACH_DOMAIN(macro)
Definition: gs_defs.h:18
#define GS_DEFINE_DOM_SIZES()
Definition: gs_defs.h:70
establishes some macros to establish naming conventions
static uint np
Definition: findpts_test.c:63
static void comm_isend(comm_req *req, const struct comm *c, void *p, size_t n, uint dst, int tag)
Definition: comm.h:228
#define gs_init_array
Definition: gs_local.c:9