1 #if !defined(T) || !defined(SORT_SUFFIX)
2 #error sort_imp.h not meant to be compiled by itself
5 #define sort_data TOKEN_PASTE(sort_data ,SORT_SUFFIX)
6 #define radix_count TOKEN_PASTE(radix_count ,SORT_SUFFIX)
7 #define radix_offsets TOKEN_PASTE(radix_offsets ,SORT_SUFFIX)
8 #define radix_zeros TOKEN_PASTE(radix_zeros ,SORT_SUFFIX)
9 #define radix_passv TOKEN_PASTE(radix_passv ,SORT_SUFFIX)
10 #define radix_sortv TOKEN_PASTE(radix_sortv ,SORT_SUFFIX)
11 #define radix_passp0_b TOKEN_PASTE(radix_passp0_b ,SORT_SUFFIX)
12 #define radix_passp_b TOKEN_PASTE(radix_passp_b ,SORT_SUFFIX)
13 #define radix_passp_m TOKEN_PASTE(radix_passp_m ,SORT_SUFFIX)
14 #define radix_passp_e TOKEN_PASTE(radix_passp_e ,SORT_SUFFIX)
15 #define radix_passp0_be TOKEN_PASTE(radix_passp0_be,SORT_SUFFIX)
16 #define radix_passp_be TOKEN_PASTE(radix_passp_be, SORT_SUFFIX)
17 #define radix_sortp TOKEN_PASTE(radix_sortp ,SORT_SUFFIX)
18 #define merge_sortv TOKEN_PASTE(merge_sortv ,SORT_SUFFIX)
19 #define merge_copy_perm TOKEN_PASTE(merge_copy_perm,SORT_SUFFIX)
20 #define merge_sortp0 TOKEN_PASTE(merge_sortp0 ,SORT_SUFFIX)
21 #define merge_sortp TOKEN_PASTE(merge_sortp ,SORT_SUFFIX)
22 #define heap_sortv TOKEN_PASTE(heap_sortv ,SORT_SUFFIX)
24 #define sortv PREFIXED_NAME(TOKEN_PASTE(sortv,SORT_SUFFIX))
25 #define sortp PREFIXED_NAME(TOKEN_PASTE(sortp,SORT_SUFFIX))
29 #define INC_PTR(A,stride) ((A)=(T*)((char*)(A)+(stride)))
30 #define INDEX_PTR(A,stride,i) (*(T*)((char*)(A)+(i)*(stride)))
53 #define STATIC_DIGIT_BUCKETS 1
56 #define DIGIT_VALUES (1<<DIGIT_BITS)
57 #define DIGIT_MASK ((T)(DIGIT_VALUES-1))
58 #define CEILDIV(a,b) (((a)+(b)-1)/(b))
59 #define DIGITS CEILDIV(CHAR_BIT*sizeof(T),DIGIT_BITS)
60 #define VALUE_BITS (DIGIT_BITS*DIGITS)
61 #define COUNT_SIZE (DIGITS*DIGIT_VALUES)
64 #define COUNT_DIGIT_01(n,i) \
65 if(n>i) count[i][val&DIGIT_MASK]++, val>>=DIGIT_BITS
66 #define COUNT_DIGIT_02(n,i) COUNT_DIGIT_01(n,i); COUNT_DIGIT_01(n,i+ 1)
67 #define COUNT_DIGIT_04(n,i) COUNT_DIGIT_02(n,i); COUNT_DIGIT_02(n,i+ 2)
68 #define COUNT_DIGIT_08(n,i) COUNT_DIGIT_04(n,i); COUNT_DIGIT_04(n,i+ 4)
69 #define COUNT_DIGIT_16(n,i) COUNT_DIGIT_08(n,i); COUNT_DIGIT_08(n,i+ 8)
70 #define COUNT_DIGIT_32(n,i) COUNT_DIGIT_16(n,i); COUNT_DIGIT_16(n,i+16)
71 #define COUNT_DIGIT_64(n,i) COUNT_DIGIT_32(n,i); COUNT_DIGIT_32(n,i+32)
75 const T *
restrict A,
const T *
const end,
const unsigned stride)
89 }
while(
INC_PTR(A,stride),A!=end);
106 const uint c0=c[0], c1=c[1], c2=c[2], c3=c[3];
107 const uint o1=sum+c0, o2=o1+c1, o3=o2+c2;
108 c[0]=
sum, c[1]=o1, c[2]=o2, c[3]=o3;
118 unsigned digits=0, sh=0;
uint *c = &count[0][0];
120 if(bitorkey&
DIGIT_MASK) *shift++ = sh, *offsets++ = c, ++digits,
127 const T *
restrict A,
const T *
const end,
const unsigned stride,
134 T *
out,
const T *
A,
const uint n,
const unsigned stride,
140 const unsigned digits =
radix_zeros(bitorkey,count,shift,offsets);
142 memset(out,0,n*
sizeof(
T));
144 T *src, *dst;
unsigned d;
145 if(out==A || (digits&1)==0) dst=
out,src=
work;
148 for(d=1;d!=digits;++d) {
150 radix_passv(src,src+n,
sizeof(
T),shift[d],offsets[d],dst);
153 if(src!=out) memcpy(out,src,n*
sizeof(
T));
172 const T *
const restrict A,
const uint n,
const unsigned stride,
176 const uint *
const pe = p+
n;
192 d->
v=src->v,d->
i=src->i;
201 do out[off[(src->v>>sh)&
DIGIT_MASK]++]=src->i;
while(++src!=end);
219 uint *q =
p, *
const qe = p+
n;
220 uint *w = &work[0].i;
226 memcpy(p,w,n*
sizeof(
uint));
237 unsigned digits =
radix_zeros(bitorkey,count,shift,offsets);
239 if(!perm_start) {
uint i=0;
do *idx++=i++;
while(i!=n); }
240 }
else if(digits==1) {
241 if(perm_start)
radix_passp_be (idx,A,n,stride,shift[0],offsets[0],work);
245 if((digits&1)==0) dst=
work,src=dst+
n;
246 else src=
work,dst=src+
n;
247 if(perm_start)
radix_passp_b (idx,A,n,stride,shift[0],offsets[0],src);
249 for(d=1;d!=digits-1;++d) {
266 #define MERGE_2(p,v) \
267 if(VAL(v[1])<VAL(v[0])) p[0]=v[1],p[1]=v[0]; \
268 else p[0]=v[0],p[1]=v[1]
269 #define MERGE_3(p,v) do \
270 if(VAL(v[1])<VAL(v[0])) { \
271 if(VAL(v[2])<VAL(v[1])) p[0]=v[2],p[1]=v[1],p[2]=v[0]; \
272 else { if(VAL(v[2])<VAL(v[0])) p[0]=v[1],p[1]=v[2],p[2]=v[0]; \
273 else p[0]=v[1],p[1]=v[0],p[2]=v[2]; } \
275 if(VAL(v[2])<VAL(v[0])) p[0]=v[2],p[1]=v[0],p[2]=v[1]; \
276 else { if(VAL(v[2])<VAL(v[1])) p[0]=v[0],p[1]=v[2],p[2]=v[1]; \
277 else p[0]=v[0],p[1]=v[1],p[2]=v[2]; } \
279 #define MERGE_SORT() \
281 uint i=0, n=An, base=-n, odd=0, c=0, b=1; \
285 base+=n, n+=(odd&1), c|=1, b^=1; \
286 while(n>3) odd<<=1,odd|=(n&1),n>>=1,c<<=1,b^=1; \
288 base-=n-(odd&1),n<<=1,n-=(odd&1),odd>>=1,c>>=1; \
292 DATA v[2]; SETVAL(v[0],i), SETVAL(v[1],i+1); \
296 DATA v[3]; SETVAL(v[0],i), SETVAL(v[1],i+1), SETVAL(v[2],i+2); \
300 const uint na = n>>1, nb = (n+1)>>1; \
301 const DATA *restrict ap = buf[b^1]+base, *const ae = ap+na; \
302 DATA *restrict bp = p+na, *const be = bp+nb; \
304 if(VAL((*bp))<VAL((*ap))) { \
306 if(bp!=be) continue; \
307 do *p++=*ap++; while(ap!=ae); \
326 #define SETVAL(x,ai) x=*A,INC_PTR(A,stride)
338 uint n_by_8 = (n+7)/8;
340 case 0:
do { *idx++ = (p++)->
i;
341 case 7: *idx++ = (p++)->
i;
342 case 6: *idx++ = (p++)->
i;
343 case 5: *idx++ = (p++)->
i;
344 case 4: *idx++ = (p++)->
i;
345 case 3: *idx++ = (p++)->
i;
346 case 2: *idx++ = (p++)->
i;
347 case 1: *idx++ = (p++)->
i;
348 }
while (--n_by_8 > 0);
358 #define DATA sort_data
360 #define SETVAL(x,ai) x.v=*A,INC_PTR(A,stride),x.i=ai
370 const T *
const restrict A,
const uint An,
const unsigned stride,
374 #define DATA sort_data
376 #define SETVAL(x,ai) x.i=idx[ai],x.v=INDEX_PTR(A,stride,x.i)
401 unsigned h=
i,
p = (h-1)>>1;
402 if(A[p] >= item)
continue;
403 do A[h]=A[
p], h=
p, p=(p-1)>>1;
while(h && A[p] < item);
412 unsigned ch = 1+(h<<1), r = ch+1;
413 if(r<i && A[ch] < A[r]) ch=r;
414 if(ch>=i || item >= A[ch])
break;
454 if(stride!=
sizeof(
T))
455 fail(1,__FILE__,__LINE__,
"in-place sort with non-unit stride");
486 work = (
sort_data*)((
char*)buf->ptr+work_off);
490 if(start_perm)
merge_sortp (perm, A,n,stride, work);
497 work = (
sort_data*)((
char*)buf->ptr+work_off);
498 radix_sortp(perm,start_perm, A,n,stride, work,count);
504 work = (
sort_data*)((
char*)buf->ptr+work_off);
506 radix_sortp(perm,start_perm, A,n,stride, work,count);
511 #undef STATIC_DIGIT_BUCKETS
531 #undef radix_passp_be
532 #undef radix_passp0_be
536 #undef radix_passp0_b
static double sum(struct xxt *data, double v, uint n, uint tag)
#define STATIC_DIGIT_BUCKETS
#define buffer_reserve(b, max)
#define INDEX_PTR(A, stride, i)
#define COUNT_DIGIT_64(n, i)
#define INC_PTR(A, stride)
static double work[TNR *NS]
void fail(int status, const char *file, unsigned line, const char *fmt,...)