/* Usage:

     #define SORT_NAME  mysort
     #define SORT_KEY_T mykey_t
     #include "sort_stable_impl.c"

   This will instantiate the following APIs:

     // Returns non-zero if cnt is a supported sort size and zero if
     // not.  Unsupported values are negative cnt (only applicable for
     // signed indexing types) or unreasonably large cnt (such that the
     // scratch space requirement would be bigger than UINT64_MAX).

     static inline int
     mysort_stable_cnt_valid( uint64_t cnt );

     // Returns the footprint required for a scratch space of any
     // alignment sufficient for sorting up to cnt items inclusive.
     // Returns 0 if cnt is not valid or no scratch is needed.

     static inline uint64_t
     mysort_stable_scratch_footprint( uint64_t cnt );

     // Sorts the elements of key into ascending order.  The algorithm
     // has a best case of ~0.5 cnt lg cnt and an average and worst case
     // of cnt lg cnt such that it is moderately resistant to timing and
     // computational DOS attacks.  Further, the sort is stable.  The
     // values in scratch are irrelevant on input.  Returns where the
     // sorted data ended up (either key or ALIGN_UP(scratch,mykey_t)).
     // That is, if this returns key, the values in key are the stably
     // sorted data and scratch was clobbered.  Otherwise, the values
     // at ALIGN_UP(scratch,mykey_t) are the stably sorted data and key
     // was clobbered.  Users wanting the data in a particular location
     // can copy as necessary (allowing this flexibility minimizes the
     // amount of copying needed to do the sorting).  E.g.:
     //
     //   mykey_t * tmp = mysort_stable( key, cnt, scratch );
     //   if( tmp!=key ) memcpy( key, tmp, cnt*sizeof(mykey_t) );
     //
     // scratch points to a scratch memory region of any alignment with
     // room for mysort_stable_scratch_footprint( cnt ) bytes.  (Any
     // normally declared / normally allocated region with mykey_t
     // compatible alignment and space for cnt mykey_t's will work
     // too.)
     //
     // FIXME: CONSIDER RETURNING NULL IF BAD INPUT ARGS

     static mykey_t *
     mysort_stable( mykey_t * key,       // Indexed [0,cnt)
                    uint64_t  cnt,       // Assumes mysort_stable_cnt_valid( cnt ) is true
                    void *    scratch ); // Pointer to suitable scratch region

   This can be included multiple times with different names / parameters
   to define many families of sorts that might be useful for a
   compilation unit.

   Other defines exist to change the sort criteria / direction, linkage
   and so forth.  See below for details. */

| 60 | +#include "../../util/compat_stdint.h" /* For uint64_t */ |
| 61 | +#include "../../util/align.h" /* For ALIGN_UP */ |
| 62 | + |
| 63 | +#ifndef SORT_NAME |
| 64 | +#error "Define SORT_NAME" |
| 65 | +#endif |
| 66 | + |
| 67 | +#ifndef SORT_KEY_T |
| 68 | +#error "Define SORT_KEY_T; nominally a POD (plain-old-data) type" |
| 69 | +#endif |
| 70 | + |
| 71 | +/* Define SORT_IDX_T to specify the data type used to index key arrays. |
| 72 | + Default is uint64_t. */ |
| 73 | + |
| 74 | +#ifndef SORT_IDX_T |
| 75 | +#define SORT_IDX_T uint64_t |
| 76 | +#endif |
| 77 | + |
| 78 | +/* Define SORT_BEFORE to specify how sorted keys should be ordered. |
| 79 | + Default is ascending as defined by the "<" operator for the type. |
| 80 | + SORT_BEFORE(u,v) should be non-zero if key u should go strictly |
| 81 | + before key v and zero otherwise. */ |
| 82 | + |
| 83 | +#ifndef SORT_BEFORE |
| 84 | +#define SORT_BEFORE(u,v) ((u)<(v)) |
| 85 | +#endif |
| 86 | + |
| 87 | +/* Define SORT_STATIC to specify the type of linkage the non-inlined |
| 88 | + APIs should have (e.g. if defined to nothing, these will have |
| 89 | + external linkage). Default is static linkage. */ |
| 90 | + |
| 91 | +#ifndef SORT_STATIC |
| 92 | +#define SORT_STATIC static |
| 93 | +#endif |
| 94 | + |
| 95 | +/* Define SORT_STATIC_INLINE to specify the type of linkage inlined |
| 96 | + APIs should have (e.g. if defined to nothing, these will have |
| 97 | + non-inlined external linkage). Default is static inline linkage. */ |
| 98 | + |
| 99 | +#ifndef SORT_STATIC_INLINE |
| 100 | +#define SORT_STATIC_INLINE static inline |
| 101 | +#endif |
| 102 | + |
| 103 | +/* Some macro preprocessor helpers */ |
| 104 | + |
| 105 | +#define SORT_C3(a,b,c)a##b##c |
| 106 | +#define SORT_XC3(a,b,c)SORT_C3(a,b,c) |
| 107 | +#define SORT_IMPL(impl)SORT_XC3(SORT_NAME,_,impl) |
| 108 | + |
| 109 | +SORT_STATIC_INLINE int |
| 110 | +SORT_IMPL(stable_cnt_valid)( SORT_IDX_T cnt ) { |
| 111 | + /* Written this way for complier warning free signed SORT_IDX_T and/or |
| 112 | + byte size SORT_KEY_T support (e.g. compiler often will warn to the |
| 113 | + effect "n>=0 always true" if idx is an unsigned type or |
| 114 | + "n<=UINT64_MAX always true" if key is a byte type). */ |
| 115 | + static uint64_t const max = ((UINT64_MAX - (uint64_t)alignof(SORT_KEY_T) + (uint64_t)1) / (uint64_t)sizeof(SORT_KEY_T)); |
| 116 | + return !cnt || (((SORT_IDX_T)0)<cnt && ((uint64_t)cnt)<max) || ((uint64_t)cnt)==max; |
| 117 | +} |
| 118 | + |
| 119 | +SORT_STATIC_INLINE uint64_t |
| 120 | +SORT_IMPL(stable_scratch_footprint)( SORT_IDX_T cnt ) { |
| 121 | + if( !SORT_IMPL(stable_cnt_valid)( cnt ) ) return (uint64_t)0; |
| 122 | + /* Guaranteed not to overflow given a valid cnt */ |
| 123 | + return ((uint64_t)sizeof (SORT_KEY_T))*(uint64_t)cnt /* Space for the n SORT_KEY_T's */ |
| 124 | + + ((uint64_t)alignof(SORT_KEY_T))-(uint64_t)1; /* Worst case alignment padding */ |
| 125 | +} |
| 126 | + |
| 127 | +SORT_STATIC SORT_KEY_T * |
| 128 | +SORT_IMPL(stable_node)( SORT_KEY_T * x, |
| 129 | + SORT_IDX_T n, |
| 130 | + SORT_KEY_T * t ) { |
| 131 | + |
| 132 | + /* Optimized handling of base cases */ |
| 133 | + |
| 134 | +# include "sort_stable_base.c" |
| 135 | + |
| 136 | + /* Note that n is at least 2 at this point */ |
| 137 | + /* Break input into approximately equal halves and sort them */ |
| 138 | + |
| 139 | + SORT_KEY_T * xl = x; |
| 140 | + SORT_KEY_T * tl = t; |
| 141 | + SORT_IDX_T nl = n >> 1; |
| 142 | + SORT_KEY_T * yl = SORT_IMPL(stable_node)( xl,nl, tl ); |
| 143 | + |
| 144 | + SORT_KEY_T * xr = x + nl; |
| 145 | + SORT_KEY_T * tr = t + nl; |
| 146 | + SORT_IDX_T nr = n - nl; |
| 147 | + SORT_KEY_T * yr = SORT_IMPL(stable_node)( xr,nr, tr ); |
| 148 | + |
| 149 | + /* If left subsort result ended up in orig array, merge into temp |
| 150 | + array. Otherwise, merge into orig array. */ |
| 151 | + |
| 152 | + if( yl==xl ) x = t; |
| 153 | + |
| 154 | + /* At this point, note that yl does not overlap with the location for |
| 155 | + merge output at this point. yr might overlap (with the right half) |
| 156 | + with the location for merge output but this will still work in that |
| 157 | + case. */ |
| 158 | + |
| 159 | + SORT_IDX_T i = (SORT_IDX_T)0; |
| 160 | + SORT_IDX_T j = (SORT_IDX_T)0; |
| 161 | + SORT_IDX_T k = (SORT_IDX_T)0; |
| 162 | + |
| 163 | + /* Note that nl and nr are both at least one at this point so at least |
| 164 | + one iteration of the loop body is necessary. */ |
| 165 | + |
| 166 | + for(;;) { /* Minimal C language operations */ |
| 167 | + if( SORT_BEFORE( yr[k], yl[j] ) ) { |
| 168 | + x[i++] = yr[k++]; |
| 169 | + if( k>=nr ) { /* append left stragglers (at least one) */ do x[i++] = yl[j++]; while( j<nl ); break; } |
| 170 | + } else { |
| 171 | + x[i++] = yl[j++]; |
| 172 | + if( j>=nl ) { /* append right stragglers (at least one) */ do x[i++] = yr[k++]; while( k<nr ); break; } |
| 173 | + } |
| 174 | + } |
| 175 | + |
| 176 | + return x; |
| 177 | +} |
| 178 | + |
| 179 | +SORT_STATIC_INLINE SORT_KEY_T * |
| 180 | +SORT_IMPL(stable)( SORT_KEY_T * key, |
| 181 | + SORT_IDX_T cnt, /* Assumed valid cnt */ |
| 182 | + void * scratch ) { |
| 183 | + return SORT_IMPL(stable_node)( key, cnt, ALIGN_UP( scratch, SORT_KEY_T ) ); |
| 184 | +} |
| 185 | + |
/* Clear every parameter and helper macro so this file can be included
   again with a different configuration. */

/* User supplied (or defaulted) parameters */

#undef SORT_NAME
#undef SORT_KEY_T
#undef SORT_IDX_T
#undef SORT_BEFORE
#undef SORT_STATIC
#undef SORT_STATIC_INLINE

/* Internal token pasting helpers */

#undef SORT_C3
#undef SORT_XC3
#undef SORT_IMPL
0 commit comments