Skip to content

Commit 2c6c836

Browse files
authoredMay 17, 2024··
revert: bring merge-sort back
This reverts commit 2ea67f3. Apparently the numbers that I was computing for heapsort were not correct and were for the closed quick-select algorithm. It seems that the heapsort compute unit exceeds the previous implementation.
1 parent 2ea67f3 commit 2c6c836

13 files changed

+561
-239
lines changed
 

‎program/c/makefile

+1
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ cpyth-native: features.h
4141
test: features.h
4242
mkdir -p $(OUT_DIR)/test/
4343
gcc -c ./src/oracle/model/test_price_model.c -o $(OUT_DIR)/test/test_price_model.o -fPIC
44+
gcc -c ./src/oracle/sort/test_sort_stable.c -o $(OUT_DIR)/test/test_sort_stable.o -fPIC
4445
gcc -c ./src/oracle/util/test_align.c -o $(OUT_DIR)/test/test_align.o -fPIC
4546
gcc -c ./src/oracle/util/test_avg.c -o $(OUT_DIR)/test/test_avg.o -fPIC
4647
gcc -c ./src/oracle/util/test_hash.c -o $(OUT_DIR)/test/test_hash.o -fPIC
+81-73
Original file line numberDiff line numberDiff line change
@@ -1,104 +1,112 @@
11
#include "price_model.h"
22
#include "../util/avg.h" /* For avg_2_int64 */
33

4-
/*
5-
* In-place bottom-up Heapsort implementation optimized for minimal compute unit usage in BPF.
6-
*
7-
* Initially it creates a max heap in linear time and then to get ascending
8-
* order it swaps the root with the last element and then sifts down the root.
9-
*
10-
* The number of comparisons in average case is nlgn + O(1) and in worst case is
11-
* 1.5nlgn + O(n).
12-
*
13-
* There are a lot of (j-1) or (j+1) math in the code which can be optimized by
14-
* thinking of a as 1-based array. Fortunately, BPF compiler optimizes that for us.
15-
*/
16-
void heapsort(int64_t * a, uint64_t n) {
17-
if (n <= 1) return;
18-
19-
/*
20-
* This is a bottom-up heapify which is linear in time.
21-
*/
22-
for (uint64_t i = n / 2 - 1;; --i) {
23-
int64_t root = a[i];
24-
uint64_t j = i * 2 + 1;
25-
while (j < n) {
26-
if (j + 1 < n && a[j] < a[j + 1]) ++j;
27-
if (root >= a[j]) break;
28-
a[(j - 1) / 2] = a[j];
29-
j = j * 2 + 1;
30-
}
31-
a[(j - 1) / 2] = root;
32-
33-
if (i == 0) break;
34-
}
35-
36-
for (uint64_t i = n - 1; i > 0; --i) {
37-
int64_t tmp = a[0];
38-
a[0] = a[i];
39-
a[i] = tmp;
40-
41-
int64_t root = a[0];
42-
uint64_t j = 1;
43-
while (j < i) {
44-
if (j + 1 < i && a[j] < a[j + 1]) ++j;
45-
if (root >= a[j]) break;
46-
a[(j - 1) / 2] = a[j];
47-
j = j * 2 + 1;
48-
}
49-
a[(j - 1) / 2] = root;
50-
}
51-
}
4+
#define SORT_NAME int64_sort_ascending
5+
#define SORT_KEY_T int64_t
6+
#include "../sort/tmpl/sort_stable.c"
527

53-
/*
54-
* Find the 25, 50, and 75 percentiles of the given quotes using heapsort.
55-
*
56-
* This implementation optimizes the price_model_core function for minimal compute unit usage in BPF.
57-
*
58-
* In Solana, each BPF instruction costs 1 unit of compute and is much different than a native code
59-
* execution time. Here are some of the differences:
60-
* 1. There is no cache, so memory access is much more expensive.
61-
* 2. The instruction set is very minimal, and there are only 10 registers available.
62-
* 3. The BPF compiler is not very good at optimizing the code.
63-
* 4. The stack size is limited and having extra stack frame has high overhead.
64-
*
65-
* This implementation is chosen among other implementations such as merge-sort, quick-sort, and quick-select
66-
* because it is very fast, has small number of instructions, and has a very small memory footprint by being
67-
* in-place and is non-recursive and has a nlogn worst-case time complexity.
68-
*/
698
int64_t *
709
price_model_core( uint64_t cnt,
7110
int64_t * quote,
7211
int64_t * _p25,
7312
int64_t * _p50,
74-
int64_t * _p75) {
75-
heapsort(quote, cnt);
13+
int64_t * _p75,
14+
void * scratch ) {
15+
16+
/* Sort the quotes. The sorting implementation used here is a highly
17+
optimized mergesort (merge with an unrolled insertion sorting
18+
network for small n base cases). The best case is ~0.5 n lg n compares
19+
and the average and worst cases are ~n lg n compares.
20+
21+
While not completely data oblivious, this has quite low variance in
22+
operation count practically and this is _better_ than quicksort's
23+
average case and quicksort's worst case is a computational
24+
denial-of-service and timing attack vulnerable O(n^2). Unlike
25+
quicksort, this is also stable (but this stability does not
26+
currently matter ... it might be a factor in future models).
27+
28+
A data oblivious sorting network approach might be viable here
29+
and would have a completely deterministic operations count. It
30+
currently isn't used as the best known practical approaches for
31+
general n have a worse algorithmic cost (O( n (lg n)^2 )) and,
32+
while the application probably doesn't need perfect obliviousness,
33+
mergesort is still moderately oblivious and the application can
34+
benefit from mergesort's lower operations cost. (The main drawback
35+
of mergesort over quicksort is that it isn't in place, but memory
36+
footprint isn't an issue here.)
37+
38+
Given the operations cost model (e.g. cache friendliness is not
39+
incorporated), a radix sort might be viable here (O(n) in best /
40+
average / worst). It currently isn't used as we expect invocations
41+
with small-ish n to be common and radix sort would have large
42+
coefficients on the O(n) and additional fixed overheads that would
43+
make it more expensive than mergesort in this regime.
44+
45+
Note: price_model_cnt_valid( cnt ) implies
46+
int64_sort_ascending_cnt_valid( cnt ) currently.
47+
48+
Note: consider filtering out "NaN" quotes (i.e. INT64_MIN)? */
49+
50+
int64_t * sort_quote = int64_sort_ascending_stable( quote, cnt, scratch );
51+
52+
/* Extract the p25
53+
54+
There are many variants with subtle tradeoffs here. One option is
55+
to interpolate when the ideal p25 is bracketed by two samples (akin
56+
to the p50 interpolation below when the number of quotes is even).
57+
That is, for p25, interpolate between quotes floor((cnt-2)/4) and
58+
ceil((cnt-2)/4) with the weights determined by cnt mod 4. The
59+
current preference is to not do that as it is slightly more
60+
complex, doesn't exactly always minimize the current loss function
61+
and is more exposed to the confidence intervals getting skewed by
62+
bum quotes when the number of quotes is small.
63+
64+
Another option is to use the inside quote of the above pair. That
65+
is, for p25, use quote ceil((cnt-2)/4) == floor((cnt+1)/4) ==
66+
(cnt+1)>>2. The current preference is not to do this as, though
67+
this has stronger bum quote robustness, it results in p25==p50==p75
68+
when cnt==3. (In this case, the above wants to do an interpolation
69+
between quotes 0 and 1 for the p25 and between quotes 1 and 2
70+
for the p75. But limiting to just the inside quote results in
71+
p25/p50/p75 all using the median quote.)
72+
73+
A tweak to this option, for p25, is to use floor(cnt/4) == cnt>>2.
74+
This is simple, has the same asymptotic behavior for large cnt, has
75+
good behavior in the cnt==3 case and practically as good bum quote
76+
rejection in the moderate cnt case. */
7677

77-
/* Extract the p25 */
7878
uint64_t p25_idx = cnt >> 2;
79-
*_p25 = quote[p25_idx];
79+
80+
*_p25 = sort_quote[p25_idx];
8081

8182
/* Extract the p50 */
83+
8284
if( (cnt & (uint64_t)1) ) { /* Odd number of quotes */
85+
8386
uint64_t p50_idx = cnt >> 1; /* ==ceil((cnt-1)/2) */
84-
*_p50 = quote[p50_idx];
87+
88+
*_p50 = sort_quote[p50_idx];
89+
8590
} else { /* Even number of quotes (at least 2) */
91+
8692
uint64_t p50_idx_right = cnt >> 1; /* == ceil((cnt-1)/2)> 0 */
8793
uint64_t p50_idx_left = p50_idx_right - (uint64_t)1; /* ==floor((cnt-1)/2)>=0 (no overflow/underflow) */
8894

89-
int64_t vl = quote[p50_idx_left];
90-
int64_t vr = quote[p50_idx_right];
95+
int64_t vl = sort_quote[p50_idx_left ];
96+
int64_t vr = sort_quote[p50_idx_right];
9197

9298
/* Compute the average of vl and vr (with floor / round toward
9399
negative infinity rounding and without possibility of
94100
intermediate overflow). */
101+
95102
*_p50 = avg_2_int64( vl, vr );
96103
}
97104

98105
/* Extract the p75 (this is the mirror image of the p25 case) */
99106

100107
uint64_t p75_idx = cnt - ((uint64_t)1) - p25_idx;
101-
*_p75 = quote[p75_idx];
102108

103-
return quote;
109+
*_p75 = sort_quote[p75_idx];
110+
111+
return sort_quote;
104112
}

‎program/c/src/oracle/model/price_model.h

+82-11
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,91 @@
88
extern "C" {
99
#endif
1010

11-
/*
12-
* This function computes the p25, p50 and p75 percentiles of the given quotes and
13-
* writes them to the given pointers. It also returns the sorted quotes array. Being
14-
* sorted is not necessary for this model to work, and is only relied upon by the
15-
* tests to verify the correctness of the model with more confidence.
16-
*
17-
* The quote array might get modified by this function.
18-
*/
19-
int64_t *
20-
price_model_core( uint64_t cnt, /* Number of elements in quote */
11+
/* Returns the minimum and maximum number of quotes the implementation
12+
can handle */
13+
14+
static inline uint64_t
15+
price_model_quote_min( void ) {
16+
return (uint64_t)1;
17+
}
18+
19+
static inline uint64_t
20+
price_model_quote_max( void ) {
21+
return (UINT64_MAX-(uint64_t)alignof(int64_t)+(uint64_t)1) / (uint64_t)sizeof(int64_t);
22+
}
23+
24+
/* price_model_cnt_valid returns non-zero if cnt is a valid value or
25+
zero if not. */
26+
27+
static inline int
28+
price_model_cnt_valid( uint64_t cnt ) {
29+
return price_model_quote_min()<=cnt && cnt<=price_model_quote_max();
30+
}
31+
32+
/* price_model_scratch_footprint returns the number of bytes of scratch
33+
space needed for an arbitrarily aligned scratch region required by
34+
price_model to handle price_model_quote_min() to cnt quotes
35+
inclusive. */
36+
37+
static inline uint64_t
38+
price_model_scratch_footprint( uint64_t cnt ) { /* Assumes price_model_cnt_valid( cnt ) is true */
39+
/* cnt int64_t's plus worst case alignment padding, no overflow
40+
possible as cnt is valid at this point */
41+
return cnt*(uint64_t)sizeof(int64_t)+(uint64_t)alignof(int64_t)-(uint64_t)1;
42+
}
43+
44+
/* price_model_core minimizes (to quote precision in a floor / round
45+
toward negative infinity sense) the loss model of the given quotes.
46+
Assumes valid inputs (e.g. cnt is at least 1 and not unreasonably
47+
large ... typically a multiple of 3 but this is not required,
48+
quote[i] for i in [0,cnt) are the quotes of interest on input, p25,
49+
p50, p75 point to where to write model outputs, scratch points to a
50+
suitable footprint scratch region).
51+
52+
Returns a pointer to the quotes sorted in ascending order. As such,
53+
the min and max and any other rank statistic can be extracted easily
54+
on return. This location will either be quote itself or to a
55+
location in scratch. Use price_model below for a variant that always
56+
replaces quote with the sorted quotes (potentially has extra ops for
57+
copying). Further, on return, *_p25, *_p50, *_p75 will hold the loss
58+
model minimizing values for the input quotes and the scratch region
59+
was clobbered.
60+
61+
Scratch points to a memory region of arbitrary alignment with at
62+
least price_model_scratch_footprint( cnt ) bytes and it will be
63+
clobbered on output. It is sufficient to use a normally aligned /
64+
normally allocated / normally declared array of cnt int64_t's.
65+
66+
The cost of this function is a fast and low variance (but not
67+
completely data oblivious) O(cnt lg cnt) in the best / average /
68+
worst cases. This function uses no heap / dynamic memory allocation.
69+
It is thread safe provided it is passed non-conflicting quote, output
70+
and scratch arrays. It has a bounded call depth ~lg cnt <= ~64 (this
71+
could reduce to O(1) by using a non-recursive sort/select
72+
implementation under the hood if desired). */
73+
74+
int64_t * /* Returns pointer to sorted quotes (either quote or ALIGN_UP(scratch,int64_t)) */
75+
price_model_core( uint64_t cnt, /* Assumes price_model_cnt_valid( cnt ) is true */
2176
int64_t * quote, /* Assumes quote[i] for i in [0,cnt) is the i-th quote on input */
2277
int64_t * _p25, /* Assumes *_p25 is safe to write to the p25 model output */
2378
int64_t * _p50, /* Assumes *_p50 " */
24-
int64_t * _p75); /* Assumes *_p75 " */
79+
int64_t * _p75, /* Assumes *_p75 " */
80+
void * scratch ); /* Assumes a suitable scratch region */
81+
82+
/* Same as the above but always returns quote and quote always holds the
83+
sorted quotes on return. */
84+
85+
static inline int64_t *
86+
price_model( uint64_t cnt,
87+
int64_t * quote,
88+
int64_t * _p25,
89+
int64_t * _p50,
90+
int64_t * _p75,
91+
void * scratch ) {
92+
int64_t * tmp = price_model_core( cnt, quote, _p25, _p50, _p75, scratch );
93+
if( tmp!=quote ) for( uint64_t idx=(uint64_t)0; idx<cnt; idx++ ) quote[ idx ] = tmp[ idx ];
94+
return quote;
95+
}
2596

2697
#ifdef __cplusplus
2798
}

‎program/c/src/oracle/model/test_price_model.c

+3-29
Original file line numberDiff line numberDiff line change
@@ -19,38 +19,13 @@ int test_price_model() {
1919
prng_t _prng[1];
2020
prng_t * prng = prng_join( prng_new( _prng, (uint32_t)0, (uint64_t)0 ) );
2121

22-
# define N 192
22+
# define N 96
2323

2424
int64_t quote0 [N];
2525
int64_t quote [N];
2626
int64_t val [3];
27+
int64_t scratch[N];
2728

28-
/* Brute force validate small sizes via the 0-1 principle. */
29-
for( int cnt=0; cnt<=24; cnt++ ) {
30-
for( long mask=0L; mask<(1L<<cnt); mask++ ) {
31-
for( int i=0; i<cnt; i++ ) quote0[i] = (int64_t) ((mask>>i) & 1L);
32-
33-
memcpy( quote, quote0, sizeof(int64_t)*(size_t)cnt );
34-
if( price_model_core( cnt, quote, val+0, val+1, val+2)!=quote ) { printf( "FAIL (01-compose)\n" ); return 1; }
35-
36-
/* Validate the results */
37-
38-
/* Although being sorted is not necessary it gives us more confidence about the correctness of the model */
39-
qsort( quote0, (size_t)cnt, sizeof(int64_t), qcmp );
40-
if( memcmp( quote, quote0, sizeof(int64_t)*(size_t)cnt ) ) { printf( "FAIL (01-sort)\n" ); return 1; }
41-
42-
uint64_t p25_idx = cnt>>2;
43-
uint64_t p50_idx = cnt>>1;
44-
uint64_t p75_idx = cnt - (uint64_t)1 - p25_idx;
45-
uint64_t is_even = (uint64_t)!(cnt & (uint64_t)1);
46-
47-
if( val[0]!=quote[ p25_idx ] ) { printf( "FAIL (01-p25)\n" ); return 1; }
48-
if( val[1]!=avg_2_int64( quote[ p50_idx-is_even ], quote[ p50_idx ] ) ) { printf( "FAIL (01-p50)\n" ); return 1; }
49-
if( val[2]!=quote[ p75_idx ] ) { printf( "FAIL (01-p75)\n" ); return 1; }
50-
}
51-
}
52-
53-
/* Test using randomized inputs */
5429
for( int iter=0; iter<10000000; iter++ ) {
5530

5631
/* Generate a random test */
@@ -61,11 +36,10 @@ int test_price_model() {
6136
/* Apply the model */
6237

6338
memcpy( quote, quote0, sizeof(int64_t)*(size_t)cnt );
64-
if( price_model_core( cnt, quote, val+0, val+1, val+2)!=quote ) { printf( "FAIL (compose)\n" ); return 1; }
39+
if( price_model( cnt, quote, val+0, val+1, val+2, scratch )!=quote ) { printf( "FAIL (compose)\n" ); return 1; }
6540

6641
/* Validate the results */
6742

68-
/* Although being sorted is not necessary it gives us more confidence about the correctness of the model */
6943
qsort( quote0, (size_t)cnt, sizeof(int64_t), qcmp );
7044
if( memcmp( quote, quote0, sizeof(int64_t)*(size_t)cnt ) ) { printf( "FAIL (sort)\n" ); return 1; }
7145

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#include <stdio.h>
2+
#include <stdlib.h>
3+
4+
void
5+
sort_gen( int n ) {
6+
7+
# if 0
8+
9+
/* In register variant (PSEUDO OPS ~ 9+4n+3n(n-1))
10+
Assumes switch ~ 3 PSEUDO OPS (LDA,LD,JMP) -> 3 switch statements / 9 pseudo ops
11+
Assumes load ~ 2 PSEUDO OPS (LDA,LD) -> n loads / 2n pseudo ops
12+
Assumes store ~ "
13+
Assumes cswap ~ 6 PSEUDO OPS (CMP,MOV,TESTEQ,CMOV,TESTNEQ,CMOV) / 6*0.5*n*(n-1) pseudo ops */
14+
15+
//printf( "static inline key_t * /* returns (sorted) x */\n" );
16+
//printf( "sort_network_stable( key_t * x, /* indexed [0,n) */\n" );
17+
//printf( " ulong n ) { /* assumes n in [0,%i) */\n", n );
18+
printf( " int c;\n" );
19+
printf( " key_t t" );
20+
for( int i=0; i<n; i++ ) printf( ", x%i", i );
21+
printf( ";\n" );
22+
printf( "\n" );
23+
printf( " switch( n ) {\n" );
24+
for( int i=n; i; i-- ) printf( " case %2iUL: x%i = x[%i]; /* fall through */\n", i, i-1, i-1 );
25+
printf( " default: break;\n" );
26+
printf( " }\n" );
27+
printf( "\n" );
28+
printf( "# define _(i,j) c = BEFORE( x##j, x##i ); t = x##i; x##i = c ? x##j : x##i; x##j = c ? t : x##j\n" );
29+
printf( " switch( n ) {\n" );
30+
for( int i=n-1; i; i-- ) {
31+
printf( " case %2iUL:", i+1 );
32+
for( int j=0; j<i; j++ ) printf( " _(%2i,%2i);", j,j+1 );
33+
printf( " /* fall through */\n" );
34+
}
35+
printf( " default: break;\n" );
36+
printf( " }\n" );
37+
printf( "# undef _\n" );
38+
printf( "\n" );
39+
printf( " switch( n ) {\n" );
40+
for( int i=n; i; i-- ) printf( " case %2iUL: x[%i] = x%i; /* fall through */\n", i, i-1, i-1 );
41+
printf( " default: break;\n" );
42+
printf( " }\n" );
43+
44+
//printf( "\n" );
45+
//printf( " return x;\n" );
46+
//printf( "}\n" );
47+
48+
# else
49+
50+
/* Memory variant (PSEUDO OPS ~ 3+4.5n(n-1))
51+
Assumes switch ~ 3 PSEUDO OPS (LDA,LD,JMP) -> 3 pseudo ops
52+
Assumes cswap ~ 9 PSEUDO OPS (LDA,LDA,LD,LD,CMP,CMOV,CMOV,ST,ST) / 9*0.5*n*(n-1) pseudo ops */
53+
54+
//printf( "static inline key_t * /* returns (sorted) x */\n" );
55+
//printf( "sort_network_stable( key_t * x, /* indexed [0,n) */\n" );
56+
//printf( " ulong n ) { /* assumes n in [0,%i) */\n", n );
57+
58+
printf( " do { /* BEGIN AUTOGENERATED CODE (n=%2i) *****************************/\n", n );
59+
printf( " /* This network of comparators and fallthroughs implement a sorting network representation\n" );
60+
printf( " of an insertion sort. Each case acts as a sort pass with the fallthrough falling through\n" );
61+
printf( " to smaller ranges of the input. */\n");
62+
printf( "# define SORT_STABLE_CE(i,j) u = x[(SORT_IDX_T)i]; v = x[(SORT_IDX_T)j]; c = SORT_BEFORE( v, u ); x[(SORT_IDX_T)i] = c ? v : u; x[(SORT_IDX_T)j] = c ? u : v\n" );
63+
printf( " int c;\n" );
64+
printf( " SORT_KEY_T u;\n" );
65+
printf( " SORT_KEY_T v;\n" );
66+
printf( " switch( n ) {\n" );
67+
for( int i=n-1; i>=0; i-- ) {
68+
printf( " case (SORT_IDX_T)%2i:", i+1 );
69+
for( int j=0; j<i; j++ ) printf( " SORT_STABLE_CE(%2i,%2i);", j,j+1 );
70+
printf( " /* fall through */\n" );
71+
}
72+
printf( " case (SORT_IDX_T) 0: return x;\n" );
73+
printf( " default: break;\n" );
74+
printf( " }\n" );
75+
printf( "# undef SORT_STABLE_CE\n" );
76+
printf( " } while(0); /* END AUTOGENERATED CODE *******************************/\n" );
77+
78+
//printf( "\n" );
79+
//printf( " return x;\n" );
80+
//printf( "}\n" );
81+
82+
# endif
83+
84+
}
85+
86+
int
87+
main( int argc,
88+
char ** argv ) {
89+
if( argc!=2 ) { fprintf( stderr, "Usage: %s [max_base_case]\n", argv[0] ); return 1; }
90+
int n = atoi( argv[1] );
91+
if( n<1 ) { fprintf( stderr, "n (%i) must be positive\n", n ); return 1; }
92+
sort_gen( n );
93+
return 0;
94+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
#include <stdio.h>
2+
#include "../util/util.h"
3+
4+
#define BEFORE(i,j) (((i)>>16)<((j)>>16))
5+
6+
#define SORT_NAME sort
7+
#define SORT_KEY_T int
8+
#define SORT_IDX_T int
9+
#define SORT_BEFORE(i,j) BEFORE(i,j)
10+
#include "tmpl/sort_stable.c"
11+
12+
int test_sort_stable() {
13+
14+
# define N 96
15+
int x[N];
16+
int y[N];
17+
int w[N];
18+
19+
/* Brute force validate small sizes via the 0-1 principle (with
20+
additional information in the keys to validate stability as well). */
21+
22+
for( int n=0; n<=24; n++ ) {
23+
for( long b=0L; b<(1L<<n); b++ ) {
24+
for( int i=0; i<n; i++ ) x[i] = (((int)((b>>i) & 1L))<<16) | i;
25+
for( int i=0; i<n; i++ ) w[i] = x[i];
26+
27+
int * z = sort_stable( x,n, y );
28+
29+
/* Make sure that z is a permutation of input data */
30+
for( int i=0; i<n; i++ ) {
31+
int j = z[i] & (int)0xffff; /* j is the index where z was initially */
32+
if( j<0 || j>=n || z[i]!=w[j] ) { printf( "FAIL (corrupt)\n" ); return 1; }
33+
w[j] = -1; /* Mark that this entry has already been confirmed */
34+
}
35+
for( int i=0; i<n; i++ ) if( w[i]!=-1 ) { printf( "FAIL (perm)\n" ); return 1; }
36+
37+
/* Make sure that z is in order and stable */
38+
for( int i=1; i<n; i++ )
39+
if( z[i]<=z[i-1] ) { printf( "FAIL (%s, b=%lx)\n", BEFORE( z[i], z[i-1] ) ? "order" : "stable", b ); return 1; }
40+
}
41+
}
42+
43+
/* Randomized validation for larger sizes */
44+
45+
prng_t _prng[1];
46+
prng_t * prng = prng_join( prng_new( _prng, (uint32_t)0, (uint64_t)0 ) );
47+
48+
for( int iter=0; iter<10000000; iter++ ) {
49+
50+
int n = (int)(prng_uint32( prng ) % (uint32_t)(N+1)); /* In [0,N], approx uniform IID */
51+
for( int i=0; i<n; i++ ) x[i] = (int)((prng_uint32( prng ) & UINT32_C( 0x00ff0000 )) | (uint32_t)i);
52+
for( int i=0; i<n; i++ ) w[i] = x[i];
53+
54+
int * z = sort_stable( x,n, y );
55+
56+
/* Make sure that z is a permutation of input data */
57+
for( int i=0; i<n; i++ ) {
58+
int j = z[i] & (int)0xffff; /* j is the index where z was initially */
59+
if( j<0 || j>=n || z[i]!=w[j] ) { printf( "FAIL (corrupt)\n" ); return 1; }
60+
w[j] = -1; /* Mark that this entry has already been confirmed */
61+
}
62+
for( int i=0; i<n; i++ ) if( w[i]!=-1 ) { printf( "FAIL (perm)\n" ); return 1; }
63+
64+
/* Make sure that z is in order and stable */
65+
for( int i=1; i<n; i++ )
66+
if( z[i]<=z[i-1] ) { printf( "FAIL (%s)\n", BEFORE( z[i], z[i-1] ) ? "order" : "stable" ); return 1; }
67+
}
68+
69+
prng_delete( prng_leave( prng ) );
70+
return 0;
71+
}
72+
73+
#undef BEFORE
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
/* Usage:
2+
3+
#define SORT_NAME mysort
4+
#define SORT_KEY_T mykey_t
5+
#include "sort_stable.c"
6+
7+
This will instantiate the following APIs:
8+
9+
// Returns non-zero if n is a supported sort size and zero if not.
10+
// Unsupported values are negative n (only applicable for signed
11+
// indexing types) or unreasonably large n (such that the scratch
12+
// space requirement would be bigger than UINT64_MAX).
13+
14+
static inline int
15+
mysort_stable_cnt_valid( uint64_t cnt );
16+
17+
// Return the footprint required for a scratch space of any
18+
// alignment sufficient for sorting up to n items inclusive.
19+
// Returns 0 if cnt is not valid or no scratch is needed.
20+
21+
static inline uint64_t
22+
mysort_stable_scratch_footprint( uint64_t cnt );
23+
24+
// Sort elements of keys into an ascending order. Algorithm has a
25+
// best case of ~0.5 cnt lg cnt and an average and worst case of
26+
// cnt lg cnt such that it is moderately resistant to timing and
27+
// computational DOS attacks. Further, the sort is stable. The
28+
// values in scratch are irrelevant on input. Returns where the
29+
// sorted data ended up (either key or ALIGN_UP(scratch,mykey_t)).
30+
// That is, if this returns key, the values in key are the stably
31+
// sorted data and scratch was clobbered. Otherwise, the values
32+
// at ALIGN_UP(scratch,mykey_t) are the stably sorted data and key
33+
// was clobbered. Users wanting the data in a particular location
34+
// can copy as necessary (allowing this flexibility minimizes the
35+
// amount of copying needed to do the sorting). E.g.:
36+
//
37+
// mykey_t * tmp = mysort_stable( key, cnt, scratch );
38+
// if( tmp!=key ) memcpy( key, tmp, cnt*sizeof(mykey_t) );
39+
//
40+
// scratch points to a scratch memory region of any alignment with
41+
// room for mysort_stable_scratch_footprint( cnt ) bytes. (Any
42+
// normally declared / normally allocated region with mykey_t
43+
// compatible alignment and space for cnt mykey_t's will work
44+
// too.)
45+
//
46+
// FIXME: CONSIDER RETURNING NULL IF BAD INPUT ARGS
47+
48+
static mykey_t *
49+
mysort_stable( mykey_t * key, // Indexed [0,n)
50+
uint64_t cnt, // Assumes mysort_stable_cnt_valid( cnt ) is true
51+
void * scratch ); // Pointer to suitable scratch region
52+
53+
This can be included multiple times with different names / parameters
54+
to define many families of sorts that might be useful for a compilation
55+
unit.
56+
57+
Other defines exist to change the sort criteria / direction, linkage
58+
and so forth. See below for details. */
59+
60+
#include "../../util/compat_stdint.h" /* For uint64_t */
61+
#include "../../util/align.h" /* For ALIGN_UP */
62+
63+
#ifndef SORT_NAME
64+
#error "Define SORT_NAME"
65+
#endif
66+
67+
#ifndef SORT_KEY_T
68+
#error "Define SORT_KEY_T; nominally a POD (plain-old-data) type"
69+
#endif
70+
71+
/* Define SORT_IDX_T to specify the data type used to index key arrays.
72+
Default is uint64_t. */
73+
74+
#ifndef SORT_IDX_T
75+
#define SORT_IDX_T uint64_t
76+
#endif
77+
78+
/* Define SORT_BEFORE to specify how sorted keys should be ordered.
79+
Default is ascending as defined by the "<" operator for the type.
80+
SORT_BEFORE(u,v) should be non-zero if key u should go strictly
81+
before key v and zero otherwise. */
82+
83+
#ifndef SORT_BEFORE
84+
#define SORT_BEFORE(u,v) ((u)<(v))
85+
#endif
86+
87+
/* Define SORT_STATIC to specify the type of linkage the non-inlined
88+
APIs should have (e.g. if defined to nothing, these will have
89+
external linkage). Default is static linkage. */
90+
91+
#ifndef SORT_STATIC
92+
#define SORT_STATIC static
93+
#endif
94+
95+
/* Define SORT_STATIC_INLINE to specify the type of linkage inlined
96+
APIs should have (e.g. if defined to nothing, these will have
97+
non-inlined external linkage). Default is static inline linkage. */
98+
99+
#ifndef SORT_STATIC_INLINE
100+
#define SORT_STATIC_INLINE static inline
101+
#endif
102+
103+
/* Some macro preprocessor helpers */
104+
105+
#define SORT_C3(a,b,c)a##b##c
106+
#define SORT_XC3(a,b,c)SORT_C3(a,b,c)
107+
#define SORT_IMPL(impl)SORT_XC3(SORT_NAME,_,impl)
108+
109+
SORT_STATIC_INLINE int
110+
SORT_IMPL(stable_cnt_valid)( SORT_IDX_T cnt ) {
111+
/* Written this way for compiler warning free signed SORT_IDX_T and/or
112+
byte size SORT_KEY_T support (e.g. compiler often will warn to the
113+
effect "n>=0 always true" if idx is an unsigned type or
114+
"n<=UINT64_MAX always true" if key is a byte type). */
115+
static uint64_t const max = ((UINT64_MAX - (uint64_t)alignof(SORT_KEY_T) + (uint64_t)1) / (uint64_t)sizeof(SORT_KEY_T));
116+
return !cnt || (((SORT_IDX_T)0)<cnt && ((uint64_t)cnt)<max) || ((uint64_t)cnt)==max;
117+
}
118+
119+
SORT_STATIC_INLINE uint64_t
120+
SORT_IMPL(stable_scratch_footprint)( SORT_IDX_T cnt ) {
121+
if( !SORT_IMPL(stable_cnt_valid)( cnt ) ) return (uint64_t)0;
122+
/* Guaranteed not to overflow given a valid cnt */
123+
return ((uint64_t)sizeof (SORT_KEY_T))*(uint64_t)cnt /* Space for the n SORT_KEY_T's */
124+
+ ((uint64_t)alignof(SORT_KEY_T))-(uint64_t)1; /* Worst case alignment padding */
125+
}
126+
127+
SORT_STATIC SORT_KEY_T *
128+
SORT_IMPL(stable_node)( SORT_KEY_T * x,
129+
SORT_IDX_T n,
130+
SORT_KEY_T * t ) {
131+
132+
/* Optimized handling of base cases */
133+
134+
# include "sort_stable_base.c"
135+
136+
/* Note that n is at least 2 at this point */
137+
/* Break input into approximately equal halves and sort them */
138+
139+
SORT_KEY_T * xl = x;
140+
SORT_KEY_T * tl = t;
141+
SORT_IDX_T nl = n >> 1;
142+
SORT_KEY_T * yl = SORT_IMPL(stable_node)( xl,nl, tl );
143+
144+
SORT_KEY_T * xr = x + nl;
145+
SORT_KEY_T * tr = t + nl;
146+
SORT_IDX_T nr = n - nl;
147+
SORT_KEY_T * yr = SORT_IMPL(stable_node)( xr,nr, tr );
148+
149+
/* If left subsort result ended up in orig array, merge into temp
150+
array. Otherwise, merge into orig array. */
151+
152+
if( yl==xl ) x = t;
153+
154+
/* At this point, note that yl does not overlap with the location for
155+
merge output. yr might overlap (with the right half)
156+
with the location for merge output but this will still work in that
157+
case. */
158+
159+
SORT_IDX_T i = (SORT_IDX_T)0;
160+
SORT_IDX_T j = (SORT_IDX_T)0;
161+
SORT_IDX_T k = (SORT_IDX_T)0;
162+
163+
/* Note that nl and nr are both at least one at this point so at least
164+
one iteration of the loop body is necessary. */
165+
166+
for(;;) { /* Minimal C language operations */
167+
if( SORT_BEFORE( yr[k], yl[j] ) ) {
168+
x[i++] = yr[k++];
169+
if( k>=nr ) { /* append left stragglers (at least one) */ do x[i++] = yl[j++]; while( j<nl ); break; }
170+
} else {
171+
x[i++] = yl[j++];
172+
if( j>=nl ) { /* append right stragglers (at least one) */ do x[i++] = yr[k++]; while( k<nr ); break; }
173+
}
174+
}
175+
176+
return x;
177+
}
178+
179+
SORT_STATIC_INLINE SORT_KEY_T *
180+
SORT_IMPL(stable)( SORT_KEY_T * key,
181+
SORT_IDX_T cnt, /* Assumed valid cnt */
182+
void * scratch ) {
183+
return SORT_IMPL(stable_node)( key, cnt, ALIGN_UP( scratch, SORT_KEY_T ) );
184+
}
185+
186+
#undef SORT_IMPL
187+
#undef SORT_XC3
188+
#undef SORT_C3
189+
190+
#undef SORT_STATIC_INLINE
191+
#undef SORT_STATIC
192+
#undef SORT_BEFORE
193+
#undef SORT_IDX_T
194+
#undef SORT_KEY_T
195+
#undef SORT_NAME
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
do { /* BEGIN AUTOGENERATED CODE (n= 7) *****************************/
2+
/* This network of comparators and fallthroughs implement a sorting network representation
3+
of an insertion sort. Each case acts as a sort pass with the fallthrough falling through
4+
to smaller ranges of the input. */
5+
# define SORT_STABLE_CE(i,j) u = x[(SORT_IDX_T)i]; v = x[(SORT_IDX_T)j]; c = SORT_BEFORE( v, u ); x[(SORT_IDX_T)i] = c ? v : u; x[(SORT_IDX_T)j] = c ? u : v
6+
int c;
7+
SORT_KEY_T u;
8+
SORT_KEY_T v;
9+
switch( n ) {
10+
case (SORT_IDX_T) 7: SORT_STABLE_CE( 0, 1); SORT_STABLE_CE( 1, 2); SORT_STABLE_CE( 2, 3); SORT_STABLE_CE( 3, 4); SORT_STABLE_CE( 4, 5); SORT_STABLE_CE( 5, 6); /* fall through */
11+
case (SORT_IDX_T) 6: SORT_STABLE_CE( 0, 1); SORT_STABLE_CE( 1, 2); SORT_STABLE_CE( 2, 3); SORT_STABLE_CE( 3, 4); SORT_STABLE_CE( 4, 5); /* fall through */
12+
case (SORT_IDX_T) 5: SORT_STABLE_CE( 0, 1); SORT_STABLE_CE( 1, 2); SORT_STABLE_CE( 2, 3); SORT_STABLE_CE( 3, 4); /* fall through */
13+
case (SORT_IDX_T) 4: SORT_STABLE_CE( 0, 1); SORT_STABLE_CE( 1, 2); SORT_STABLE_CE( 2, 3); /* fall through */
14+
case (SORT_IDX_T) 3: SORT_STABLE_CE( 0, 1); SORT_STABLE_CE( 1, 2); /* fall through */
15+
case (SORT_IDX_T) 2: SORT_STABLE_CE( 0, 1); /* fall through */
16+
case (SORT_IDX_T) 1: /* fall through */
17+
case (SORT_IDX_T) 0: return x;
18+
default: break;
19+
}
20+
# undef SORT_STABLE_CE
21+
} while(0); /* END AUTOGENERATED CODE *******************************/

‎program/c/src/oracle/upd_aggregate.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,8 @@ static inline bool upd_aggregate( pc_price_t *ptr, uint64_t slot, int64_t timest
188188
// note: numv>0 and nprcs = 3*numv at this point
189189
int64_t agg_p25;
190190
int64_t agg_p75;
191-
price_model_core( (uint64_t)nprcs, prcs, &agg_p25, &agg_price, &agg_p75 );
191+
int64_t scratch[ PC_NUM_COMP * 3 ]; // ~0.75KiB for current PC_NUM_COMP (FIXME: DOUBLE CHECK THIS FITS INTO STACK FRAME LIMIT)
192+
price_model_core( (uint64_t)nprcs, prcs, &agg_p25, &agg_price, &agg_p75, scratch );
192193

193194
// get the left and right confidences
194195
// note that as valid quotes have positive prices currently and

‎program/rust/src/tests/mod.rs

-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ mod test_add_price;
33
mod test_add_product;
44
mod test_add_publisher;
55
mod test_aggregation;
6-
mod test_benchmark;
76
mod test_c_code;
87
mod test_check_valid_signable_account_or_permissioned_funding_account;
98
mod test_del_price;

‎program/rust/src/tests/pyth_simulator.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ use {
5555
},
5656
solana_sdk::{
5757
account::Account,
58-
commitment_config::CommitmentLevel,
5958
signature::{
6059
Keypair,
6160
Signer,
@@ -207,7 +206,7 @@ impl PythSimulator {
207206

208207
self.context
209208
.banks_client
210-
.process_transaction_with_commitment(transaction, CommitmentLevel::Processed)
209+
.process_transaction(transaction)
211210
.await
212211
}
213212

‎program/rust/src/tests/test_benchmark.rs

-122
This file was deleted.

‎program/rust/src/tests/test_c_code.rs

+8
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ mod c {
55
#[link(name = "cpyth-test")]
66
extern "C" {
77
pub fn test_price_model() -> i32;
8+
pub fn test_sort_stable() -> i32;
89
pub fn test_align() -> i32;
910
pub fn test_avg() -> i32;
1011
pub fn test_hash() -> i32;
@@ -21,6 +22,13 @@ fn test_price_model() {
2122
}
2223
}
2324

25+
#[test]
26+
fn test_sort_stable() {
27+
unsafe {
28+
assert_eq!(c::test_sort_stable(), 0);
29+
}
30+
}
31+
2432
#[test]
2533
fn test_align() {
2634
unsafe {

0 commit comments

Comments
 (0)
Please sign in to comment.