Skip to content

Commit 87ca5ac

Browse files
authored
refactor: use ordered publisher list (#402)
This change brings down the average cost of a price update without aggregation from 5k to 2k by moving from unordered list of publishers to an ordered list of publishers on doing binary search on lookup. It will fall back to linear search if the binary search fails. Also add_publisher now tries to sort the list with 000... publisher as a migration path. The sort is implemented in a way to be efficient and small (in code size). There is an insertion sort upon each new publisher to make sure the list remains sorted. del_publisher won't change the order of the list so it will remain sorted. The heavy code path for publishers now uses sol_memcmp (on sorting and lookup) to be more optimized (it costs 10 CU and apparently publisher equality is a few times more expensive). I checked sol_memcmp without change to sorted but the result was not as significant. Maybe later we can extract it out to a function, and use it everywhere.
1 parent ceb4a32 commit 87ca5ac

File tree

5 files changed

+412
-15
lines changed

5 files changed

+412
-15
lines changed

program/rust/src/accounts/price.rs

+30
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
pub use price_pythnet::*;
2+
#[cfg(test)]
3+
use quickcheck::Arbitrary;
24
use {
35
super::{
46
AccountHeader,
@@ -188,14 +190,29 @@ mod price_pythnet {
188190
}
189191

190192
#[repr(C)]
193+
#[cfg_attr(test, derive(Debug, PartialEq))]
191194
#[derive(Copy, Clone, Pod, Zeroable)]
192195
pub struct PriceComponent {
193196
pub pub_: Pubkey,
194197
pub agg_: PriceInfo,
195198
pub latest_: PriceInfo,
196199
}
197200

201+
#[cfg(test)]
202+
impl Arbitrary for PriceComponent {
203+
fn arbitrary(g: &mut quickcheck::Gen) -> Self {
204+
let mut key = [0u8; 32];
205+
key.iter_mut().for_each(|item| *item = u8::arbitrary(g));
206+
PriceComponent {
207+
pub_: Pubkey::new_from_array(key),
208+
agg_: PriceInfo::arbitrary(g),
209+
latest_: PriceInfo::arbitrary(g),
210+
}
211+
}
212+
}
213+
198214
#[repr(C)]
215+
#[cfg_attr(test, derive(Debug, PartialEq))]
199216
#[derive(Copy, Clone, Pod, Zeroable)]
200217
pub struct PriceInfo {
201218
pub price_: i64,
@@ -205,6 +222,19 @@ pub struct PriceInfo {
205222
pub pub_slot_: u64,
206223
}
207224

225+
#[cfg(test)]
226+
impl Arbitrary for PriceInfo {
227+
fn arbitrary(g: &mut quickcheck::Gen) -> Self {
228+
PriceInfo {
229+
price_: i64::arbitrary(g),
230+
conf_: u64::arbitrary(g),
231+
status_: u32::arbitrary(g),
232+
corp_act_status_: u32::arbitrary(g),
233+
pub_slot_: u64::arbitrary(g),
234+
}
235+
}
236+
}
237+
208238
#[repr(C)]
209239
#[derive(Copy, Clone, Pod, Zeroable)]
210240
pub struct PriceEma {

program/rust/src/processor/add_publisher.rs

+148-3
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,10 @@ use {
2424
account_info::AccountInfo,
2525
entrypoint::ProgramResult,
2626
program_error::ProgramError,
27-
program_memory::sol_memset,
27+
program_memory::{
28+
sol_memcmp,
29+
sol_memset,
30+
},
2831
pubkey::Pubkey,
2932
},
3033
std::mem::size_of,
@@ -41,8 +44,7 @@ pub fn add_publisher(
4144
let cmd_args = load::<AddPublisherArgs>(instruction_data)?;
4245

4346
pyth_assert(
44-
instruction_data.len() == size_of::<AddPublisherArgs>()
45-
&& cmd_args.publisher != Pubkey::default(),
47+
instruction_data.len() == size_of::<AddPublisherArgs>(),
4648
ProgramError::InvalidArgument,
4749
)?;
4850

@@ -63,6 +65,14 @@ pub fn add_publisher(
6365

6466
let mut price_data = load_checked::<PriceAccount>(price_account, cmd_args.header.version)?;
6567

68+
// Use the call with the default pubkey (000..) as a trigger to sort the publishers as a
69+
// migration step from unsorted list to sorted list.
70+
if cmd_args.publisher == Pubkey::default() {
71+
let num_comps = try_convert::<u32, usize>(price_data.num_)?;
72+
sort_price_comps(&mut price_data.comp_, num_comps)?;
73+
return Ok(());
74+
}
75+
6676
if price_data.num_ >= PC_NUM_COMP {
6777
return Err(ProgramError::InvalidArgument);
6878
}
@@ -81,6 +91,141 @@ pub fn add_publisher(
8191
);
8292
price_data.comp_[current_index].pub_ = cmd_args.publisher;
8393
price_data.num_ += 1;
94+
95+
// Sort the publishers in the list
96+
{
97+
let num_comps = try_convert::<u32, usize>(price_data.num_)?;
98+
sort_price_comps(&mut price_data.comp_, num_comps)?;
99+
}
100+
84101
price_data.header.size = try_convert::<_, u32>(PriceAccount::INITIAL_SIZE)?;
85102
Ok(())
86103
}
104+
105+
/// A copy of rust slice/sort.rs heapsort implementation which is small and fast. We couldn't use
106+
/// the sort directly because it was only accessible behind a unstable feature flag at the time of
107+
/// writing this code.
108+
fn heapsort(v: &mut [(Pubkey, usize)]) {
109+
// This binary heap respects the invariant `parent >= child`.
110+
let sift_down = |v: &mut [(Pubkey, usize)], mut node: usize| {
111+
loop {
112+
// Children of `node`.
113+
let mut child = 2 * node + 1;
114+
if child >= v.len() {
115+
break;
116+
}
117+
118+
// Choose the greater child.
119+
if child + 1 < v.len()
120+
&& sol_memcmp(v[child].0.as_ref(), v[child + 1].0.as_ref(), 32) < 0
121+
{
122+
child += 1;
123+
}
124+
125+
// Stop if the invariant holds at `node`.
126+
if sol_memcmp(v[node].0.as_ref(), v[child].0.as_ref(), 32) >= 0 {
127+
break;
128+
}
129+
130+
// Swap `node` with the greater child, move one step down, and continue sifting.
131+
v.swap(node, child);
132+
node = child;
133+
}
134+
};
135+
136+
// Build the heap in linear time.
137+
for i in (0..v.len() / 2).rev() {
138+
sift_down(v, i);
139+
}
140+
141+
// Pop maximal elements from the heap.
142+
for i in (1..v.len()).rev() {
143+
v.swap(0, i);
144+
sift_down(&mut v[..i], 0);
145+
}
146+
}
147+
148+
/// Sort the publishers price component list in place by performing minimal swaps.
149+
/// This code is inspired by the sort_by_cached_key implementation in the Rust stdlib.
150+
/// The rust stdlib implementation is not used because it uses a fast sort variant that has
151+
/// a large code size.
152+
///
153+
/// num_publishers is the number of publishers in the list that should be sorted. It is explicitly
154+
/// passed to avoid callers mistake of passing the full slice which may contain uninitialized values.
155+
fn sort_price_comps(comps: &mut [PriceComponent], num_comps: usize) -> Result<(), ProgramError> {
156+
let comps = comps
157+
.get_mut(..num_comps)
158+
.ok_or(ProgramError::InvalidArgument)?;
159+
160+
// Publishers are likely sorted in ascending order but
161+
// heapsorts creates a max-heap so we reverse the order
162+
// of the keys to make the heapify step faster.
163+
let mut keys = comps
164+
.iter()
165+
.enumerate()
166+
.map(|(i, x)| (x.pub_, i))
167+
.rev()
168+
.collect::<Vec<_>>();
169+
170+
heapsort(&mut keys);
171+
172+
for i in 0..num_comps {
173+
// We know that the publisher with key[i].0 should be at index i in the sorted array and
174+
// want to swap them in-place in O(n). Normally, the publisher at key[i].0 should be at
175+
// key[i].1 but if it is swapped, we need to find the correct index by following the chain
176+
// of swaps.
177+
let mut index = keys[i].1;
178+
179+
while index < i {
180+
index = keys[index].1;
181+
}
182+
// Setting the final index here is important to make the code linear as we won't
183+
// loop over from i to index again when we reach i again.
184+
keys[i].1 = index;
185+
comps.swap(i, index);
186+
}
187+
188+
Ok(())
189+
}
190+
191+
#[cfg(test)]
192+
mod test {
193+
use {
194+
super::*,
195+
quickcheck_macros::quickcheck,
196+
};
197+
198+
#[quickcheck]
199+
pub fn test_sort_price_comps(mut comps: Vec<PriceComponent>) {
200+
let mut rust_std_sorted_comps = comps.clone();
201+
rust_std_sorted_comps.sort_by_key(|x| x.pub_);
202+
203+
let num_comps = comps.len();
204+
assert_eq!(
205+
sort_price_comps(&mut comps, num_comps + 1),
206+
Err(ProgramError::InvalidArgument)
207+
);
208+
209+
assert_eq!(sort_price_comps(&mut comps, num_comps), Ok(()));
210+
assert_eq!(comps, rust_std_sorted_comps);
211+
}
212+
213+
#[quickcheck]
214+
pub fn test_sort_price_comps_smaller_slice(
215+
mut comps: Vec<PriceComponent>,
216+
mut num_comps: usize,
217+
) {
218+
num_comps = if comps.is_empty() {
219+
0
220+
} else {
221+
num_comps % comps.len()
222+
};
223+
224+
let mut rust_std_sorted_comps = comps.get(..num_comps).unwrap().to_vec();
225+
rust_std_sorted_comps.sort_by_key(|x| x.pub_);
226+
227+
228+
assert_eq!(sort_price_comps(&mut comps, num_comps), Ok(()));
229+
assert_eq!(comps.get(..num_comps).unwrap(), rust_std_sorted_comps);
230+
}
231+
}

program/rust/src/processor/upd_price.rs

+111-11
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use {
22
crate::{
33
accounts::{
44
PriceAccount,
5+
PriceComponent,
56
PriceInfo,
67
PythOracleSerialize,
78
UPD_PRICE_WRITE_SEED,
@@ -31,6 +32,7 @@ use {
3132
},
3233
program::invoke_signed,
3334
program_error::ProgramError,
35+
program_memory::sol_memcmp,
3436
pubkey::Pubkey,
3537
sysvar::Sysvar,
3638
},
@@ -127,7 +129,7 @@ pub fn upd_price(
127129
// Check clock
128130
let clock = Clock::from_account_info(clock_account)?;
129131

130-
let mut publisher_index: usize = 0;
132+
let publisher_index: usize;
131133
let latest_aggregate_price: PriceInfo;
132134

133135
// The price_data borrow happens in a scope because it must be
@@ -137,17 +139,15 @@ pub fn upd_price(
137139
// Verify that symbol account is initialized
138140
let price_data = load_checked::<PriceAccount>(price_account, cmd_args.header.version)?;
139141

140-
// Verify that publisher is authorized
141-
while publisher_index < try_convert::<u32, usize>(price_data.num_)? {
142-
if price_data.comp_[publisher_index].pub_ == *funding_account.key {
143-
break;
142+
publisher_index = match find_publisher_index(
143+
&price_data.comp_[..try_convert::<u32, usize>(price_data.num_)?],
144+
funding_account.key,
145+
) {
146+
Some(index) => index,
147+
None => {
148+
return Err(OracleError::PermissionViolation.into());
144149
}
145-
publisher_index += 1;
146-
}
147-
pyth_assert(
148-
publisher_index < try_convert::<u32, usize>(price_data.num_)?,
149-
OracleError::PermissionViolation.into(),
150-
)?;
150+
};
151151

152152
latest_aggregate_price = price_data.agg_;
153153
let latest_publisher_price = price_data.comp_[publisher_index].latest_;
@@ -281,6 +281,64 @@ pub fn upd_price(
281281
Ok(())
282282
}
283283

284+
/// Find the index of the publisher in the list of components.
285+
///
286+
/// This method first tries to binary search for the publisher's key in the list of components
287+
/// to get the result faster if the list is sorted. If the list is not sorted, it falls back to
288+
/// a linear search.
289+
#[inline(always)]
290+
fn find_publisher_index(comps: &[PriceComponent], key: &Pubkey) -> Option<usize> {
291+
// Verify that publisher is authorized by initially binary searching
292+
// for the publisher's component in the price account. The binary
293+
// search might not work if the publisher list is not sorted; therefore
294+
// we fall back to a linear search.
295+
let mut binary_search_result = None;
296+
297+
{
298+
// Binary search to find the publisher key. Rust std binary search is not used because
299+
// they guarantee valid outcome only if the array is sorted whereas we want to rely on
300+
// a Equal match if it is a result on an unsorted array. Currently the rust
301+
// implementation behaves the same but we do not want to rely on api internals.
302+
let mut left = 0;
303+
let mut right = comps.len();
304+
while left < right {
305+
let mid = left + (right - left) / 2;
306+
// sol_memcmp is much faster than rust default comparison of Pubkey. It costs
307+
// 10CU whereas rust default comparison costs a few times more.
308+
match sol_memcmp(comps[mid].pub_.as_ref(), key.as_ref(), 32) {
309+
i if i < 0 => {
310+
left = mid + 1;
311+
}
312+
i if i > 0 => {
313+
right = mid;
314+
}
315+
_ => {
316+
binary_search_result = Some(mid);
317+
break;
318+
}
319+
}
320+
}
321+
}
322+
323+
match binary_search_result {
324+
Some(index) => Some(index),
325+
None => {
326+
let mut index = 0;
327+
while index < comps.len() {
328+
if sol_memcmp(comps[index].pub_.as_ref(), key.as_ref(), 32) == 0 {
329+
break;
330+
}
331+
index += 1;
332+
}
333+
if index == comps.len() {
334+
None
335+
} else {
336+
Some(index)
337+
}
338+
}
339+
}
340+
}
341+
284342
#[allow(dead_code)]
285343
// Wrapper struct for the accounts required to add data to the accumulator program.
286344
struct MessageBufferAccounts<'a, 'b: 'a> {
@@ -289,3 +347,45 @@ struct MessageBufferAccounts<'a, 'b: 'a> {
289347
oracle_auth_pda: &'a AccountInfo<'b>,
290348
message_buffer_data: &'a AccountInfo<'b>,
291349
}
350+
351+
#[cfg(test)]
352+
mod test {
353+
use {
354+
super::*,
355+
crate::accounts::PriceComponent,
356+
quickcheck_macros::quickcheck,
357+
solana_program::pubkey::Pubkey,
358+
};
359+
360+
/// Test the find_publisher_index method works with an unordered list of components.
361+
#[quickcheck]
362+
pub fn test_find_publisher_index_unordered_comp(comps: Vec<PriceComponent>) {
363+
comps.iter().enumerate().for_each(|(idx, comp)| {
364+
assert_eq!(find_publisher_index(&comps, &comp.pub_), Some(idx));
365+
});
366+
367+
let mut key_not_in_list = Pubkey::new_unique();
368+
while comps.iter().any(|comp| comp.pub_ == key_not_in_list) {
369+
key_not_in_list = Pubkey::new_unique();
370+
}
371+
372+
assert_eq!(find_publisher_index(&comps, &key_not_in_list), None);
373+
}
374+
375+
/// Test the find_publisher_index method works with a sorted list of components.
376+
#[quickcheck]
377+
pub fn test_find_publisher_index_ordered_comp(mut comps: Vec<PriceComponent>) {
378+
comps.sort_by_key(|comp| comp.pub_);
379+
380+
comps.iter().enumerate().for_each(|(idx, comp)| {
381+
assert_eq!(find_publisher_index(&comps, &comp.pub_), Some(idx));
382+
});
383+
384+
let mut key_not_in_list = Pubkey::new_unique();
385+
while comps.iter().any(|comp| comp.pub_ == key_not_in_list) {
386+
key_not_in_list = Pubkey::new_unique();
387+
}
388+
389+
assert_eq!(find_publisher_index(&comps, &key_not_in_list), None);
390+
}
391+
}

0 commit comments

Comments
 (0)