Skip to content

Commit 435197c

Browse files
perf(es/fast-lexer): Optimize SIMD vector initialization by initializing `u8x16` constants once. (#10183)
**Description:** Improves performance and reduces repeated allocations by initializing the constant `u8x16` vectors as globals, ensuring each constant `u8x16` is initialized only once. - Simplifies SIMD vector creation using `u8x16::new()` - Maintains the previous performance optimizations for lexer vector handling
1 parent c2fe4bf commit 435197c

File tree

3 files changed

+26
-25
lines changed

3 files changed

+26
-25
lines changed

crates/swc_ecma_fast_parser/src/lexer/cursor.rs

+5-5
Original file line numberDiff line numberDiff line change
@@ -91,22 +91,22 @@ impl<'a> Cursor<'a> {
9191

9292
/// Advance until the predicate returns false or EOF is reached
9393
#[inline]
94-
pub fn advance_while<F>(&mut self, mut predicate: F) -> u32
94+
pub fn advance_while<F>(&mut self, predicate: F) -> u32
9595
where
96-
F: FnMut(u8) -> bool,
96+
F: Fn(u8) -> bool,
9797
{
9898
let start = self.pos;
9999

100-
self.advance_while_scalar(&mut predicate);
100+
self.advance_while_scalar(&predicate);
101101

102102
self.pos - start
103103
}
104104

105105
/// Scalar (non-SIMD) implementation of advance_while
106106
#[inline]
107-
fn advance_while_scalar<F>(&mut self, predicate: &mut F)
107+
fn advance_while_scalar<F>(&mut self, predicate: &F)
108108
where
109-
F: FnMut(u8) -> bool,
109+
F: Fn(u8) -> bool,
110110
{
111111
// Warning: Do not scalarize if we do not use SIMD
112112
// const BATCH_SIZE: u32 = 32;

crates/swc_ecma_fast_parser/src/lexer/mod.rs

+10-10
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ const CHAR_HEX_DIGIT: u8 = 0b0010_0000;
104104
const CHAR_OPERATOR: u8 = 0b0100_0000;
105105
const CHAR_SPECIAL: u8 = 0b1000_0000;
106106

107+
// SIMD vectors for common whitespace characters
108+
static SPACE_SIMD_VEC: u8x16 = u8x16::new([b' '; 16]);
109+
static TAB_SIMD_VEC: u8x16 = u8x16::new([b'\t'; 16]);
110+
static FORM_FEED_SMID_VEC: u8x16 = u8x16::new([0x0c; 16]);
111+
static VECR_TAB_SMID_VEC: u8x16 = u8x16::new([0x0b; 16]);
112+
107113
// Extended lookup table for faster character checks (ASCII only)
108114
static ASCII_LOOKUP: [u8; 256] = {
109115
let mut table = [0u8; 256];
@@ -564,18 +570,12 @@ impl<'a> Lexer<'a> {
564570
_ => {}
565571
}
566572

567-
// Create SIMD vectors for common whitespace characters
568-
let space_vec = u8x16::splat(b' ');
569-
let tab_vec = u8x16::splat(b'\t');
570-
let form_feed_vec = u8x16::splat(0x0c); // Form feed
571-
let vert_tab_vec = u8x16::splat(0x0b); // Vertical tab
572-
573573
// Fast path for regular whitespace (space, tab, form feed, vertical tab)
574574
// Compare with our whitespace vectors
575-
let is_space = data.cmp_eq(space_vec);
576-
let is_tab = data.cmp_eq(tab_vec);
577-
let is_ff = data.cmp_eq(form_feed_vec);
578-
let is_vt = data.cmp_eq(vert_tab_vec);
575+
let is_space = data.cmp_eq(SPACE_SIMD_VEC);
576+
let is_tab = data.cmp_eq(TAB_SIMD_VEC);
577+
let is_ff = data.cmp_eq(FORM_FEED_SMID_VEC);
578+
let is_vt = data.cmp_eq(VECR_TAB_SMID_VEC);
579579

580580
// Combine masks for regular whitespace
581581
let is_basic_ws = is_space | is_tab | is_ff | is_vt;

crates/swc_ecma_fast_parser/src/lexer/string.rs

+11-10
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,11 @@ static ESCAPE_LOOKUP: [u8; 128] = {
2929
table
3030
};
3131

32+
// SIMD vectors for common string end characters
33+
static BACKSLASH_SMID_VEC: u8x16 = u8x16::new([b'\\'; 16]);
34+
static NEWLINE_SMID_VEC: u8x16 = u8x16::new([b'\n'; 16]);
35+
static CARRIAGE_SMID_VEC: u8x16 = u8x16::new([b'\r'; 16]);
36+
3237
// Buffer for string construction - using thread_local to avoid allocation
3338
thread_local! {
3439
static STRING_BUFFER: std::cell::RefCell<Vec<u8>> = std::cell::RefCell::new(Vec::with_capacity(1024));
@@ -220,6 +225,9 @@ impl Lexer<'_> {
220225

221226
let mut pos = start_pos;
222227

228+
// Create vectors for quick comparison
229+
let quote_vec = u8x16::splat(quote);
230+
223231
// Process in chunks of 16 bytes using SIMD
224232
while pos + 16 <= rest.len() as u32 {
225233
// Load 16 bytes
@@ -228,18 +236,11 @@ impl Lexer<'_> {
228236
bytes.copy_from_slice(chunk_bytes);
229237
let chunk = u8x16::new(bytes);
230238

231-
// Create vectors for quick comparison
232-
let quote_vec = u8x16::splat(quote);
233-
let backslash_vec = u8x16::splat(b'\\');
234-
let newline_vec = u8x16::splat(b'\n');
235-
let carriage_vec = u8x16::splat(b'\r');
236-
237239
// Check for presence of special characters with a single combined mask
238240
let quote_mask = chunk.cmp_eq(quote_vec);
239-
let backslash_mask = chunk.cmp_eq(backslash_vec);
240-
let newline_mask = chunk.cmp_eq(newline_vec);
241-
let carriage_mask = chunk.cmp_eq(carriage_vec);
242-
241+
let backslash_mask = chunk.cmp_eq(BACKSLASH_SMID_VEC);
242+
let newline_mask = chunk.cmp_eq(NEWLINE_SMID_VEC);
243+
let carriage_mask = chunk.cmp_eq(CARRIAGE_SMID_VEC);
243244
// Combine all masks with OR operation
244245
let combined_mask = quote_mask | backslash_mask | newline_mask | carriage_mask;
245246

0 commit comments

Comments
 (0)