Skip to content

Commit 35194e3

Browse files
authored Mar 10, 2025
perf(es/fast-lexer): Use memchr for skip_line_comments (#10173)
1 parent d97f7b2 commit 35194e3

File tree

4 files changed

+22
-85
lines changed

4 files changed

+22
-85
lines changed
 

‎Cargo.lock

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

‎crates/swc_ecma_fast_parser/Cargo.toml

+1
Original file line number | Diff line number | Diff line change
@@ -19,6 +19,7 @@ swc_common = { version = "8.0.0", path = "../swc_common" }
1919
swc_ecma_ast = { version = "8.0.0", path = "../swc_ecma_ast" }
2020

2121
assume = { workspace = true }
22+
memchr = { workspace = true }
2223
num-bigint = { workspace = true }
2324
phf = { workspace = true, features = ["macros"] }
2425
wide = { workspace = true }

‎crates/swc_ecma_fast_parser/src/lexer/cursor.rs

-66
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,6 @@
44
55
use assume::assume;
66
use swc_common::BytePos;
7-
use wide::u8x16;
87

98
use crate::util::unlikely;
109

@@ -173,69 +172,4 @@ impl<'a> Cursor<'a> {
173172
pub fn reset_to(&mut self, pos: BytePos) {
174173
self.pos = pos.0;
175174
}
176-
177-
/// Find the next occurrence of a byte
178-
#[inline]
179-
pub fn find_byte(&self, byte: u8) -> Option<u32> {
180-
// If we're at or near EOF, use the standard implementation
181-
if unlikely(self.pos + 16 > self.len) {
182-
return self.find_byte_scalar(byte);
183-
}
184-
185-
// SIMD implementation using wide crate
186-
self.find_byte_simd(byte)
187-
}
188-
189-
/// SIMD-accelerated implementation of find_byte
190-
#[inline]
191-
fn find_byte_simd(&self, byte: u8) -> Option<u32> {
192-
let input = &self.input[self.pos as usize..];
193-
let mut position = 0u32;
194-
195-
// Process 16 bytes at a time
196-
while position + 16 <= input.len() as u32 {
197-
// Create a vector with our pattern
198-
let needle = u8x16::splat(byte);
199-
200-
// Create a vector with current chunk of data
201-
let mut data = [0u8; 16];
202-
data.copy_from_slice(&input[position as usize..(position + 16) as usize]);
203-
let chunk = u8x16::new(data);
204-
205-
// Compare for equality
206-
let mask = chunk.cmp_eq(needle);
207-
208-
// Converting to array to check byte-by-byte (no move_mask available)
209-
let mask_array = mask.to_array();
210-
211-
// Check for any matches
212-
#[allow(clippy::needless_range_loop)]
213-
for i in 0..16 {
214-
if mask_array[i] != 0 {
215-
return Some(self.pos + position + i as u32);
216-
}
217-
}
218-
219-
position += 16;
220-
}
221-
222-
// Handle the remainder with the scalar implementation
223-
if position < input.len() as u32 {
224-
return input[position as usize..]
225-
.iter()
226-
.position(|&b| b == byte)
227-
.map(|pos| self.pos + position + pos as u32);
228-
}
229-
230-
None
231-
}
232-
233-
/// Standard fallback implementation
234-
#[inline]
235-
fn find_byte_scalar(&self, byte: u8) -> Option<u32> {
236-
self.input[self.pos as usize..]
237-
.iter()
238-
.position(|&b| b == byte)
239-
.map(|pos| self.pos + pos as u32)
240-
}
241175
}

‎crates/swc_ecma_fast_parser/src/lexer/mod.rs

+20-19
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ mod tests;
2020
use std::rc::Rc;
2121

2222
use cursor::Cursor;
23+
use memchr::memchr2;
2324
use swc_common::{BytePos, Span, DUMMY_SP};
2425
use wide::u8x16;
2526

@@ -606,30 +607,30 @@ impl<'a> Lexer<'a> {
606607

607608
#[inline(always)]
608609
fn skip_line_comment(&mut self) {
609-
// Fast path using find_byte (which uses SIMD internally when available)
610-
if let Some(newline_pos) = self.cursor.find_byte(b'\n') {
611-
// Skip to the newline
612-
let from_cursor = newline_pos - self.cursor.position();
613-
self.cursor.advance_n(from_cursor);
614-
self.cursor.advance(); // Skip the newline
615-
self.had_line_break = LineBreak::Present;
616-
return;
617-
}
610+
// Fast path using memchr2 to find either \n or \r
611+
if let Some(pos) = memchr2(b'\n', b'\r', self.cursor.rest()) {
612+
let ch = self.cursor.rest()[pos];
613+
self.cursor.advance_n(pos as u32);
618614

619-
// Slower fallback path for when no newline is found
620-
while let Some(ch) = self.cursor.peek() {
621-
self.cursor.advance();
622615
if ch == b'\n' {
616+
// Simple newline
617+
self.cursor.advance(); // Skip the newline
623618
self.had_line_break = LineBreak::Present;
624-
break;
625-
} else if ch == b'\r' {
626-
self.had_line_break = LineBreak::Present;
627-
// Skip the following \n if it exists (CRLF sequence)
619+
} else {
620+
// Carriage return - check if followed by newline (CRLF)
621+
self.cursor.advance(); // Skip the \r
628622
if let Some(b'\n') = self.cursor.peek() {
629-
self.cursor.advance();
623+
self.cursor.advance(); // Skip the \n in CRLF
630624
}
631-
break;
632-
} else if ch == 0xe2 {
625+
self.had_line_break = LineBreak::Present;
626+
}
627+
return;
628+
}
629+
630+
// Slower fallback path for Unicode line breaks or when no line break is found
631+
while let Some(ch) = self.cursor.peek() {
632+
self.cursor.advance();
633+
if ch == 0xe2 {
633634
// Check for line separator (U+2028) and paragraph separator (U+2029)
634635
let bytes = self.cursor.peek_n(2);
635636
if bytes.len() == 2 && bytes[0] == 0x80 && (bytes[1] == 0xa8 || bytes[1] == 0xa9) {

0 commit comments

Comments (0)
Please sign in to comment.