|
7 | 7 | use std::fmt;
|
8 | 8 |
|
9 | 9 | use num_bigint::BigInt as BigIntValue;
|
10 |
| -use phf::phf_map; |
11 | 10 | use swc_atoms::Atom;
|
12 | 11 | use swc_common::Span;
|
13 | 12 |
|
@@ -138,6 +137,7 @@ pub enum TokenType {
|
138 | 137 | While = 135,
|
139 | 138 | With = 136,
|
140 | 139 | Yield = 137,
|
| 140 | + Module = 138, |
141 | 141 |
|
142 | 142 | // TypeScript-related keywords (starting from 150)
|
143 | 143 | Abstract = 150,
|
@@ -465,6 +465,7 @@ impl TokenType {
|
465 | 465 | TokenType::Shebang => "#!",
|
466 | 466 | TokenType::EOF => "EOF",
|
467 | 467 | TokenType::Invalid => "invalid token",
|
| 468 | + TokenType::Module => "module", |
468 | 469 | }
|
469 | 470 | }
|
470 | 471 | }
|
@@ -601,93 +602,177 @@ impl fmt::Debug for Token {
|
601 | 602 | }
|
602 | 603 | }
|
603 | 604 |
|
604 |
| -// Compile-time keyword to token type mapping using PHF |
605 |
| -static KEYWORDS: phf::Map<&'static str, TokenType> = phf_map! { |
606 |
| - // JavaScript keywords |
607 |
| - "await" => TokenType::Await, |
608 |
| - "break" => TokenType::Break, |
609 |
| - "case" => TokenType::Case, |
610 |
| - "catch" => TokenType::Catch, |
611 |
| - "class" => TokenType::Class, |
612 |
| - "const" => TokenType::Const, |
613 |
| - "continue" => TokenType::Continue, |
614 |
| - "debugger" => TokenType::Debugger, |
615 |
| - "default" => TokenType::Default, |
616 |
| - "delete" => TokenType::Delete, |
617 |
| - "do" => TokenType::Do, |
618 |
| - "else" => TokenType::Else, |
619 |
| - "export" => TokenType::Export, |
620 |
| - "extends" => TokenType::Extends, |
621 |
| - "false" => TokenType::False, |
622 |
| - "finally" => TokenType::Finally, |
623 |
| - "for" => TokenType::For, |
624 |
| - "function" => TokenType::Function, |
625 |
| - "if" => TokenType::If, |
626 |
| - "import" => TokenType::Import, |
627 |
| - "in" => TokenType::In, |
628 |
| - "instanceof" => TokenType::InstanceOf, |
629 |
| - "let" => TokenType::Let, |
630 |
| - "new" => TokenType::New, |
631 |
| - "null" => TokenType::Null, |
632 |
| - "return" => TokenType::Return, |
633 |
| - "super" => TokenType::Super, |
634 |
| - "switch" => TokenType::Switch, |
635 |
| - "this" => TokenType::This, |
636 |
| - "throw" => TokenType::Throw, |
637 |
| - "true" => TokenType::True, |
638 |
| - "try" => TokenType::Try, |
639 |
| - "typeof" => TokenType::TypeOf, |
640 |
| - "var" => TokenType::Var, |
641 |
| - "void" => TokenType::Void, |
642 |
| - "while" => TokenType::While, |
643 |
| - "with" => TokenType::With, |
644 |
| - "yield" => TokenType::Yield, |
645 |
| - |
646 |
| - // TypeScript-related keywords |
647 |
| - "abstract" => TokenType::Abstract, |
648 |
| - "any" => TokenType::Any, |
649 |
| - "as" => TokenType::As, |
650 |
| - "asserts" => TokenType::Asserts, |
651 |
| - "assert" => TokenType::Assert, |
652 |
| - "async" => TokenType::Async, |
653 |
| - "bigint" => TokenType::Bigint, |
654 |
| - "boolean" => TokenType::Boolean, |
655 |
| - "constructor" => TokenType::Constructor, |
656 |
| - "declare" => TokenType::Declare, |
657 |
| - "enum" => TokenType::Enum, |
658 |
| - "from" => TokenType::From, |
659 |
| - "get" => TokenType::Get, |
660 |
| - "global" => TokenType::Global, |
661 |
| - "implements" => TokenType::Implements, |
662 |
| - "interface" => TokenType::Interface, |
663 |
| - "intrinsic" => TokenType::Intrinsic, |
664 |
| - "is" => TokenType::Is, |
665 |
| - "keyof" => TokenType::Keyof, |
666 |
| - "namespace" => TokenType::Namespace, |
667 |
| - "never" => TokenType::Never, |
668 |
| - "number" => TokenType::Number, |
669 |
| - "object" => TokenType::Object, |
670 |
| - "of" => TokenType::Of, |
671 |
| - "package" => TokenType::Package, |
672 |
| - "private" => TokenType::Private, |
673 |
| - "protected" => TokenType::Protected, |
674 |
| - "public" => TokenType::Public, |
675 |
| - "readonly" => TokenType::Readonly, |
676 |
| - "require" => TokenType::Require, |
677 |
| - "set" => TokenType::Set, |
678 |
| - "static" => TokenType::Static, |
679 |
| - "string" => TokenType::String, |
680 |
| - "symbol" => TokenType::Symbol, |
681 |
| - "type" => TokenType::Type, |
682 |
| - "undefined" => TokenType::Undefined, |
683 |
| - "unique" => TokenType::Unique, |
684 |
| - "unknown" => TokenType::Unknown, |
685 |
| - "using" => TokenType::Using, |
| 605 | +struct KeywordEntry(&'static str, TokenType); |
| 606 | + |
| 607 | +/// A static array of KeywordEntry tuples, each containing a keyword |
| 608 | +/// string and its corresponding TokenType. |
| 609 | +static KEYWORD_LOOKUP: [KeywordEntry; 78] = [ |
| 610 | + KeywordEntry("await", TokenType::Await), |
| 611 | + KeywordEntry("break", TokenType::Break), |
| 612 | + KeywordEntry("case", TokenType::Case), |
| 613 | + KeywordEntry("catch", TokenType::Catch), |
| 614 | + KeywordEntry("class", TokenType::Class), |
| 615 | + KeywordEntry("const", TokenType::Const), |
| 616 | + KeywordEntry("continue", TokenType::Continue), |
| 617 | + KeywordEntry("debugger", TokenType::Debugger), |
| 618 | + KeywordEntry("default", TokenType::Default), |
| 619 | + KeywordEntry("delete", TokenType::Delete), |
| 620 | + KeywordEntry("do", TokenType::Do), |
| 621 | + KeywordEntry("else", TokenType::Else), |
| 622 | + KeywordEntry("export", TokenType::Export), |
| 623 | + KeywordEntry("extends", TokenType::Extends), |
| 624 | + KeywordEntry("false", TokenType::False), |
| 625 | + KeywordEntry("finally", TokenType::Finally), |
| 626 | + KeywordEntry("for", TokenType::For), |
| 627 | + KeywordEntry("function", TokenType::Function), |
| 628 | + KeywordEntry("if", TokenType::If), |
| 629 | + KeywordEntry("import", TokenType::Import), |
| 630 | + KeywordEntry("in", TokenType::In), |
| 631 | + KeywordEntry("instanceof", TokenType::InstanceOf), |
| 632 | + KeywordEntry("let", TokenType::Let), |
| 633 | + KeywordEntry("new", TokenType::New), |
| 634 | + KeywordEntry("null", TokenType::Null), |
| 635 | + KeywordEntry("return", TokenType::Return), |
| 636 | + KeywordEntry("super", TokenType::Super), |
| 637 | + KeywordEntry("switch", TokenType::Switch), |
| 638 | + KeywordEntry("this", TokenType::This), |
| 639 | + KeywordEntry("throw", TokenType::Throw), |
| 640 | + KeywordEntry("true", TokenType::True), |
| 641 | + KeywordEntry("try", TokenType::Try), |
| 642 | + KeywordEntry("typeof", TokenType::TypeOf), |
| 643 | + KeywordEntry("var", TokenType::Var), |
| 644 | + KeywordEntry("void", TokenType::Void), |
| 645 | + KeywordEntry("while", TokenType::While), |
| 646 | + KeywordEntry("with", TokenType::With), |
| 647 | + KeywordEntry("yield", TokenType::Yield), |
| 648 | + KeywordEntry("module", TokenType::Module), |
| 649 | + KeywordEntry("abstract", TokenType::Abstract), |
| 650 | + KeywordEntry("any", TokenType::Any), |
| 651 | + KeywordEntry("as", TokenType::As), |
| 652 | + KeywordEntry("asserts", TokenType::Asserts), |
| 653 | + KeywordEntry("assert", TokenType::Assert), |
| 654 | + KeywordEntry("async", TokenType::Async), |
| 655 | + KeywordEntry("bigint", TokenType::Bigint), |
| 656 | + KeywordEntry("boolean", TokenType::Boolean), |
| 657 | + KeywordEntry("constructor", TokenType::Constructor), |
| 658 | + KeywordEntry("declare", TokenType::Declare), |
| 659 | + KeywordEntry("enum", TokenType::Enum), |
| 660 | + KeywordEntry("from", TokenType::From), |
| 661 | + KeywordEntry("get", TokenType::Get), |
| 662 | + KeywordEntry("global", TokenType::Global), |
| 663 | + KeywordEntry("implements", TokenType::Implements), |
| 664 | + KeywordEntry("interface", TokenType::Interface), |
| 665 | + KeywordEntry("intrinsic", TokenType::Intrinsic), |
| 666 | + KeywordEntry("is", TokenType::Is), |
| 667 | + KeywordEntry("keyof", TokenType::Keyof), |
| 668 | + KeywordEntry("namespace", TokenType::Namespace), |
| 669 | + KeywordEntry("never", TokenType::Never), |
| 670 | + KeywordEntry("number", TokenType::Number), |
| 671 | + KeywordEntry("object", TokenType::Object), |
| 672 | + KeywordEntry("of", TokenType::Of), |
| 673 | + KeywordEntry("package", TokenType::Package), |
| 674 | + KeywordEntry("private", TokenType::Private), |
| 675 | + KeywordEntry("protected", TokenType::Protected), |
| 676 | + KeywordEntry("public", TokenType::Public), |
| 677 | + KeywordEntry("readonly", TokenType::Readonly), |
| 678 | + KeywordEntry("require", TokenType::Require), |
| 679 | + KeywordEntry("set", TokenType::Set), |
| 680 | + KeywordEntry("static", TokenType::Static), |
| 681 | + KeywordEntry("string", TokenType::String), |
| 682 | + KeywordEntry("symbol", TokenType::Symbol), |
| 683 | + KeywordEntry("type", TokenType::Type), |
| 684 | + KeywordEntry("undefined", TokenType::Undefined), |
| 685 | + KeywordEntry("unique", TokenType::Unique), |
| 686 | + KeywordEntry("unknown", TokenType::Unknown), |
| 687 | + KeywordEntry("using", TokenType::Using), |
| 688 | +]; |
| 689 | + |
| 690 | +const MAX_KEYWORD_LEN: usize = 16; |
| 691 | + |
| 692 | +const MAX_KEYWORD_SLOT_LEN: usize = 4; |
| 693 | + |
| 694 | +/// Static keyword table for fast keyword lookup |
| 695 | +static KEYWORD_TABLE: [[[u8; MAX_KEYWORD_SLOT_LEN]; 26]; MAX_KEYWORD_LEN] = { |
| 696 | + // Initialize the table with 255 (u8) at each position |
| 697 | + let mut table = [[[255u8; MAX_KEYWORD_SLOT_LEN]; 26]; MAX_KEYWORD_LEN]; |
| 698 | + |
| 699 | + // Iterate over the keyword lookup table |
| 700 | + let mut i = 0; |
| 701 | + while i < KEYWORD_LOOKUP.len() { |
| 702 | + let word = KEYWORD_LOOKUP[i].0; |
| 703 | + let len = word.len(); |
| 704 | + |
| 705 | + // Check if the length of the word is within the valid range |
| 706 | + if len > 0 && len <= 16 { |
| 707 | + let first_char = word.as_bytes()[0]; |
| 708 | + let len_idx = len - 1; |
| 709 | + let char_idx = (first_char - b'a') as usize; |
| 710 | + |
| 711 | + // Find an empty slot in the table for the current word |
| 712 | + let mut slot_idx = 0; |
| 713 | + while slot_idx < MAX_KEYWORD_SLOT_LEN && table[len_idx][char_idx][slot_idx] != 255 { |
| 714 | + slot_idx += 1; |
| 715 | + } |
| 716 | + |
| 717 | + // If an empty slot is found, store the index of the keyword entry in the table |
| 718 | + if slot_idx < MAX_KEYWORD_SLOT_LEN { |
| 719 | + table[len_idx][char_idx][slot_idx] = i as u8; |
| 720 | + } |
| 721 | + } |
| 722 | + i += 1; |
| 723 | + } |
| 724 | + |
| 725 | + // Return the initialized table |
| 726 | + table |
686 | 727 | };
|
687 | 728 |
|
| 729 | +/// Attempts to find a keyword in the static keyword table and returns its |
| 730 | +/// corresponding TokenType. |
| 731 | +/// |
| 732 | +/// This function takes a word as input and checks if it matches any of the |
| 733 | +/// keywords stored in the KEYWORD_TABLE. If a match is found, it returns the |
| 734 | +/// TokenType associated with the keyword. Otherwise, it returns None. |
| 735 | +fn find_keyword_from_table(word: &str) -> Option<TokenType> { |
| 736 | + // Determine the length of the word to check if it's within the valid range |
| 737 | + let len = word.len(); |
| 738 | + if len > 0 && len <= 16 { |
| 739 | + // SAFETY: word len is within 1..=16 bounds |
| 740 | + let first_byte = *unsafe { word.as_bytes().get_unchecked(0) }; |
| 741 | + let len_idx = len - 1; |
| 742 | + let byte_idx = (first_byte - b'a') as usize; |
| 743 | + |
| 744 | + let mut slot_idx = 0; |
| 745 | + while slot_idx < MAX_KEYWORD_SLOT_LEN { |
| 746 | + // Retrieve the index of the keyword entry from the table |
| 747 | + let idx = *unsafe { |
| 748 | + KEYWORD_TABLE |
| 749 | + .get_unchecked(len_idx) |
| 750 | + .get_unchecked(byte_idx) |
| 751 | + .get_unchecked(slot_idx) |
| 752 | + }; |
| 753 | + // If the index is 255, it means we've reached the end of the slot |
| 754 | + if idx == 255 { |
| 755 | + break; |
| 756 | + } |
| 757 | + |
| 758 | + // SAFETY: idx is within bounds |
| 759 | + let entry = unsafe { KEYWORD_LOOKUP.get_unchecked(idx as usize) }; |
| 760 | + |
| 761 | + // Check if the word matches the keyword in the entry |
| 762 | + if entry.0 == word { |
| 763 | + return Some(entry.1); |
| 764 | + } |
| 765 | + |
| 766 | + slot_idx += 1; |
| 767 | + } |
| 768 | + } |
| 769 | + |
| 770 | + None |
| 771 | +} |
| 772 | + |
688 | 773 | /// Convert a keyword string to TokenType
|
689 |
| -/// Uses a PHF map for O(1) time complexity with zero runtime overhead |
690 |
| -/// Optimized with fast-path checks for common keywords |
| 774 | +/// Uses the word's length and first byte to locate the keyword in the lookup |
| 775 | +/// table, after fast-path checks for the most common keywords. |
691 | 776 | #[inline(always)]
|
692 | 777 | pub fn keyword_to_token_type(word: &str) -> Option<TokenType> {
|
693 | 778 | // Fast path for the most common keywords
|
@@ -772,8 +857,8 @@ pub fn keyword_to_token_type(word: &str) -> Option<TokenType> {
|
772 | 857 | _ => {}
|
773 | 858 | }
|
774 | 859 |
|
775 |
| - // Fallback to the PHF map for less common keywords |
776 |
| - KEYWORDS.get(word).copied() |
| 860 | + // Fallback to KEYWORD_TABLE for less common keywords |
| 861 | + find_keyword_from_table(word) |
777 | 862 | }
|
778 | 863 |
|
779 | 864 | #[cfg(test)]
|
|
0 commit comments