Skip to content

Commit f3ec00a

Browse files
committed
Add a fast code path to optimize the confusable_idents lint for ASCII-only code bases.
1 parent c05961c commit f3ec00a

File tree

1 file changed

+49
-4
lines changed

1 file changed

+49
-4
lines changed

src/librustc_lint/non_ascii_idents.rs

+49-4
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ use crate::{EarlyContext, EarlyLintPass, LintContext};
22
use rustc_ast::ast;
33
use rustc_data_structures::fx::FxHashMap;
44
use rustc_span::symbol::SymbolStr;
5+
use std::hash::{Hash, Hasher};
6+
use std::ops::Deref;
57

68
declare_lint! {
79
pub NON_ASCII_IDENTS,
@@ -24,9 +26,6 @@ declare_lint! {
2426

2527
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
2628

27-
use std::hash::{Hash, Hasher};
28-
use std::ops::Deref;
29-
3029
enum CowBoxSymStr {
3130
Interned(SymbolStr),
3231
Owned(Box<str>),
@@ -73,6 +72,35 @@ fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr
7372
}
7473
}
7574

75+
/// Returns `true` when the code point of `c` falls inside any of the
/// inclusive `(start, end)` ranges of the table below, and `false` otherwise.
fn is_in_ascii_confusable_closure(c: char) -> bool {
    // FIXME: move this table to `unicode_security` crate.
    // The ranges below were taken from Unicode 13 data.
    const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];

    // Widening a `char` to `u64` is lossless; it only matches the table's element type.
    let code_point = c as u64;
    ASCII_CONFUSABLE_CLOSURE.iter().any(|&(first, last)| (first..=last).contains(&code_point))
}
87+
88+
/// Returns `true` when the code point of `c` is one of the individual values
/// listed in the table below, and `false` otherwise.
fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
    // FIXME: move this table to `unicode_security` crate.
    // The entries below were taken from Unicode 13 data.
    const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
        0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
        0x2080,
    ];

    // Widening a `char` to `u64` is lossless; it only matches the table's element type.
    let code_point = c as u64;
    ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST.contains(&code_point)
}
103+
76104
impl EarlyLintPass for NonAsciiIdents {
77105
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
78106
use rustc_session::lint::Level;
@@ -81,9 +109,26 @@ impl EarlyLintPass for NonAsciiIdents {
81109
}
82110
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
83111
let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
112+
let mut in_fast_path = true;
84113
for (symbol, sp) in symbols.iter() {
114+
// fast path
85115
let symbol_str = symbol.as_str();
86-
symbol_strs_and_spans.push((symbol_str, *sp));
116+
if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
117+
// fallback to slow path.
118+
symbol_strs_and_spans.clear();
119+
in_fast_path = false;
120+
break;
121+
}
122+
if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
123+
symbol_strs_and_spans.push((symbol_str, *sp));
124+
}
125+
}
126+
if !in_fast_path {
127+
// slow path
128+
for (symbol, sp) in symbols.iter() {
129+
let symbol_str = symbol.as_str();
130+
symbol_strs_and_spans.push((symbol_str, *sp));
131+
}
87132
}
88133
drop(symbols);
89134
symbol_strs_and_spans.sort_by_key(|x| x.0.clone());

0 commit comments

Comments
 (0)