forked from rust-lang/rust
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnon_ascii_idents.rs
176 lines (162 loc) · 5.61 KB
/
non_ascii_idents.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
use crate::{EarlyContext, EarlyLintPass, LintContext};
use rustc_ast::ast;
use rustc_data_structures::fx::FxHashMap;
use rustc_span::symbol::{Ident, SymbolStr};
use std::hash::{Hash, Hasher};
use std::ops::Deref;
declare_lint! {
pub NON_ASCII_IDENTS,
Allow,
"detects non-ASCII identifiers",
crate_level_only
}
declare_lint! {
pub UNCOMMON_CODEPOINTS,
Warn,
"detects uncommon Unicode codepoints in identifiers",
crate_level_only
}
// FIXME: Change this to warn.
declare_lint! {
pub CONFUSABLE_IDENTS,
Allow,
"detects visually confusable pairs between identifiers",
crate_level_only
}
declare_lint_pass!(NonAsciiIdents => [NON_ASCII_IDENTS, UNCOMMON_CODEPOINTS, CONFUSABLE_IDENTS]);
enum CowBoxSymStr {
Interned(SymbolStr),
Owned(Box<str>),
}
impl Deref for CowBoxSymStr {
type Target = str;
fn deref(&self) -> &str {
match self {
CowBoxSymStr::Interned(interned) => interned,
CowBoxSymStr::Owned(ref owned) => owned,
}
}
}
impl Hash for CowBoxSymStr {
#[inline]
fn hash<H: Hasher>(&self, state: &mut H) {
Hash::hash(&**self, state)
}
}
impl PartialEq<CowBoxSymStr> for CowBoxSymStr {
#[inline]
fn eq(&self, other: &CowBoxSymStr) -> bool {
PartialEq::eq(&**self, &**other)
}
}
impl Eq for CowBoxSymStr {}
fn calc_skeleton(symbol_str: SymbolStr, buffer: &'_ mut String) -> CowBoxSymStr {
use std::mem::swap;
use unicode_security::confusable_detection::skeleton;
buffer.clear();
buffer.extend(skeleton(&symbol_str));
if symbol_str == *buffer {
CowBoxSymStr::Interned(symbol_str)
} else {
let mut owned = String::new();
swap(buffer, &mut owned);
CowBoxSymStr::Owned(owned.into_boxed_str())
}
}
fn is_in_ascii_confusable_closure(c: char) -> bool {
// FIXME: move this table to `unicode_security` crate.
// data here corresponds to Unicode 13.
const ASCII_CONFUSABLE_CLOSURE: &[(u64, u64)] = &[(0x00, 0x7f), (0xba, 0xba), (0x2080, 0x2080)];
let c = c as u64;
for &(range_start, range_end) in ASCII_CONFUSABLE_CLOSURE {
if c >= range_start && c <= range_end {
return true;
}
}
false
}
fn is_in_ascii_confusable_closure_relevant_list(c: char) -> bool {
// FIXME: move this table to `unicode_security` crate.
// data here corresponds to Unicode 13.
const ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST: &[u64] = &[
0x22, 0x25, 0x27, 0x2f, 0x30, 0x31, 0x49, 0x4f, 0x60, 0x6c, 0x6d, 0x6e, 0x72, 0x7c, 0xba,
0x2080,
];
let c = c as u64;
for &item in ASCII_CONFUSABLE_CLOSURE_RELEVANT_LIST {
if c == item {
return true;
}
}
false
}
impl EarlyLintPass for NonAsciiIdents {
fn check_crate(&mut self, cx: &EarlyContext<'_>, _: &ast::Crate) {
use rustc_session::lint::Level;
if cx.builder.lint_level(CONFUSABLE_IDENTS).0 == Level::Allow {
return;
}
let symbols = cx.sess.parse_sess.symbol_gallery.symbols.lock();
let mut symbol_strs_and_spans = Vec::with_capacity(symbols.len());
let mut in_fast_path = true;
for (symbol, sp) in symbols.iter() {
// fast path
let symbol_str = symbol.as_str();
if !symbol_str.chars().all(is_in_ascii_confusable_closure) {
// fallback to slow path.
symbol_strs_and_spans.clear();
in_fast_path = false;
break;
}
if symbol_str.chars().any(is_in_ascii_confusable_closure_relevant_list) {
symbol_strs_and_spans.push((symbol_str, *sp));
}
}
if !in_fast_path {
// slow path
for (symbol, sp) in symbols.iter() {
let symbol_str = symbol.as_str();
symbol_strs_and_spans.push((symbol_str, *sp));
}
}
drop(symbols);
symbol_strs_and_spans.sort_by_key(|x| x.0.clone());
let mut skeleton_map =
FxHashMap::with_capacity_and_hasher(symbol_strs_and_spans.len(), Default::default());
let mut str_buf = String::new();
for (symbol_str, sp) in symbol_strs_and_spans {
let skeleton = calc_skeleton(symbol_str.clone(), &mut str_buf);
skeleton_map
.entry(skeleton)
.and_modify(|(existing_symbolstr, existing_span)| {
cx.struct_span_lint(CONFUSABLE_IDENTS, sp, |lint| {
lint.build(&format!(
"identifier pair considered confusable between `{}` and `{}`",
existing_symbolstr, symbol_str
))
.span_label(
*existing_span,
"this is where the previous identifier occurred",
)
.emit();
});
})
.or_insert((symbol_str, sp));
}
}
fn check_ident(&mut self, cx: &EarlyContext<'_>, ident: Ident) {
use unicode_security::GeneralSecurityProfile;
let name_str = ident.name.as_str();
if name_str.is_ascii() {
return;
}
cx.struct_span_lint(NON_ASCII_IDENTS, ident.span, |lint| {
lint.build("identifier contains non-ASCII characters").emit()
});
if !name_str.chars().all(GeneralSecurityProfile::identifier_allowed) {
cx.struct_span_lint(UNCOMMON_CODEPOINTS, ident.span, |lint| {
lint.build("identifier contains uncommon Unicode codepoints").emit()
})
}
}
}