Skip to content

Commit 8cec9ec

Browse files
authored
Rollup merge of rust-lang#111543 - Urgau:uplift_invalid_utf8_in_unchecked, r=WaffleLapkin
Uplift `clippy::invalid_utf8_in_unchecked` lint This PR aims at uplifting the `clippy::invalid_utf8_in_unchecked` lint into two lints. ## `invalid_from_utf8_unchecked` (deny-by-default) The `invalid_from_utf8_unchecked` lint checks for calls to `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut` with an invalid UTF-8 literal. ### Example ```rust unsafe { std::str::from_utf8_unchecked(b"cl\x82ippy"); } ``` ### Explanation Creating such a `str` would result in undefined behavior as per documentation for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`. ## `invalid_from_utf8` (warn-by-default) The `invalid_from_utf8` lint checks for calls to `std::str::from_utf8` and `std::str::from_utf8_mut` with an invalid UTF-8 literal. ### Example ```rust std::str::from_utf8(b"ru\x82st"); ``` ### Explanation Trying to create such a `str` would always return an error as per documentation for `std::str::from_utf8` and `std::str::from_utf8_mut`. ----- Mostly followed the instructions for uplifting a clippy lint described here: rust-lang#99696 (review) ```@rustbot``` label: +I-lang-nominated r? compiler ----- For Clippy: changelog: Moves: Uplifted `clippy::invalid_utf8_in_unchecked` into rustc
2 parents 39ed4e2 + b84c190 commit 8cec9ec

19 files changed

+423
-168
lines changed

compiler/rustc_lint/messages.ftl

+8
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,14 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
304304
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
305305
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
306306
307+
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
308+
lint_invalid_from_utf8_checked = calls to `{$method}` with an invalid literal always return an error
309+
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
310+
311+
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
312+
lint_invalid_from_utf8_unchecked = calls to `{$method}` with an invalid literal are undefined behavior
313+
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
314+
307315
lint_lintpass_by_hand = implementing `LintPass` by hand
308316
.help = try using `declare_lint_pass!` or `impl_lint_pass!` instead
309317
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
use std::str::Utf8Error;
2+
3+
use rustc_ast::{BorrowKind, LitKind};
4+
use rustc_hir::{Expr, ExprKind};
5+
use rustc_span::source_map::Spanned;
6+
use rustc_span::sym;
7+
8+
use crate::lints::InvalidFromUtf8Diag;
9+
use crate::{LateContext, LateLintPass, LintContext};
10+
11+
declare_lint! {
    /// The `invalid_from_utf8_unchecked` lint detects calls to
    /// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
    /// whose argument is a literal that is not valid UTF-8.
    ///
    /// ### Example
    ///
    /// ```rust,compile_fail
    /// # #[allow(unused)]
    /// unsafe {
    ///     std::str::from_utf8_unchecked(b"Ru\x82st");
    /// }
    /// ```
    ///
    /// {{produces}}
    ///
    /// ### Explanation
    ///
    /// Constructing a `str` from invalid UTF-8 is undefined behavior, as stated
    /// in the documentation of `std::str::from_utf8_unchecked` and
    /// `std::str::from_utf8_unchecked_mut`.
    pub INVALID_FROM_UTF8_UNCHECKED,
    Deny,
    "using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
}
35+
36+
declare_lint! {
    /// The `invalid_from_utf8` lint detects calls to
    /// `std::str::from_utf8` and `std::str::from_utf8_mut`
    /// whose argument is a literal that is not valid UTF-8.
    ///
    /// ### Example
    ///
    /// ```rust
    /// # #[allow(unused)]
    /// std::str::from_utf8(b"Ru\x82st");
    /// ```
    ///
    /// {{produces}}
    ///
    /// ### Explanation
    ///
    /// Such a call can never produce a `str`: as per the documentation of
    /// `std::str::from_utf8` and `std::str::from_utf8_mut`, it always
    /// returns an error.
    pub INVALID_FROM_UTF8,
    Warn,
    "using a non UTF-8 literal in `std::str::from_utf8`"
}
58+
59+
declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);
60+
61+
impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
62+
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
63+
if let ExprKind::Call(path, [arg]) = expr.kind
64+
&& let ExprKind::Path(ref qpath) = path.kind
65+
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
66+
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
67+
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
68+
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
69+
{
70+
let lint = |utf8_error: Utf8Error| {
71+
let label = arg.span;
72+
let method = diag_item.as_str().strip_prefix("str_").unwrap();
73+
let method = format!("std::str::{method}");
74+
let valid_up_to = utf8_error.valid_up_to();
75+
let is_unchecked_variant = diag_item.as_str().contains("unchecked");
76+
77+
cx.emit_spanned_lint(
78+
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
79+
expr.span,
80+
if is_unchecked_variant {
81+
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
82+
} else {
83+
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
84+
}
85+
)
86+
};
87+
88+
match &arg.kind {
89+
ExprKind::Lit(Spanned { node: lit, .. }) => {
90+
if let LitKind::ByteStr(bytes, _) = &lit
91+
&& let Err(utf8_error) = std::str::from_utf8(bytes)
92+
{
93+
lint(utf8_error);
94+
}
95+
},
96+
ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
97+
let elements = args.iter().map(|e|{
98+
match &e.kind {
99+
ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
100+
LitKind::Byte(b) => Some(*b),
101+
LitKind::Int(b, _) => Some(*b as u8),
102+
_ => None
103+
}
104+
_ => None
105+
}
106+
}).collect::<Option<Vec<_>>>();
107+
108+
if let Some(elements) = elements
109+
&& let Err(utf8_error) = std::str::from_utf8(&elements)
110+
{
111+
lint(utf8_error);
112+
}
113+
}
114+
_ => {}
115+
}
116+
}
117+
}
118+
}

compiler/rustc_lint/src/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ mod expect;
6060
mod for_loops_over_fallibles;
6161
pub mod hidden_unicode_codepoints;
6262
mod internal;
63+
mod invalid_from_utf8;
6364
mod late;
6465
mod let_underscore;
6566
mod levels;
@@ -102,6 +103,7 @@ use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
102103
use for_loops_over_fallibles::*;
103104
use hidden_unicode_codepoints::*;
104105
use internal::*;
106+
use invalid_from_utf8::*;
105107
use let_underscore::*;
106108
use map_unit_fn::*;
107109
use methods::*;
@@ -207,6 +209,7 @@ late_lint_methods!(
207209
HardwiredLints: HardwiredLints,
208210
ImproperCTypesDeclarations: ImproperCTypesDeclarations,
209211
ImproperCTypesDefinitions: ImproperCTypesDefinitions,
212+
InvalidFromUtf8: InvalidFromUtf8,
210213
VariantSizeDifferences: VariantSizeDifferences,
211214
BoxPointers: BoxPointers,
212215
PathStatements: PathStatements,

compiler/rustc_lint/src/lints.rs

+19
Original file line numberDiff line numberDiff line change
@@ -699,6 +699,25 @@ pub struct ForgetCopyDiag<'a> {
699699
pub label: Span,
700700
}
701701

702+
// invalid_from_utf8.rs
703+
#[derive(LintDiagnostic)]
704+
pub enum InvalidFromUtf8Diag {
705+
#[diag(lint_invalid_from_utf8_unchecked)]
706+
Unchecked {
707+
method: String,
708+
valid_up_to: usize,
709+
#[label]
710+
label: Span,
711+
},
712+
#[diag(lint_invalid_from_utf8_checked)]
713+
Checked {
714+
method: String,
715+
valid_up_to: usize,
716+
#[label]
717+
label: Span,
718+
},
719+
}
720+
702721
// hidden_unicode_codepoints.rs
703722
#[derive(LintDiagnostic)]
704723
#[diag(lint_hidden_unicode_codepoints)]

compiler/rustc_span/src/symbol.rs

+4
Original file line numberDiff line numberDiff line change
@@ -1454,6 +1454,10 @@ symbols! {
14541454
stop_after_dataflow,
14551455
store,
14561456
str,
1457+
str_from_utf8,
1458+
str_from_utf8_mut,
1459+
str_from_utf8_unchecked,
1460+
str_from_utf8_unchecked_mut,
14571461
str_split_whitespace,
14581462
str_trim,
14591463
str_trim_end,

library/alloc/tests/str.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#![cfg_attr(not(bootstrap), allow(invalid_from_utf8))]
2+
13
use std::assert_matches::assert_matches;
24
use std::borrow::Cow;
35
use std::cmp::Ordering::{Equal, Greater, Less};

library/core/src/str/converts.rs

+4
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ use super::Utf8Error;
8484
#[stable(feature = "rust1", since = "1.0.0")]
8585
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
8686
#[rustc_allow_const_fn_unstable(str_internals)]
87+
#[rustc_diagnostic_item = "str_from_utf8"]
8788
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
8889
// FIXME: This should use `?` again, once it's `const`
8990
match run_utf8_validation(v) {
@@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
127128
/// errors that can be returned.
128129
#[stable(feature = "str_mut_extras", since = "1.20.0")]
129130
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
131+
#[rustc_diagnostic_item = "str_from_utf8_mut"]
130132
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
131133
// This should use `?` again, once it's `const`
132134
match run_utf8_validation(v) {
@@ -167,6 +169,7 @@ pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
167169
#[must_use]
168170
#[stable(feature = "rust1", since = "1.0.0")]
169171
#[rustc_const_stable(feature = "const_str_from_utf8_unchecked", since = "1.55.0")]
172+
#[rustc_diagnostic_item = "str_from_utf8_unchecked"]
170173
pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
171174
// SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8.
172175
// Also relies on `&str` and `&[u8]` having the same layout.
@@ -194,6 +197,7 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
194197
#[must_use]
195198
#[stable(feature = "str_mut_extras", since = "1.20.0")]
196199
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "91005")]
200+
#[rustc_diagnostic_item = "str_from_utf8_unchecked_mut"]
197201
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
198202
// SAFETY: the caller must guarantee that the bytes `v`
199203
// are valid UTF-8, thus the cast to `*mut str` is safe.

src/tools/clippy/clippy_lints/src/declared_lints.rs

-1
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,6 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
212212
crate::instant_subtraction::UNCHECKED_DURATION_SUBTRACTION_INFO,
213213
crate::int_plus_one::INT_PLUS_ONE_INFO,
214214
crate::invalid_upcast_comparisons::INVALID_UPCAST_COMPARISONS_INFO,
215-
crate::invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED_INFO,
216215
crate::items_after_statements::ITEMS_AFTER_STATEMENTS_INFO,
217216
crate::items_after_test_module::ITEMS_AFTER_TEST_MODULE_INFO,
218217
crate::iter_not_returning_iterator::ITER_NOT_RETURNING_ITERATOR_INFO,

src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs

-74
This file was deleted.

src/tools/clippy/clippy_lints/src/lib.rs

-2
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,6 @@ mod inline_fn_without_body;
157157
mod instant_subtraction;
158158
mod int_plus_one;
159159
mod invalid_upcast_comparisons;
160-
mod invalid_utf8_in_unchecked;
161160
mod items_after_statements;
162161
mod items_after_test_module;
163162
mod iter_not_returning_iterator;
@@ -937,7 +936,6 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
937936
store.register_late_pass(move |_| Box::new(manual_retain::ManualRetain::new(msrv())));
938937
let verbose_bit_mask_threshold = conf.verbose_bit_mask_threshold;
939938
store.register_late_pass(move |_| Box::new(operators::Operators::new(verbose_bit_mask_threshold)));
940-
store.register_late_pass(|_| Box::new(invalid_utf8_in_unchecked::InvalidUtf8InUnchecked));
941939
store.register_late_pass(|_| Box::<std_instead_of_core::StdReexports>::default());
942940
store.register_late_pass(move |_| Box::new(instant_subtraction::InstantSubtraction::new(msrv())));
943941
store.register_late_pass(|_| Box::new(partialeq_to_none::PartialeqToNone));

src/tools/clippy/clippy_lints/src/renamed_lints.rs

+1
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ pub static RENAMED_LINTS: &[(&str, &str)] = &[
4343
("clippy::into_iter_on_array", "array_into_iter"),
4444
("clippy::invalid_atomic_ordering", "invalid_atomic_ordering"),
4545
("clippy::invalid_ref", "invalid_value"),
46+
("clippy::invalid_utf8_in_unchecked", "invalid_from_utf8_unchecked"),
4647
("clippy::let_underscore_drop", "let_underscore_drop"),
4748
("clippy::mem_discriminant_non_enum", "enum_intrinsics_non_enums"),
4849
("clippy::panic_params", "non_fmt_panics"),

src/tools/clippy/tests/ui/invalid_utf8_in_unchecked.rs

-20
This file was deleted.

src/tools/clippy/tests/ui/invalid_utf8_in_unchecked.stderr

-22
This file was deleted.

src/tools/clippy/tests/ui/rename.fixed

+2
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#![allow(array_into_iter)]
3939
#![allow(invalid_atomic_ordering)]
4040
#![allow(invalid_value)]
41+
#![allow(invalid_from_utf8_unchecked)]
4142
#![allow(let_underscore_drop)]
4243
#![allow(enum_intrinsics_non_enums)]
4344
#![allow(non_fmt_panics)]
@@ -87,6 +88,7 @@
8788
#![warn(array_into_iter)]
8889
#![warn(invalid_atomic_ordering)]
8990
#![warn(invalid_value)]
91+
#![warn(invalid_from_utf8_unchecked)]
9092
#![warn(let_underscore_drop)]
9193
#![warn(enum_intrinsics_non_enums)]
9294
#![warn(non_fmt_panics)]

0 commit comments

Comments
 (0)