Skip to content

Commit 4314dff

Browse files
committed
Delay string literal unescaping.
Currently string literals are unescaped twice.
- Once during lexing in `cook_quoted`/`cook_c_string`/`cook_common`. This one just checks for errors.
- Again in `LitKind::from_token_lit`, which is mostly called when lowering AST to HIR, but also in a few other places during expansion. This one actually constructs the unescaped string. It also has error checking code, but the code that handles the error cases is actually dead (and has several bugs) because the check during lexing catches all errors!

This commit removes the checking during lexing, and fixes up `LitKind::from_token_lit` so it properly does both checking and construction.

This is a language change: some programs now compile that previously did not. For example, it is now possible for macros to be passed "invalid" string literals like "\a\b\c". This is a continuation of a trend of delaying semantic error checking of literals to after expansion, e.g. #102944 did this for some cases for numeric literals, and the detection of NUL chars in C string literals is already delayed in this way.

XXX: have Session::report_lit_errors?
XXX: have LitKind::from_token_lit so you don't need the .0?

Things to note:
- `LitError` has a new `EscapeError` variant.
- `LitKind::from_token_lit`'s return value changed, to produce multiple errors/warnings, and also to handle lexer warnings. This latter case is annoying but necessary to preserve existing warning behaviour.
- `report_lit_error` becomes `report_lit_errors`, in order to handle multiple errors in a single string literal.

Notes about test changes:
- `tests/rustdoc-ui/ignore-block-help.rs`: this relies on a parsing error occurring. The error present was an unescaping error, which is now delayed to after parsing. So the commit changes it to an "unterminated character literal" error which continues to occur during parsing.
- Several tests had unescaping errors combined with unterminated literal errors. The former are now delayed but the latter remain as lexing errors.
So the unterminated literal part needed to be split into a separate test file, otherwise compilation would end before the other errors were reported.
- `issue-62913.rs`: The structure and output changed a bit. Issue #62913 was about an ICE due to an unterminated string literal, so the new version should be good enough.
- `literals-are-validated-before-expansion.rs`: this tests exactly the behaviour that has been changed, and so was removed. XXX: insert a new test covering more of that.
- A couple of other tests produce the same errors, just in a different order.
1 parent 6f6d73b commit 4314dff

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+488
-438
lines changed

compiler/rustc_ast/src/attr/mod.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ impl AttrArgsEq {
240240
match self {
241241
AttrArgsEq::Ast(expr) => match expr.kind {
242242
ExprKind::Lit(token_lit) => {
243-
LitKind::from_token_lit(token_lit).ok().and_then(|lit| lit.str())
243+
LitKind::from_token_lit(token_lit).0.ok().and_then(|lit| lit.str())
244244
}
245245
_ => None,
246246
},
@@ -426,6 +426,7 @@ impl MetaItemKind {
426426
ExprKind::Lit(token_lit) => {
427427
// Turn failures to `None`, we'll get parse errors elsewhere.
428428
MetaItemLit::from_token_lit(token_lit, expr.span)
429+
.0
429430
.ok()
430431
.map(|lit| MetaItemKind::NameValue(lit))
431432
}

compiler/rustc_ast/src/util/literal.rs

+186-80
Large diffs are not rendered by default.

compiler/rustc_ast_lowering/src/expr.rs

+5-6
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use rustc_data_structures::stack::ensure_sufficient_stack;
1414
use rustc_hir as hir;
1515
use rustc_hir::def::{DefKind, Res};
1616
use rustc_middle::span_bug;
17-
use rustc_parse::parser::report_lit_error;
17+
use rustc_parse::parser::report_lit_errors;
1818
use rustc_span::source_map::{respan, Spanned};
1919
use rustc_span::symbol::{kw, sym, Ident, Symbol};
2020
use rustc_span::DUMMY_SP;
@@ -119,13 +119,12 @@ impl<'hir> LoweringContext<'_, 'hir> {
119119
hir::ExprKind::Unary(op, ohs)
120120
}
121121
ExprKind::Lit(token_lit) => {
122-
let lit_kind = match LitKind::from_token_lit(*token_lit) {
122+
let (result, errs) = LitKind::from_token_lit(*token_lit);
123+
let lit_kind = match result {
123124
Ok(lit_kind) => lit_kind,
124-
Err(err) => {
125-
report_lit_error(&self.tcx.sess.parse_sess, err, *token_lit, e.span);
126-
LitKind::Err
127-
}
125+
Err(()) => LitKind::Err,
128126
};
127+
report_lit_errors(&self.tcx.sess.parse_sess, errs, *token_lit, e.span);
129128
let lit = self.arena.alloc(respan(self.lower_span(e.span), lit_kind));
130129
hir::ExprKind::Lit(lit)
131130
}

compiler/rustc_ast_lowering/src/format.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -127,11 +127,11 @@ fn inline_literals(mut fmt: Cow<'_, FormatArgs>) -> Cow<'_, FormatArgs> {
127127
&& let ExprKind::Lit(lit) = arg.kind
128128
{
129129
if let token::LitKind::Str | token::LitKind::StrRaw(_) = lit.kind
130-
&& let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit)
130+
&& let Ok(LitKind::Str(s, _)) = LitKind::from_token_lit(lit).0
131131
{
132132
literal = Some(s);
133133
} else if let token::LitKind::Integer = lit.kind
134-
&& let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit)
134+
&& let Ok(LitKind::Int(n, _)) = LitKind::from_token_lit(lit).0
135135
{
136136
literal = Some(Symbol::intern(&n.to_string()));
137137
}

compiler/rustc_ast_lowering/src/lib.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -948,7 +948,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
948948
// In valid code the value always ends up as a single literal. Otherwise, a dummy
949949
// literal suffices because the error is handled elsewhere.
950950
let lit = if let ExprKind::Lit(token_lit) = expr.kind
951-
&& let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span)
951+
&& let Ok(lit) = MetaItemLit::from_token_lit(token_lit, expr.span).0
952952
{
953953
lit
954954
} else {

compiler/rustc_builtin_macros/src/concat.rs

+33-32
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use rustc_ast as ast;
22
use rustc_ast::tokenstream::TokenStream;
33
use rustc_expand::base::{self, DummyResult};
4-
use rustc_parse::parser::report_lit_error;
4+
use rustc_parse::parser::report_lit_errors;
55
use rustc_span::symbol::Symbol;
66

77
use crate::errors;
@@ -19,48 +19,49 @@ pub fn expand_concat(
1919
let mut has_errors = false;
2020
for e in es {
2121
match e.kind {
22-
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
23-
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
24-
accumulator.push_str(s.as_str());
25-
}
26-
Ok(ast::LitKind::Char(c)) => {
27-
accumulator.push(c);
28-
}
29-
Ok(ast::LitKind::Int(i, _)) => {
30-
accumulator.push_str(&i.to_string());
31-
}
32-
Ok(ast::LitKind::Bool(b)) => {
33-
accumulator.push_str(&b.to_string());
34-
}
35-
Ok(ast::LitKind::CStr(..)) => {
36-
cx.emit_err(errors::ConcatCStrLit { span: e.span });
37-
has_errors = true;
38-
}
39-
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
40-
cx.emit_err(errors::ConcatBytestr { span: e.span });
41-
has_errors = true;
42-
}
43-
Ok(ast::LitKind::Err) => {
44-
has_errors = true;
45-
}
46-
Err(err) => {
47-
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
48-
has_errors = true;
22+
ast::ExprKind::Lit(token_lit) => {
23+
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
24+
match res {
25+
Ok(ast::LitKind::Str(s, _) | ast::LitKind::Float(s, _)) => {
26+
accumulator.push_str(s.as_str());
27+
}
28+
Ok(ast::LitKind::Char(c)) => {
29+
accumulator.push(c);
30+
}
31+
Ok(ast::LitKind::Int(i, _)) => {
32+
accumulator.push_str(&i.to_string());
33+
}
34+
Ok(ast::LitKind::Bool(b)) => {
35+
accumulator.push_str(&b.to_string());
36+
}
37+
Ok(ast::LitKind::CStr(..)) => {
38+
cx.emit_err(errors::ConcatCStrLit { span: e.span });
39+
has_errors = true;
40+
}
41+
Ok(ast::LitKind::Byte(..) | ast::LitKind::ByteStr(..)) => {
42+
cx.emit_err(errors::ConcatBytestr { span: e.span });
43+
has_errors = true;
44+
}
45+
Ok(ast::LitKind::Err) | Err(()) => {
46+
has_errors = true;
47+
}
4948
}
50-
},
49+
report_lit_errors(&cx.sess.parse_sess, errs, token_lit, e.span);
50+
}
5151
// We also want to allow negative numeric literals.
5252
ast::ExprKind::Unary(ast::UnOp::Neg, ref expr)
5353
if let ast::ExprKind::Lit(token_lit) = expr.kind =>
5454
{
55-
match ast::LitKind::from_token_lit(token_lit) {
55+
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
56+
match res {
5657
Ok(ast::LitKind::Int(i, _)) => accumulator.push_str(&format!("-{i}")),
5758
Ok(ast::LitKind::Float(f, _)) => accumulator.push_str(&format!("-{f}")),
58-
Err(err) => {
59-
report_lit_error(&cx.sess.parse_sess, err, token_lit, e.span);
59+
Err(()) => {
6060
has_errors = true;
6161
}
6262
_ => missing_literal.push(e.span),
6363
}
64+
report_lit_errors(&cx.sess.parse_sess, errs, token_lit, e.span);
6465
}
6566
ast::ExprKind::IncludedBytes(..) => {
6667
cx.emit_err(errors::ConcatBytestr { span: e.span });

compiler/rustc_builtin_macros/src/concat_bytes.rs

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use rustc_ast as ast;
22
use rustc_ast::{ptr::P, tokenstream::TokenStream};
33
use rustc_expand::base::{self, DummyResult};
4-
use rustc_parse::parser::report_lit_error;
4+
use rustc_parse::parser::report_lit_errors;
55
use rustc_span::Span;
66

77
use crate::errors;
@@ -17,7 +17,8 @@ fn invalid_type_err(
1717
ConcatBytesInvalid, ConcatBytesInvalidSuggestion, ConcatBytesNonU8, ConcatBytesOob,
1818
};
1919
let snippet = cx.sess.source_map().span_to_snippet(span).ok();
20-
match ast::LitKind::from_token_lit(token_lit) {
20+
let (res, errs) = ast::LitKind::from_token_lit(token_lit);
21+
match res {
2122
Ok(ast::LitKind::CStr(_, _)) => {
2223
// Avoid ambiguity in handling of terminal `NUL` by refusing to
2324
// concatenate C string literals as bytes.
@@ -60,10 +61,9 @@ fn invalid_type_err(
6061
cx.emit_err(ConcatBytesNonU8 { span });
6162
}
6263
Ok(ast::LitKind::ByteStr(..) | ast::LitKind::Byte(_)) => unreachable!(),
63-
Err(err) => {
64-
report_lit_error(&cx.sess.parse_sess, err, token_lit, span);
65-
}
64+
Err(()) => {}
6665
}
66+
report_lit_errors(&cx.sess.parse_sess, errs, token_lit, span);
6767
}
6868

6969
fn handle_array_element(
@@ -80,7 +80,7 @@ fn handle_array_element(
8080
*has_errors = true;
8181
None
8282
}
83-
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
83+
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 {
8484
Ok(ast::LitKind::Int(
8585
val,
8686
ast::LitIntType::Unsuffixed | ast::LitIntType::Unsigned(ast::UintTy::U8),
@@ -141,7 +141,7 @@ pub fn expand_concat_bytes(
141141
ast::ExprKind::Repeat(expr, count) => {
142142
if let ast::ExprKind::Lit(token_lit) = count.value.kind
143143
&& let Ok(ast::LitKind::Int(count_val, _)) =
144-
ast::LitKind::from_token_lit(token_lit)
144+
ast::LitKind::from_token_lit(token_lit).0
145145
{
146146
if let Some(elem) =
147147
handle_array_element(cx, &mut has_errors, &mut missing_literals, expr)
@@ -154,7 +154,7 @@ pub fn expand_concat_bytes(
154154
cx.emit_err(errors::ConcatBytesBadRepeat { span: count.value.span });
155155
}
156156
}
157-
&ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
157+
&ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit).0 {
158158
Ok(ast::LitKind::Byte(val)) => {
159159
accumulator.push(val);
160160
}

compiler/rustc_expand/src/base.rs

+22-20
Original file line numberDiff line numberDiff line change
@@ -1235,26 +1235,28 @@ pub fn expr_to_spanned_string<'a>(
12351235
let expr = cx.expander().fully_expand_fragment(AstFragment::Expr(expr)).make_expr();
12361236

12371237
Err(match expr.kind {
1238-
ast::ExprKind::Lit(token_lit) => match ast::LitKind::from_token_lit(token_lit) {
1239-
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
1240-
Ok(ast::LitKind::ByteStr(..)) => {
1241-
let mut err = cx.struct_span_err(expr.span, err_msg);
1242-
let span = expr.span.shrink_to_lo();
1243-
err.span_suggestion(
1244-
span.with_hi(span.lo() + BytePos(1)),
1245-
"consider removing the leading `b`",
1246-
"",
1247-
Applicability::MaybeIncorrect,
1248-
);
1249-
Some((err, true))
1250-
}
1251-
Ok(ast::LitKind::Err) => None,
1252-
Err(err) => {
1253-
parser::report_lit_error(&cx.sess.parse_sess, err, token_lit, expr.span);
1254-
None
1255-
}
1256-
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
1257-
},
1238+
ast::ExprKind::Lit(token_lit) => {
1239+
let (lit_kind, errs) = ast::LitKind::from_token_lit(token_lit);
1240+
let res = match lit_kind {
1241+
Ok(ast::LitKind::Str(s, style)) => return Ok((s, style, expr.span)),
1242+
Ok(ast::LitKind::ByteStr(..)) => {
1243+
let mut err = cx.struct_span_err(expr.span, err_msg);
1244+
let span = expr.span.shrink_to_lo();
1245+
err.span_suggestion(
1246+
span.with_hi(span.lo() + BytePos(1)),
1247+
"consider removing the leading `b`",
1248+
"",
1249+
Applicability::MaybeIncorrect,
1250+
);
1251+
Some((err, true))
1252+
}
1253+
Ok(ast::LitKind::Err) => None,
1254+
Err(()) => None,
1255+
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
1256+
};
1257+
parser::report_lit_errors(&cx.sess.parse_sess, errs, token_lit, expr.span);
1258+
res
1259+
}
12581260
ast::ExprKind::Err => None,
12591261
_ => Some((cx.struct_span_err(expr.span, err_msg), false)),
12601262
})

compiler/rustc_expand/src/mbe/metavar_expr.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@ fn parse_depth<'sess>(
119119
.span_diagnostic
120120
.struct_span_err(span, "meta-variable expression depth must be a literal"));
121121
};
122-
if let Ok(lit_kind) = LitKind::from_token_lit(*lit)
122+
if let Ok(lit_kind) = LitKind::from_token_lit(*lit).0
123123
&& let LitKind::Int(n_u128, LitIntType::Unsuffixed) = lit_kind
124124
&& let Ok(n_usize) = usize::try_from(n_u128)
125125
{

compiler/rustc_lexer/src/unescape.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -347,7 +347,7 @@ where
347347
// them in the range computation.
348348
while let Some(c) = chars.next() {
349349
let start = src.len() - chars.as_str().len() - c.len_utf8();
350-
let res = match c {
350+
let res: Result<T, EscapeError> = match c {
351351
'\\' => {
352352
match chars.clone().next() {
353353
Some('\n') => {

0 commit comments

Comments
 (0)