Skip to content

Commit 6663638

Browse files
tiehuis authored and andrewrk committed
Improve invalid character error messages (#566)
See #544
1 parent f4ca348 commit 6663638

File tree

2 files changed

+70
-9
lines changed

2 files changed

+70
-9
lines changed

src/tokenizer.cpp

+47 -9
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,44 @@ static void handle_string_escape(Tokenize *t, uint8_t c) {
416416
}
417417
}
418418

419+
// Looks up the C-style escape spelling of a control byte so diagnostics can
// show it readably (a line feed is reported as a backslash followed by 'n').
//
// Returns a pointer to a static string holding the escape text, or nullptr
// when the byte has no shorthand escape.
static const char* get_escape_shorthand(uint8_t c) {
    struct EscapeEntry {
        uint8_t byte;
        const char *text;
    };

    // One row per byte that has a single-letter escape.
    static const EscapeEntry escapes[] = {
        {'\0', "\\0"},
        {'\a', "\\a"},
        {'\b', "\\b"},
        {'\t', "\\t"},
        {'\n', "\\n"},
        {'\v', "\\v"},
        {'\f', "\\f"},
        {'\r', "\\r"},
    };

    for (const EscapeEntry &entry : escapes) {
        if (entry.byte == c) {
            return entry.text;
        }
    }
    return nullptr;
}
441+
442+
// Reports an "invalid character" tokenizer error for byte `c`, choosing the
// most readable rendering of the offending byte:
//   - a carriage return gets a dedicated message about line endings;
//   - printable bytes are shown verbatim;
//   - bytes with a shorthand escape are shown as that escape;
//   - everything else is shown as a two-digit hex escape.
//
// t: tokenizer state the error is reported against (passed to tokenize_error).
// c: the byte that was rejected.
static void invalid_char_error(Tokenize *t, uint8_t c) {
    // Checked first so a carriage return gets the line-ending hint rather
    // than a generic escape rendering.
    if (c == '\r') {
        tokenize_error(t, "invalid carriage return, only '\\n' line endings are supported");
        return;
    }

    if (isprint(c)) {
        tokenize_error(t, "invalid character: '%c'", c);
        return;
    }

    const char *sh = get_escape_shorthand(c);
    if (sh) {
        tokenize_error(t, "invalid character: '%s'", sh);
        return;
    }

    // Zero-pad to two digits so bytes below 0x10 read as a conventional hex
    // escape (a one-digit form would be printed otherwise).
    tokenize_error(t, "invalid character: '\\x%02x'", c);
}
456+
419457
void tokenize(Buf *buf, Tokenization *out) {
420458
Tokenize t = {0};
421459
t.out = out;
@@ -580,7 +618,7 @@ void tokenize(Buf *buf, Tokenization *out) {
580618
t.state = TokenizeStateSawQuestionMark;
581619
break;
582620
default:
583-
tokenize_error(&t, "invalid character: '%c'", c);
621+
invalid_char_error(&t, c);
584622
}
585623
break;
586624
case TokenizeStateSawQuestionMark:
@@ -890,7 +928,7 @@ void tokenize(Buf *buf, Tokenization *out) {
890928
t.state = TokenizeStateLineString;
891929
break;
892930
default:
893-
tokenize_error(&t, "invalid character: '%c'", c);
931+
invalid_char_error(&t, c);
894932
break;
895933
}
896934
break;
@@ -919,7 +957,7 @@ void tokenize(Buf *buf, Tokenization *out) {
919957
break;
920958
case '\\':
921959
if (t.cur_tok->data.str_lit.is_c_str) {
922-
tokenize_error(&t, "invalid character: '%c'", c);
960+
invalid_char_error(&t, c);
923961
}
924962
t.state = TokenizeStateLineStringContinue;
925963
break;
@@ -949,7 +987,7 @@ void tokenize(Buf *buf, Tokenization *out) {
949987
buf_append_char(&t.cur_tok->data.str_lit.str, '\n');
950988
break;
951989
default:
952-
tokenize_error(&t, "invalid character: '%c'", c);
990+
invalid_char_error(&t, c);
953991
break;
954992
}
955993
break;
@@ -1073,7 +1111,7 @@ void tokenize(Buf *buf, Tokenization *out) {
10731111
handle_string_escape(&t, '\"');
10741112
break;
10751113
default:
1076-
tokenize_error(&t, "invalid character: '%c'", c);
1114+
invalid_char_error(&t, c);
10771115
}
10781116
break;
10791117
case TokenizeStateCharCode:
@@ -1147,7 +1185,7 @@ void tokenize(Buf *buf, Tokenization *out) {
11471185
t.state = TokenizeStateStart;
11481186
break;
11491187
default:
1150-
tokenize_error(&t, "invalid character: '%c'", c);
1188+
invalid_char_error(&t, c);
11511189
}
11521190
break;
11531191
case TokenizeStateZero:
@@ -1189,7 +1227,7 @@ void tokenize(Buf *buf, Tokenization *out) {
11891227
uint32_t digit_value = get_digit_value(c);
11901228
if (digit_value >= t.radix) {
11911229
if (is_symbol_char(c)) {
1192-
tokenize_error(&t, "invalid character: '%c'", c);
1230+
invalid_char_error(&t, c);
11931231
}
11941232
// not my char
11951233
t.pos -= 1;
@@ -1233,7 +1271,7 @@ void tokenize(Buf *buf, Tokenization *out) {
12331271
uint32_t digit_value = get_digit_value(c);
12341272
if (digit_value >= t.radix) {
12351273
if (is_symbol_char(c)) {
1236-
tokenize_error(&t, "invalid character: '%c'", c);
1274+
invalid_char_error(&t, c);
12371275
}
12381276
// not my char
12391277
t.pos -= 1;
@@ -1282,7 +1320,7 @@ void tokenize(Buf *buf, Tokenization *out) {
12821320
uint32_t digit_value = get_digit_value(c);
12831321
if (digit_value >= t.radix) {
12841322
if (is_symbol_char(c)) {
1285-
tokenize_error(&t, "invalid character: '%c'", c);
1323+
invalid_char_error(&t, c);
12861324
}
12871325
// not my char
12881326
t.pos -= 1;

test/compile_errors.zig

+23
Original file line numberDiff line numberDiff line change
@@ -2252,4 +2252,27 @@ pub fn addCases(cases: &tests.CompileErrorContext) {
22522252
\\}
22532253
,
22542254
".tmp_source.zig:9:13: error: type '&MyType' does not support field access");
2255+
2256+
cases.add("carriage return special case",
2257+
"fn test() -> bool {\r\n" ++
2258+
" true\r\n" ++
2259+
"}\r\n"
2260+
,
2261+
".tmp_source.zig:1:20: error: invalid carriage return, only '\\n' line endings are supported");
2262+
2263+
cases.add("non-printable invalid character",
2264+
"\xff\xfe" ++
2265+
\\fn test() -> bool {\r
2266+
\\ true\r
2267+
\\}
2268+
,
2269+
".tmp_source.zig:1:1: error: invalid character: '\\xff'");
2270+
2271+
cases.add("non-printable invalid character with escape alternative",
2272+
"fn test() -> bool {\n" ++
2273+
"\ttrue\n" ++
2274+
"}\n"
2275+
,
2276+
".tmp_source.zig:2:1: error: invalid character: '\\t'");
2277+
22552278
}

0 commit comments

Comments
 (0)