@@ -416,6 +416,44 @@ static void handle_string_escape(Tokenize *t, uint8_t c) {
416
416
}
417
417
}
418
418
419
/*
 * Map a control byte to the two-character escape sequence that denotes it
 * in source text (a backslash followed by one letter or digit).
 *
 * Returns a pointer to a static string literal for NUL, BEL, BS, TAB, LF,
 * VT, FF and CR, and nullptr for every other byte. The caller must not
 * free or modify the returned string.
 */
static const char *get_escape_shorthand(uint8_t c) {
    switch (c) {
        case '\0':
            return "\\0";
        case '\a':
            return "\\a";
        case '\b':
            return "\\b";
        case '\t':
            return "\\t";
        case '\n':
            return "\\n";
        case '\v':
            return "\\v";
        case '\f':
            return "\\f";
        case '\r':
            return "\\r";
        default:
            // No shorthand exists for this byte.
            return nullptr;
    }
}
441
+
442
+ static void invalid_char_error (Tokenize *t, uint8_t c) {
443
+ if (c == ' \r ' ) {
444
+ tokenize_error (t, " invalid carriage return, only '\\ n' line endings are supported" );
445
+ } else if (isprint (c)) {
446
+ tokenize_error (t, " invalid character: '%c'" , c);
447
+ } else {
448
+ const char *sh = get_escape_shorthand (c);
449
+ if (sh) {
450
+ tokenize_error (t, " invalid character: '%s'" , sh);
451
+ } else {
452
+ tokenize_error (t, " invalid character: '\\ x%x'" , c);
453
+ }
454
+ }
455
+ }
456
+
419
457
void tokenize (Buf *buf, Tokenization *out) {
420
458
Tokenize t = {0 };
421
459
t.out = out;
@@ -580,7 +618,7 @@ void tokenize(Buf *buf, Tokenization *out) {
580
618
t.state = TokenizeStateSawQuestionMark;
581
619
break ;
582
620
default :
583
- tokenize_error (&t, " invalid character: '%c' " , c);
621
+ invalid_char_error (&t, c);
584
622
}
585
623
break ;
586
624
case TokenizeStateSawQuestionMark:
@@ -890,7 +928,7 @@ void tokenize(Buf *buf, Tokenization *out) {
890
928
t.state = TokenizeStateLineString;
891
929
break ;
892
930
default :
893
- tokenize_error (&t, " invalid character: '%c' " , c);
931
+ invalid_char_error (&t, c);
894
932
break ;
895
933
}
896
934
break ;
@@ -919,7 +957,7 @@ void tokenize(Buf *buf, Tokenization *out) {
919
957
break ;
920
958
case ' \\ ' :
921
959
if (t.cur_tok ->data .str_lit .is_c_str ) {
922
- tokenize_error (&t, " invalid character: '%c' " , c);
960
+ invalid_char_error (&t, c);
923
961
}
924
962
t.state = TokenizeStateLineStringContinue;
925
963
break ;
@@ -949,7 +987,7 @@ void tokenize(Buf *buf, Tokenization *out) {
949
987
buf_append_char (&t.cur_tok ->data .str_lit .str , ' \n ' );
950
988
break ;
951
989
default :
952
- tokenize_error (&t, " invalid character: '%c' " , c);
990
+ invalid_char_error (&t, c);
953
991
break ;
954
992
}
955
993
break ;
@@ -1073,7 +1111,7 @@ void tokenize(Buf *buf, Tokenization *out) {
1073
1111
handle_string_escape (&t, ' \" ' );
1074
1112
break ;
1075
1113
default :
1076
- tokenize_error (&t, " invalid character: '%c' " , c);
1114
+ invalid_char_error (&t, c);
1077
1115
}
1078
1116
break ;
1079
1117
case TokenizeStateCharCode:
@@ -1147,7 +1185,7 @@ void tokenize(Buf *buf, Tokenization *out) {
1147
1185
t.state = TokenizeStateStart;
1148
1186
break ;
1149
1187
default :
1150
- tokenize_error (&t, " invalid character: '%c' " , c);
1188
+ invalid_char_error (&t, c);
1151
1189
}
1152
1190
break ;
1153
1191
case TokenizeStateZero:
@@ -1189,7 +1227,7 @@ void tokenize(Buf *buf, Tokenization *out) {
1189
1227
uint32_t digit_value = get_digit_value (c);
1190
1228
if (digit_value >= t.radix ) {
1191
1229
if (is_symbol_char (c)) {
1192
- tokenize_error (&t, " invalid character: '%c' " , c);
1230
+ invalid_char_error (&t, c);
1193
1231
}
1194
1232
// not my char
1195
1233
t.pos -= 1 ;
@@ -1233,7 +1271,7 @@ void tokenize(Buf *buf, Tokenization *out) {
1233
1271
uint32_t digit_value = get_digit_value (c);
1234
1272
if (digit_value >= t.radix ) {
1235
1273
if (is_symbol_char (c)) {
1236
- tokenize_error (&t, " invalid character: '%c' " , c);
1274
+ invalid_char_error (&t, c);
1237
1275
}
1238
1276
// not my char
1239
1277
t.pos -= 1 ;
@@ -1282,7 +1320,7 @@ void tokenize(Buf *buf, Tokenization *out) {
1282
1320
uint32_t digit_value = get_digit_value (c);
1283
1321
if (digit_value >= t.radix ) {
1284
1322
if (is_symbol_char (c)) {
1285
- tokenize_error (&t, " invalid character: '%c' " , c);
1323
+ invalid_char_error (&t, c);
1286
1324
}
1287
1325
// not my char
1288
1326
t.pos -= 1 ;
0 commit comments