@@ -442,7 +442,7 @@ pub const Tokenizer = struct {
442
442
switch (state ) {
443
443
.start = > switch (c ) {
444
444
0 = > break ,
445
- ' ' , '\n ' , '\t ' , ' \r ' = > {
445
+ ' ' , '\n ' , '\t ' = > {
446
446
result .loc .start = self .index + 1 ;
447
447
},
448
448
'"' = > {
@@ -565,6 +565,18 @@ pub const Tokenizer = struct {
565
565
state = .int_literal_dec ;
566
566
result .tag = .integer_literal ;
567
567
},
568
+ '\r ' = > {
569
+ // Carriage returns are *only* allowed just before a linefeed as part of a CRLF pair, otherwise
570
+ // they constitute an illegal byte!
571
+ if (self .index + 1 < self .buffer .len and self .buffer [self .index + 1 ] == '\n ' ) {
572
+ result .loc .start = self .index + 1 ;
573
+ } else {
574
+ result .tag = .invalid ;
575
+ result .loc .end = self .index ;
576
+ self .index += 1 ;
577
+ return result ;
578
+ }
579
+ },
568
580
else = > {
569
581
result .tag = .invalid ;
570
582
result .loc .end = self .index ;
@@ -903,7 +915,7 @@ pub const Tokenizer = struct {
903
915
self .index += 1 ;
904
916
break ;
905
917
},
906
- '\t ' , ' \r ' = > {},
918
+ '\t ' = > {},
907
919
else = > self .checkLiteralCharacter (),
908
920
},
909
921
@@ -1137,7 +1149,7 @@ pub const Tokenizer = struct {
1137
1149
state = .start ;
1138
1150
result .loc .start = self .index + 1 ;
1139
1151
},
1140
- '\t ' , ' \r ' = > state = .line_comment ,
1152
+ '\t ' = > state = .line_comment ,
1141
1153
else = > {
1142
1154
state = .line_comment ;
1143
1155
self .checkLiteralCharacter ();
@@ -1151,7 +1163,7 @@ pub const Tokenizer = struct {
1151
1163
result .tag = .doc_comment ;
1152
1164
break ;
1153
1165
},
1154
- '\t ' , ' \r ' = > {
1166
+ '\t ' = > {
1155
1167
state = .doc_comment ;
1156
1168
result .tag = .doc_comment ;
1157
1169
},
@@ -1167,12 +1179,12 @@ pub const Tokenizer = struct {
1167
1179
state = .start ;
1168
1180
result .loc .start = self .index + 1 ;
1169
1181
},
1170
- '\t ' , ' \r ' = > {},
1182
+ '\t ' = > {},
1171
1183
else = > self .checkLiteralCharacter (),
1172
1184
},
1173
1185
.doc_comment = > switch (c ) {
1174
1186
0 , '\n ' = > break ,
1175
- '\t ' , ' \r ' = > {},
1187
+ '\t ' = > {},
1176
1188
else = > self .checkLiteralCharacter (),
1177
1189
},
1178
1190
.zero = > switch (c ) {
@@ -1433,7 +1445,15 @@ pub const Tokenizer = struct {
1433
1445
fn getInvalidCharacterLength (self : * Tokenizer ) u3 {
1434
1446
const c0 = self .buffer [self .index ];
1435
1447
if (std .ascii .isASCII (c0 )) {
1436
- if (std .ascii .isCntrl (c0 )) {
1448
+ if (c0 == '\r ' ) {
1449
+ if (self .index + 1 < self .buffer .len and self .buffer [self .index + 1 ] == '\n ' ) {
1450
+ // Carriage returns are *only* allowed just before a linefeed as part of a CRLF pair, otherwise
1451
+ // they constitute an illegal byte!
1452
+ return 0 ;
1453
+ } else {
1454
+ return 1 ;
1455
+ }
1456
+ } else if (std .ascii .isCntrl (c0 )) {
1437
1457
// ascii control codes are never allowed
1438
1458
// (note that \n was checked before we got here)
1439
1459
return 1 ;
0 commit comments