igor84
diff --git a/‎lib/std/unicode.zig
+14-19 b/‎lib/std/unicode.zig
+14-19
diff --git a/‎lib/std/zig/Ast.zig
+1-1 b/‎lib/std/zig/Ast.zig
+1-1
diff --git a/‎lib/std/zig/AstGen.zig
+3-12 b/‎lib/std/zig/AstGen.zig
+3-12
diff --git a/‎lib/std/zig/parser_test.zig
-1 b/‎lib/std/zig/parser_test.zig
-1
diff --git a/‎lib/std/zig/string_literal.zig
+19-5 b/‎lib/std/zig/string_literal.zig
+19-5
@@ -95,16 +95,13 @@ pub inline fn utf8EncodeComptime(comptime c: u21) [
 
 const Utf8DecodeError = Utf8Decode2Error || Utf8Decode3Error || Utf8Decode4Error;
 
-/// Decodes the UTF-8 codepoint encoded in the given slice of bytes.
-/// bytes.len must be equal to utf8ByteSequenceLength(bytes[0]) catch unreachable.
-/// If you already know the length at comptime, you can call one of
-/// utf8Decode2,utf8Decode3,utf8Decode4 directly instead of this function.
+/// Deprecated. This function has an awkward API that is too easy to use incorrectly.
 pub fn utf8Decode(bytes: []const u8) Utf8DecodeError!u21 {
     return switch (bytes.len) {
-        1 => @as(u21, bytes[0]),
-        2 => utf8Decode2(bytes),
-        3 => utf8Decode3(bytes),
-        4 => utf8Decode4(bytes),
+        1 => bytes[0],
+        2 => utf8Decode2(bytes[0..2].*),
+        3 => utf8Decode3(bytes[0..3].*),
+        4 => utf8Decode4(bytes[0..4].*),
         else => unreachable,
     };
 }
@@ -113,8 +110,7 @@ const Utf8Decode2Error = error{
     Utf8ExpectedContinuation,
     Utf8OverlongEncoding,
 };
-pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
-    assert(bytes.len == 2);
+pub fn utf8Decode2(bytes: [2]u8) Utf8Decode2Error!u21 {
     assert(bytes[0] & 0b11100000 == 0b11000000);
     var value: u21 = bytes[0] & 0b00011111;
 
@@ -130,7 +126,7 @@ pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
 const Utf8Decode3Error = Utf8Decode3AllowSurrogateHalfError || error{
     Utf8EncodesSurrogateHalf,
 };
-pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
+pub fn utf8Decode3(bytes: [3]u8) Utf8Decode3Error!u21 {
     const value = try utf8Decode3AllowSurrogateHalf(bytes);
 
     if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
@@ -142,8 +138,7 @@ const Utf8Decode3AllowSurrogateHalfError = error{
     Utf8ExpectedContinuation,
     Utf8OverlongEncoding,
 };
-pub fn utf8Decode3AllowSurrogateHalf(bytes: []const u8) Utf8Decode3AllowSurrogateHalfError!u21 {
-    assert(bytes.len == 3);
+pub fn utf8Decode3AllowSurrogateHalf(bytes: [3]u8) Utf8Decode3AllowSurrogateHalfError!u21 {
     assert(bytes[0] & 0b11110000 == 0b11100000);
     var value: u21 = bytes[0] & 0b00001111;
 
@@ -165,8 +160,7 @@ const Utf8Decode4Error = error{
     Utf8OverlongEncoding,
     Utf8CodepointTooLarge,
 };
-pub fn utf8Decode4(bytes: []const u8) Utf8Decode4Error!u21 {
-    assert(bytes.len == 4);
+pub fn utf8Decode4(bytes: [4]u8) Utf8Decode4Error!u21 {
     assert(bytes[0] & 0b11111000 == 0b11110000);
     var value: u21 = bytes[0] & 0b00000111;
 
@@ -1637,12 +1631,13 @@ pub fn wtf8Encode(c: u21, out: []u8) error{CodepointTooLarge}!u3 {
 
 const Wtf8DecodeError = Utf8Decode2Error || Utf8Decode3AllowSurrogateHalfError || Utf8Decode4Error;
 
+/// Deprecated. This function has an awkward API that is too easy to use incorrectly.
 pub fn wtf8Decode(bytes: []const u8) Wtf8DecodeError!u21 {
     return switch (bytes.len) {
-        1 => @as(u21, bytes[0]),
-        2 => utf8Decode2(bytes),
-        3 => utf8Decode3AllowSurrogateHalf(bytes),
-        4 => utf8Decode4(bytes),
+        1 => bytes[0],
+        2 => utf8Decode2(bytes[0..2].*),
+        3 => utf8Decode3AllowSurrogateHalf(bytes[0..3].*),
+        4 => utf8Decode4(bytes[0..4].*),
         else => unreachable,
     };
 }
 
@@ -69,7 +69,7 @@ pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!A
         const token = tokenizer.next();
         try tokens.append(gpa, .{
             .tag = token.tag,
-            .start = @as(u32, @intCast(token.loc.start)),
+            .start = @intCast(token.loc.start),
         });
         if (token.tag == .eof) break;
     }
 
@@ -11351,6 +11351,9 @@ fn failWithStrLitError(astgen: *AstGen, err: std.zig.string_literal.Error, token
                 .{raw_string[bad_index]},
             );
         },
+        .empty_char_literal => {
+            return astgen.failOff(token, offset, "empty character literal", .{});
+        },
     }
 }
 
@@ -13820,21 +13823,9 @@ fn lowerAstErrors(astgen: *AstGen) !void {
     var msg: std.ArrayListUnmanaged(u8) = .{};
     defer msg.deinit(gpa);
 
-    const token_starts = tree.tokens.items(.start);
-    const token_tags = tree.tokens.items(.tag);
-
     var notes: std.ArrayListUnmanaged(u32) = .{};
     defer notes.deinit(gpa);
 
-    const tok = parse_err.token + @intFromBool(parse_err.token_is_prev);
-    if (token_tags[tok] == .invalid) {
-        const bad_off: u32 = @intCast(tree.tokenSlice(tok).len);
-        const byte_abs = token_starts[tok] + bad_off;
-        try notes.append(gpa, try astgen.errNoteTokOff(tok, bad_off, "invalid byte: '{'}'", .{
-            std.zig.fmtEscapes(tree.source[byte_abs..][0..1]),
-        }));
-    }
-
     for (tree.errors[1..]) |note| {
         if (!note.is_note) break;
 
 
@@ -6061,7 +6061,6 @@ test "recovery: invalid container members" {
     , &[_]Error{
         .expected_expr,
         .expected_comma_after_field,
-        .expected_type_expr,
         .expected_semi_after_stmt,
     });
 }
 
@@ -1,6 +1,5 @@
 const std = @import("../std.zig");
 const assert = std.debug.assert;
-const utf8Decode = std.unicode.utf8Decode;
 const utf8Encode = std.unicode.utf8Encode;
 
 pub const ParseError = error{
@@ -37,12 +36,16 @@ pub const Error = union(enum) {
     expected_single_quote: usize,
     /// The character at this index cannot be represented without an escape sequence.
     invalid_character: usize,
+    /// `''`. Not returned for string literals.
+    empty_char_literal,
 };
 
-/// Only validates escape sequence characters.
-/// Slice must be valid utf8 starting and ending with "'" and exactly one codepoint in between.
+/// Asserts the slice starts and ends with single-quotes.
+/// Returns an error if there is not exactly one UTF-8 codepoint in between.
 pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
-    assert(slice.len >= 3 and slice[0] == '\'' and slice[slice.len - 1] == '\'');
+    if (slice.len < 3) return .{ .failure = .empty_char_literal };
+    assert(slice[0] == '\'');
+    assert(slice[slice.len - 1] == '\'');
 
     switch (slice[1]) {
         '\\' => {
@@ -55,7 +58,18 @@ pub fn parseCharLiteral(slice: []const u8) ParsedCharLiteral {
         },
         0 => return .{ .failure = .{ .invalid_character = 1 } },
         else => {
-            const codepoint = utf8Decode(slice[1 .. slice.len - 1]) catch unreachable;
+            const inner = slice[1 .. slice.len - 1];
+            const n = std.unicode.utf8ByteSequenceLength(inner[0]) catch return .{
+                .failure = .{ .invalid_unicode_codepoint = 1 },
+            };
+            if (inner.len > n) return .{ .failure = .{ .expected_single_quote = 1 + n } };
+            const codepoint = switch (n) {
+                1 => inner[0],
+                2 => std.unicode.utf8Decode2(inner[0..2].*),
+                3 => std.unicode.utf8Decode3(inner[0..3].*),
+                4 => std.unicode.utf8Decode4(inner[0..4].*),
+                else => unreachable,
+            } catch return .{ .failure = .{ .invalid_unicode_codepoint = 1 } };
             return .{ .success = codepoint };
         },
     }
Original file line number	Diff line number	Diff line change
`@@ -69,7 +69,7 @@ pub fn parse(gpa: Allocator, source: [:0]const u8, mode: Mode) Allocator.Error!A`
`69`	`69`	`const token = tokenizer.next();`
`70`	`70`	`try tokens.append(gpa, .{`
`71`	`71`	`.tag = token.tag,`
`72`		`- .start = @as(u32, @intCast(token.loc.start)),`
	`72`	`+ .start = @intCast(token.loc.start),`
`73`	`73`	`});`
`74`	`74`	`if (token.tag == .eof) break;`
`75`	`75`	`}`
Original file line number	Diff line number	Diff line change
`@@ -6061,7 +6061,6 @@ test "recovery: invalid container members" {`
`6061`	`6061`	`, &[_]Error{`
`6062`	`6062`	`.expected_expr,`
`6063`	`6063`	`.expected_comma_after_field,`
`6064`		`- .expected_type_expr,`
`6065`	`6064`	`.expected_semi_after_stmt,`
`6066`	`6065`	`});`
`6067`	`6066`	`}`