Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Roughly track the last token in each AST node #14391

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/std/zig.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@ const std = @import("std.zig");
const tokenizer = @import("zig/tokenizer.zig");
const fmt = @import("zig/fmt.zig");
const assert = std.debug.assert;
const parse_mod = @import("zig/parse.zig");

pub const Token = tokenizer.Token;
pub const Tokenizer = tokenizer.Tokenizer;
pub const fmtId = fmt.fmtId;
pub const fmtEscapes = fmt.fmtEscapes;
pub const isValidId = fmt.isValidId;
pub const parse = @import("zig/parse.zig").parse;
pub const parse = parse_mod.parse;
pub const parseWithOptions = parse_mod.parseWithOptions;
pub const string_literal = @import("zig/string_literal.zig");
pub const number_literal = @import("zig/number_literal.zig");
pub const primitives = @import("zig/primitives.zig");
Expand Down
11 changes: 11 additions & 0 deletions lib/std/zig/Ast.zig
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ tokens: TokenList.Slice,
/// The root AST node is assumed to be index 0. Since there can be no
/// references to the root node, this means 0 is available to indicate null.
nodes: NodeList.Slice,
/// List of tokens which end a node. We can't properly reconstitute
/// AST extents in all cases, so we just store the final token in each AST
/// node instead.
node_ends: ?[]TokenIndex,
extra_data: []Node.Index,

errors: []const Error,
Expand Down Expand Up @@ -39,6 +43,9 @@ pub fn deinit(tree: *Ast, gpa: mem.Allocator) void {
tree.nodes.deinit(gpa);
gpa.free(tree.extra_data);
gpa.free(tree.errors);
if (tree.node_ends) |ne| {
gpa.free(ne);
}
tree.* = undefined;
}

Expand Down Expand Up @@ -78,6 +85,7 @@ pub fn tokenLocation(self: Ast, start_offset: ByteOffset, token_index: TokenInde
.line = 0,
.column = 0,
.line_start = start_offset,

.line_end = self.source.len,
};
const token_start = self.tokens.items(.start)[token_index];
Expand Down Expand Up @@ -696,6 +704,9 @@ pub fn firstToken(tree: Ast, node: Node.Index) TokenIndex {
}

pub fn lastToken(tree: Ast, node: Node.Index) TokenIndex {
if (tree.node_ends) |ne| {
return ne[node];
}
const tags = tree.nodes.items(.tag);
const datas = tree.nodes.items(.data);
const main_tokens = tree.nodes.items(.main_token);
Expand Down
44 changes: 43 additions & 1 deletion lib/std/zig/parse.zig
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,21 @@ const Token = std.zig.Token;

pub const Error = error{ParseError} || Allocator.Error;

pub fn parse(gpa: Allocator, source: [:0]const u8) Allocator.Error!Ast {
    // Thin convenience wrapper: parse with every option at its default
    // (node_ends tracking disabled, so Ast.node_ends will be null).
    const default_options: ParseOptions = .{};
    return parseWithOptions(gpa, source, default_options);
}

pub const ParseOptions = struct {
/// When true, populate the `node_ends` field in the returned `Ast`, so
/// `Ast.lastToken` can return exact node extents even for nodes whose
/// children failed to parse. When false (the default), `node_ends` is
/// left null and `Ast.lastToken` reconstructs extents instead, which can
/// be slightly incorrect in those error cases. Mainly useful for tooling
/// that needs precise source ranges; other callers can leave it off.
track_node_ends: bool = false,
};

/// Result should be freed with tree.deinit() when there are
/// no more references to any of the tokens or nodes.
pub fn parse(gpa: Allocator, source: [:0]const u8) Allocator.Error!Ast {
pub fn parseWithOptions(gpa: Allocator, source: [:0]const u8, options: ParseOptions) Allocator.Error!Ast {
var tokens = Ast.TokenList{};
defer tokens.deinit(gpa);

Expand All @@ -36,9 +48,11 @@ pub fn parse(gpa: Allocator, source: [:0]const u8) Allocator.Error!Ast {
.token_starts = tokens.items(.start),
.errors = .{},
.nodes = .{},
.node_ends = if (options.track_node_ends) .{} else null,
.extra_data = .{},
.scratch = .{},
.tok_i = 0,
.options = options,
};
defer parser.errors.deinit(gpa);
defer parser.nodes.deinit(gpa);
Expand All @@ -49,6 +63,9 @@ pub fn parse(gpa: Allocator, source: [:0]const u8) Allocator.Error!Ast {
// Make sure at least 1 so we can use appendAssumeCapacity on the root node below.
const estimated_node_count = (tokens.len + 2) / 2;
try parser.nodes.ensureTotalCapacity(gpa, estimated_node_count);
if (parser.node_ends) |*ne| {
try ne.ensureTotalCapacity(gpa, estimated_node_count);
}

try parser.parseRoot();

Expand All @@ -57,6 +74,7 @@ pub fn parse(gpa: Allocator, source: [:0]const u8) Allocator.Error!Ast {
.source = source,
.tokens = tokens.toOwnedSlice(),
.nodes = parser.nodes.toOwnedSlice(),
.node_ends = if (parser.node_ends) |*ne| try ne.toOwnedSlice(gpa) else null,
.extra_data = try parser.extra_data.toOwnedSlice(gpa),
.errors = try parser.errors.toOwnedSlice(gpa),
};
Expand All @@ -73,8 +91,13 @@ const Parser = struct {
tok_i: TokenIndex,
errors: std.ArrayListUnmanaged(AstError),
nodes: Ast.NodeList,
/// Stores the 'end' of each node so we can properly compute
/// extents of AST nodes in cases where children have failed to
/// properly parse
node_ends: ?std.ArrayListUnmanaged(TokenIndex),
extra_data: std.ArrayListUnmanaged(Node.Index),
scratch: std.ArrayListUnmanaged(Node.Index),
options: ParseOptions,

const SmallSpan = union(enum) {
zero_or_one: Node.Index,
Expand Down Expand Up @@ -107,23 +130,39 @@ const Parser = struct {

fn addNode(p: *Parser, elem: Ast.NodeList.Elem) Allocator.Error!Node.Index {
    // Index the new node will occupy once appended below.
    const new_index = @intCast(Node.Index, p.nodes.len);
    if (p.node_ends) |*ends| {
        // node_ends is kept in lockstep with the node list.
        std.debug.assert(ends.items.len == p.nodes.len);
        // The most recently consumed token closes this node.
        try ends.append(p.gpa, p.tok_i - 1);
    }
    try p.nodes.append(p.gpa, elem);
    return new_index;
}

fn setNode(p: *Parser, i: usize, elem: Ast.NodeList.Elem) Node.Index {
    // Fill in a slot that was previously reserved via reserveNode.
    p.nodes.set(i, elem);
    if (p.node_ends) |*ends| {
        // node_ends must still be in lockstep with the node list.
        std.debug.assert(ends.items.len == p.nodes.len);
        // Record the most recently consumed token as this node's end.
        ends.items[i] = p.tok_i - 1;
    }
    return @intCast(Node.Index, i);
}

/// Reserve a slot for a node whose tag is known but whose data must be
/// filled in later (via setNode), e.g. because its children have not been
/// parsed yet. Returns the index of the reserved slot.
fn reserveNode(p: *Parser, tag: Ast.Node.Tag) !usize {
    // Keep node_ends in lockstep with the node list on every path,
    // including OOM: do the only fallible node_ends step (capacity
    // reservation) first, then grow the node list, and only then bump the
    // node_ends length, which cannot fail. Resizing node_ends before
    // p.nodes.resize could leave node_ends one element longer than nodes
    // if the nodes resize failed, breaking the lockstep invariant that
    // addNode/setNode assert.
    if (p.node_ends) |*ne| {
        std.debug.assert(ne.items.len == p.nodes.len);
        try ne.ensureUnusedCapacity(p.gpa, 1);
    }
    try p.nodes.resize(p.gpa, p.nodes.len + 1);
    if (p.node_ends) |*ne| {
        // Capacity reserved above, so this cannot fail. The new slot is
        // left undefined (as with the previous resize) until setNode runs.
        _ = ne.addOneAssumeCapacity();
    }
    p.nodes.items(.tag)[p.nodes.len - 1] = tag;
    return p.nodes.len - 1;
}

fn unreserveNode(p: *Parser, node_index: usize) void {
if (p.nodes.len == node_index) {
if (p.node_ends) |*ne| {
std.debug.assert(ne.items.len == p.nodes.len);
ne.resize(p.gpa, p.nodes.len - 1) catch unreachable;
}
p.nodes.resize(p.gpa, p.nodes.len - 1) catch unreachable;
} else {
// There is zombie node left in the tree, let's make it as inoffensive as possible
Expand Down Expand Up @@ -230,6 +269,9 @@ const Parser = struct {
.main_token = 0,
.data = undefined,
});
if (p.node_ends) |*ne| {
ne.appendAssumeCapacity(@intCast(TokenIndex, p.token_tags.len - 1));
}
const root_members = try p.parseContainerMembers();
const root_decls = try root_members.toSpan(p);
if (p.token_tags[p.tok_i] != .eof) {
Expand Down
1 change: 1 addition & 0 deletions src/translate_c/ast.zig
Original file line number Diff line number Diff line change
Expand Up @@ -788,6 +788,7 @@ pub fn render(gpa: Allocator, nodes: []const Node) !std.zig.Ast {
.tokens = ctx.tokens.toOwnedSlice(),
.nodes = ctx.nodes.toOwnedSlice(),
.extra_data = try ctx.extra_data.toOwnedSlice(gpa),
.node_ends = null,
.errors = &.{},
};
}
Expand Down