
Commit 0434b68

Add Node::unparse and tweak the cursor creation API (#666)
Closes #583. Ref #628 for the modified cursor/offset API. Changes are best reviewed commit-by-commit.
1 parent 54af80d commit 0434b68

File tree

28 files changed: +249 −146 lines changed


.changeset/strange-hats-itch.md

+5
@@ -0,0 +1,5 @@
+---
+"@nomicfoundation/slang": minor
+---
+
+Add `Node::unparse()` that allows to reconstruct the source code from the CST node
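
A minimal usage sketch of the new method, written as it would appear inside the runtime crate itself; the `roundtrip` helper is hypothetical and `parse_tree` is assumed to be the root node of an earlier parse (obtaining it is not part of this diff):

use crate::cst::Node;
use crate::text_index::TextIndex;

// Hypothetical helper: `parse_tree` is assumed to come from a previous parse.
fn roundtrip(parse_tree: Node) -> String {
    // Token-by-token walk over the tree, mirroring what `RuleNode::unparse` does internally...
    let by_hand: String = parse_tree
        .cursor_with_offset(TextIndex::ZERO)
        .filter_map(Node::into_token)
        .map(|token| token.text.clone())
        .collect();

    // ...versus the new one-call API; both reproduce the original source text.
    let unparsed = parse_tree.unparse();
    assert_eq!(by_hand, unparsed);
    unparsed
}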

.cspell.json

+1
@@ -19,6 +19,7 @@
     "structs",
     "tera",
     "ufixed",
+    "unparse",
     "usize"
   ]
 }

crates/codegen/parser/runtime/src/cst.rs

+53-33
@@ -54,63 +54,83 @@ impl Node
         }
     }
 
-    pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
+    /// Creates a [`Cursor`] that starts at the current node as the root and a given initial `text_offset`.
+    pub fn cursor_with_offset(&self, text_offset: TextIndex) -> Cursor {
         Cursor::new(self.clone(), text_offset)
     }
 
+    /// Reconstructs the original source code from the parse tree.
+    pub fn unparse(self) -> String {
+        match self {
+            Self::Rule(rule) => rule.unparse(),
+            Self::Token(token) => token.text.clone(),
+        }
+    }
+
     pub fn as_rule(&self) -> Option<&Rc<RuleNode>> {
         match self {
             Self::Rule(node) => Some(node),
             _ => None,
         }
     }
 
+    pub fn into_rule(self) -> Option<Rc<RuleNode>> {
+        match self {
+            Self::Rule(node) => Some(node),
+            _ => None,
+        }
+    }
+
     pub fn as_token(&self) -> Option<&Rc<TokenNode>> {
         match self {
             Self::Token(node) => Some(node),
             _ => None,
         }
     }
 
-    pub fn as_token_with_kind(&self, kinds: &[TokenKind]) -> Option<&Rc<TokenNode>> {
-        if let Node::Token(token_node) = self {
-            if kinds.contains(&token_node.kind) {
-                return Some(token_node);
-            }
+    pub fn into_token(self) -> Option<Rc<TokenNode>> {
+        match self {
+            Self::Token(node) => Some(node),
+            _ => None,
         }
-        return None;
     }
 
-    pub fn as_token_matching<F: Fn(&Rc<TokenNode>) -> bool>(
-        &self,
-        predicate: F,
-    ) -> Option<&Rc<TokenNode>> {
-        if let Node::Token(token_node) = self {
-            if predicate(&token_node) {
-                return Some(token_node);
-            }
-        }
-        return None;
+    pub fn as_token_with_kind(&self, kinds: &[TokenKind]) -> Option<&Rc<TokenNode>> {
+        self.as_token().filter(|token| kinds.contains(&token.kind))
     }
 
     pub fn as_rule_with_kind(&self, kinds: &[RuleKind]) -> Option<&Rc<RuleNode>> {
-        if let Node::Rule(rule_node) = self {
-            if kinds.contains(&rule_node.kind) {
-                return Some(rule_node);
-            }
-        }
-        return None;
+        self.as_rule().filter(|rule| kinds.contains(&rule.kind))
     }
+}
 
-    pub fn as_rule_matching<F: Fn(&Rc<RuleNode>) -> bool>(
-        &self,
-        predicate: F,
-    ) -> Option<&Rc<RuleNode>> {
-        if let Node::Rule(rule_node) = self {
-            if predicate(&rule_node) {
-                return Some(rule_node);
-            }
-        }
-        return None;
+impl From<Rc<RuleNode>> for Node {
+    fn from(node: Rc<RuleNode>) -> Self {
+        Self::Rule(node)
+    }
+}
+
+impl From<Rc<TokenNode>> for Node {
+    fn from(node: Rc<TokenNode>) -> Self {
+        Self::Token(node)
+    }
+}
+
+impl RuleNode {
+    /// Creates a [`Cursor`] that starts at the current node as the root and a given initial `text_offset`.
+    pub fn cursor_with_offset(self: Rc<Self>, text_offset: TextIndex) -> Cursor {
+        Cursor::new(Node::Rule(self), text_offset)
+    }
+
+    /// Reconstructs the original source code from the parse tree.
+    pub fn unparse(self: Rc<Self>) -> String {
+        let acc = String::with_capacity(self.text_len.utf8);
+
+        self.cursor_with_offset(TextIndex::ZERO)
+            .filter_map(Node::into_token)
+            .fold(acc, |mut acc, token| {
+                acc.push_str(&token.text);
+                acc
+            })
     }
 }
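
As a side note, a rough sketch of how the owned conversions introduced above compose; the `conversions` helper and its inputs are placeholders, not part of the diff (in real code the `Rc` values come out of an existing CST):

use std::rc::Rc;

use crate::cst::{Node, RuleNode, TokenNode};

// Placeholder inputs, purely for illustration.
fn conversions(rule: Rc<RuleNode>, token: Rc<TokenNode>) {
    // Wrap via the new `From` impls instead of naming the enum variant:
    let node: Node = rule.into();        // same as Node::Rule(rule)
    let other: Node = Node::from(token); // same as Node::Token(token)

    // `into_rule` / `into_token` are the by-value counterparts of `as_rule` / `as_token`:
    // they consume the `Node` and return the inner `Rc` without an extra clone, which is
    // what lets `RuleNode::unparse` above write `.filter_map(Node::into_token)`.
    let _rule_again: Option<Rc<RuleNode>> = node.into_rule();
    let _token_again: Option<Rc<TokenNode>> = other.into_token();
}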

crates/codegen/parser/runtime/src/cursor.rs

+3-3
@@ -370,7 +370,7 @@ impl Cursor
         None
     }
 
-    /// In contract to `Iterator::find_*`, this does not consume the first item when found.
+    /// In contrast to `Iterator::find_*`, this does not consume the first item when found.
     fn find_noconsume<F: Fn(&Node) -> Option<R>, R>(&mut self, predicate: F) -> Option<R> {
         while !self.is_completed {
             match predicate(&self.current.node) {
@@ -398,7 +398,7 @@ impl Cursor
         &mut self,
         predicate: F,
     ) -> Option<Rc<TokenNode>> {
-        self.find_noconsume(|node| node.as_token_matching(&predicate).cloned())
+        self.find_noconsume(|node| node.as_token().filter(|node| predicate(node)).cloned())
     }
 
     /// Finds the first rule node with either of the given kinds.
@@ -415,6 +415,6 @@ impl Cursor
         &mut self,
         predicate: F,
     ) -> Option<Rc<RuleNode>> {
-        self.find_noconsume(|node| node.as_rule_matching(&predicate).cloned())
+        self.find_noconsume(|node| node.as_rule().filter(|node| predicate(node)).cloned())
     }
 }
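
For downstream code that used the removed `as_token_matching` / `as_rule_matching` helpers, the equivalent under the slimmer API is a plain `Option::filter`, as in the cursor bodies above. A rough migration sketch; the `migrate` helper, the `node` argument, and the predicates are illustrative:

use crate::cst::Node;
use crate::kinds::RuleKind;

fn migrate(node: &Node, interesting: &[RuleKind]) {
    // Before (helpers removed in this commit):
    //     node.as_token_matching(|t| t.text == "pragma");
    //     node.as_rule_matching(|r| interesting.contains(&r.kind));

    // After: compose the remaining accessors with `Option::filter`.
    let _token = node.as_token().filter(|t| t.text == "pragma");
    let _rule = node.as_rule().filter(|r| interesting.contains(&r.kind));
}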

crates/codegen/parser/runtime/src/napi/napi_cst.rs

+2-2
@@ -54,7 +54,7 @@ impl RuleNode
     #[napi(ts_return_type = "cursor.Cursor")]
     pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
         RustNode::Rule(self.0.clone())
-            .create_cursor((&text_offset).into())
+            .cursor_with_offset((&text_offset).into())
             .into()
     }
 }
@@ -88,7 +88,7 @@ impl TokenNode
     #[napi(ts_return_type = "cursor.Cursor")]
     pub fn create_cursor(&self, text_offset: TextIndex) -> Cursor {
         RustNode::Token(self.0.clone())
-            .create_cursor((&text_offset).into())
+            .cursor_with_offset((&text_offset).into())
             .into()
     }
 }

crates/codegen/parser/runtime/src/parse_output.rs

+2-2
@@ -1,4 +1,4 @@
-use crate::{cst, cursor::Cursor, parse_error::ParseError};
+use crate::{cst, cursor::Cursor, parse_error::ParseError, text_index::TextIndex};
 
 #[derive(Debug, PartialEq)]
 pub struct ParseOutput {
@@ -21,6 +21,6 @@ impl ParseOutput
 
     /// Creates a cursor that starts at the root of the parse tree.
    pub fn create_tree_cursor(&self) -> Cursor {
-        return self.parse_tree.create_cursor(Default::default());
+        return self.parse_tree.cursor_with_offset(TextIndex::ZERO);
     }
 }

crates/codegen/parser/runtime/src/support/choice_helper.rs

+1-1
@@ -146,7 +146,7 @@ pub fn total_not_skipped_span(result: &ParserResult) -> usize
 
     nodes
         .iter()
-        .flat_map(|node| cst::Node::create_cursor(node, Default::default()))
+        .flat_map(|node| cst::Node::cursor_with_offset(node, TextIndex::ZERO))
        .filter_map(|node| match node {
             cst::Node::Token(token) if token.kind != TokenKind::SKIPPED => Some(token.text.len()),
             _ => None,

crates/codegen/parser/runtime/src/support/parser_function.rs

+5-2
@@ -1,7 +1,10 @@
 use std::rc::Rc;
 
 use super::{
-    super::{cst, kinds::TokenKind, parse_error::ParseError, parse_output::ParseOutput},
+    super::{
+        cst, kinds::TokenKind, parse_error::ParseError, parse_output::ParseOutput,
+        text_index::TextIndex,
+    },
     context::ParserContext,
     parser_result::*,
 };
@@ -95,7 +98,7 @@ where
         debug_assert_eq!(
             errors.len() > 0,
             parse_tree
-                .create_cursor(Default::default())
+                .cursor_with_offset(TextIndex::ZERO)
                 .any(|x| x.as_token_with_kind(&[TokenKind::SKIPPED]).is_some())
         );
 
crates/codegen/parser/runtime/src/support/parser_result.rs

+2-2
@@ -1,4 +1,4 @@
-use super::super::{cst, kinds::*};
+use super::super::{cst, kinds::*, text_index::TextIndex};
 
 #[derive(PartialEq, Eq, Clone, Debug)]
 pub enum ParserResult {
@@ -93,7 +93,7 @@ impl Match
     pub fn is_full_recursive(&self) -> bool {
         self.nodes
             .iter()
-            .flat_map(|node| cst::Node::create_cursor(node, Default::default()))
+            .flat_map(|node| cst::Node::cursor_with_offset(node, TextIndex::ZERO))
             .all(|node| node.as_token_with_kind(&[TokenKind::SKIPPED]).is_none())
     }
 }

crates/codegen/parser/runtime/src/text_index.rs

+9
@@ -12,6 +12,15 @@ pub struct TextIndex
     pub char: usize,
 }
 
+impl TextIndex {
+    /// Shorthand for `TextIndex { utf8: 0, utf16: 0, char: 0 }`.
+    pub const ZERO: TextIndex = TextIndex {
+        utf8: 0,
+        utf16: 0,
+        char: 0,
+    };
+}
+
 impl PartialOrd for TextIndex {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
         self.utf8.partial_cmp(&other.utf8)
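
At the call sites above, `TextIndex::ZERO` simply replaces `Default::default()` with something self-describing; as an associated `const` it can also appear in const position. A small illustrative sketch; the `START` constant and `root_cursor` helper are hypothetical, not part of the diff:

use crate::cst::Node;
use crate::cursor::Cursor;
use crate::text_index::TextIndex;

// Usable in const position, unlike a `Default::default()` call.
const START: TextIndex = TextIndex::ZERO;

fn root_cursor(node: &Node) -> Cursor {
    // Equivalent to the old `node.create_cursor(Default::default())`,
    // but the starting offset is now spelled out at the call site.
    node.cursor_with_offset(START)
}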

crates/solidity/outputs/cargo/crate/src/generated/cst.rs

+53-33
Some generated files are not rendered by default.

crates/solidity/outputs/cargo/crate/src/generated/cursor.rs

+3-3
Some generated files are not rendered by default.

0 commit comments