Skip to content

Commit 395ee0b

Browse files
committed
Introduce rustc_lexer
The idea here is to make a reusable library out of the existing rust-lexer, by separating pure lexing from rustc-specific concerns, like spans, error reporting, and interning. So, rustc_lexer operates directly on `&str`, produces simple tokens which are a pair of type-tag and a bit of original text, and does not report errors, instead storing them as flags on the token.
1 parent 95b1fe5 commit 395ee0b

15 files changed

+1335
-1259
lines changed

Cargo.lock

+8
Original file line numberDiff line numberDiff line change
@@ -2972,6 +2972,13 @@ dependencies = [
29722972
"tempfile 3.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
29732973
]
29742974

2975+
[[package]]
2976+
name = "rustc_lexer"
2977+
version = "0.1.0"
2978+
dependencies = [
2979+
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
2980+
]
2981+
29752982
[[package]]
29762983
name = "rustc_lint"
29772984
version = "0.0.0"
@@ -3622,6 +3629,7 @@ dependencies = [
36223629
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
36233630
"rustc_data_structures 0.0.0",
36243631
"rustc_errors 0.0.0",
3632+
"rustc_lexer 0.1.0",
36253633
"rustc_macros 0.1.0",
36263634
"rustc_target 0.0.0",
36273635
"scoped-tls 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",

src/librustc_lexer/Cargo.toml

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[package]
2+
authors = ["The Rust Project Developers"]
3+
name = "rustc_lexer"
4+
version = "0.1.0"
5+
edition = "2018"
6+
7+
# Note that this crate purposefully does not depend on other rustc crates
8+
[dependencies]
9+
unicode-xid = { version = "0.1.0", optional = true }

src/librustc_lexer/src/cursor.rs

+57
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
use std::str::Chars;
2+
3+
/// Cursor over the characters of a `&str`.
///
/// Characters can be inspected ahead of the current position without being
/// consumed (`nth_char`) and consumed one at a time (`bump`).
pub(crate) struct Cursor<'a> {
    /// Byte length of the input when the cursor was created; `len_consumed`
    /// subtracts the remaining length from this.
    initial_len: usize,
    /// Iterator over the characters that have not been consumed yet.
    chars: Chars<'a>,
    /// Last character returned by `bump`. Only tracked in debug builds.
    #[cfg(debug_assertions)]
    prev: char,
}

/// Sentinel character returned where no real character is available.
///
/// Because this is `'\0'`, a literal NUL in the input cannot be told apart
/// from the sentinel by value alone; use `Cursor::is_eof` to test for the
/// actual end of input.
pub(crate) const EOF_CHAR: char = '\0';

impl<'a> Cursor<'a> {
    /// Creates a cursor positioned at the start of `input`.
    pub(crate) fn new(input: &'a str) -> Cursor<'a> {
        Cursor {
            initial_len: input.len(),
            chars: input.chars(),
            #[cfg(debug_assertions)]
            prev: EOF_CHAR,
        }
    }

    /// For debug assertions only.
    ///
    /// Returns the last character consumed by `bump` in debug builds
    /// (`EOF_CHAR` if nothing has been consumed yet). In builds without
    /// debug assertions the previous character is not tracked and this
    /// always returns `EOF_CHAR`.
    pub(crate) fn prev(&self) -> char {
        #[cfg(debug_assertions)]
        {
            self.prev
        }

        #[cfg(not(debug_assertions))]
        {
            EOF_CHAR
        }
    }

    /// Returns the `n`-th character ahead of the current position without
    /// consuming anything (`n == 0` is the next character to be bumped).
    ///
    /// Returns `EOF_CHAR` if fewer than `n + 1` characters remain.
    pub(crate) fn nth_char(&self, n: usize) -> char {
        self.chars().nth(n).unwrap_or(EOF_CHAR)
    }

    /// Returns `true` when there is no more input to consume.
    pub(crate) fn is_eof(&self) -> bool {
        self.chars.as_str().is_empty()
    }

    /// Returns the number of bytes (not characters) consumed so far.
    pub(crate) fn len_consumed(&self) -> usize {
        self.initial_len - self.chars.as_str().len()
    }

    /// Returns an iterator over the remaining characters.
    ///
    /// The iterator is a clone, so advancing it does not move the cursor.
    fn chars(&self) -> Chars<'a> {
        self.chars.clone()
    }

    /// Moves to the next character.
    ///
    /// Returns the consumed character, or `None` at end of input (in which
    /// case the cursor, including the debug-only `prev`, is left unchanged).
    pub(crate) fn bump(&mut self) -> Option<char> {
        let c = self.chars.next()?;

        #[cfg(debug_assertions)]
        {
            self.prev = c;
        }

        Some(c)
    }
}

0 commit comments

Comments
 (0)