Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat!: better type parsing #47

Merged
merged 18 commits into from
Oct 12, 2022
304 changes: 132 additions & 172 deletions src/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,180 +1,32 @@
mod token;
pub use token::*;

use std::ops::Range;

use chumsky::{
prelude::{any, choice, end, filter, just, take_until, Simple},
text::{ident, keyword, newline, TextParser},
recursive::recursive,
text::{ident, keyword, newline, whitespace, TextParser},
Parser,
};

/// The kind of a function or expression tag.
///
/// Every kind has a one-character textual form, available through
/// [`Kind::as_char`].
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Kind {
    /// Rendered as `.`
    Dot,
    /// Rendered as `:`
    Colon,
    /// Rendered as `#`
    Local,
}

impl Kind {
    /// Returns the single character that represents this kind.
    pub fn as_char(&self) -> char {
        match *self {
            Kind::Local => '#',
            Kind::Colon => ':',
            Kind::Dot => '.',
        }
    }
}

/// Visibility modifier that may precede the name in a `---@field` tag
/// (`[public|private|protected]`).
///
/// The modifier is optional in the source; the field parser falls back to
/// [`Scope::Public`] when none is written.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Scope {
/// Written as the keyword `public`.
Public,
/// NOTE(review): presumably written as the keyword `private` — its parser arm is not visible here, confirm.
Private,
/// Written as the keyword `protected`.
Protected,
}

/// A single token produced by the lexer from an emmylua/lua source file.
///
/// Each variant's doc comment shows the source syntax it corresponds to.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum TagType {
/// ```lua
/// ---@toc <name>
/// ```
Toc(String),
/// ```lua
/// ---@mod <name> [desc]
/// ```
Module(String, Option<String>),
/// ```lua
/// ---@divider <char>
/// ```
Divider(char),
/// ```lua
/// function one.two() end
/// one.two = function() end
/// ```
Func {
/// NOTE(review): presumably the part before the separator (`one` above) — confirm.
prefix: Option<String>,
/// NOTE(review): presumably the function's own name (`two` above) — confirm.
name: String,
/// Kind of the definition; see [`Kind::as_char`] for its character form.
kind: Kind,
},
/// ```lua
/// one = 1
/// one.two = 12
/// ```
Expr {
/// NOTE(review): presumably the part before the separator; absent for a bare `one = 1` — confirm.
prefix: Option<String>,
/// NOTE(review): presumably the name being assigned to — confirm.
name: String,
/// Kind of the expression; see [`Kind::as_char`] for its character form.
kind: Kind,
},
/// ```lua
/// ---@export <module>
/// -- or
/// return <module>\eof
/// ```
Export(String),
/// ```lua
/// ---@brief [[
/// ```
BriefStart,
/// ```lua
/// ---@brief ]]
/// ```
BriefEnd,
/// ```lua
/// ---@param <name[?]> <type[|type...]> [description]
/// ```
Param {
name: String,
ty: String,
desc: Option<String>,
},
/// ```lua
/// ---@return <type> [<name> [comment] | [name] #<comment>]
/// ```
Return {
ty: String,
name: Option<String>,
desc: Option<String>,
},
/// ```lua
/// ---@class <name>
/// ```
Class(String),
/// ```lua
/// ---@field [public|private|protected] <name> <type> [description]
/// ```
Field {
/// Visibility of the field; the field parser uses `Scope::Public` when no modifier is written.
scope: Scope,
name: String,
ty: String,
desc: Option<String>,
},
/// ```lua
/// -- Simple Alias
/// ---@alias <name> <type>
///
/// -- Enum alias
/// ---@alias <name>
/// ```
///
/// The second element is the optional `<type>`; it is `None` for the
/// enum form, which has no type on the `---@alias` line.
Alias(String, Option<String>),
/// ```lua
/// ---| '<value>' [# description]
/// ```
Variant(String, Option<String>),
/// ```lua
/// ---@type <type> [desc]
/// ```
Type(String, Option<String>),
/// ```lua
/// ---@tag <name>
/// ```
Tag(String),
/// ```lua
/// ---@see <name>
/// ```
See(String),
/// ```lua
/// ---@usage `<code>`
/// ```
Usage(String),
/// ```lua
/// ---@usage [[
/// ```
UsageStart,
/// ```lua
/// ---@usage ]]
/// ```
UsageEnd,
/// ```lua
/// ---TEXT
/// ```
Comment(String),
/// Text nodes which are not needed
Skip,
}

/// A lexed token paired with the span (`Range<usize>`) the parser reported for it.
type Spanned = (TagType, Range<usize>);

/// Non-alphanumeric characters that are additionally accepted inside a name
/// (the `name` parser allows any alphanumeric character or one of these).
const C: [char; 3] = ['.', '_', '-'];

/// Namespace type for the lexer; it holds no data.
#[derive(Debug)]
pub struct Lexer;

impl Lexer {
/// Parse emmylua/lua files into rust token
pub fn parse(src: &str) -> Result<Vec<Spanned>, Vec<Simple<char>>> {
pub fn init() -> impl Parser<char, Vec<Spanned>, Error = Simple<char>> {
let triple = just("---");
let space = just(' ').repeated().at_least(1);
let till_eol = take_until(newline());

let comment = till_eol.map(|(x, _)| x.iter().collect());
let desc = space.ignore_then(comment).or_not();

// Source: https://github.com/sumneko/lua-language-server/wiki/Annotations#documenting-types
// A TYPE could be
// - primary = string|number|boolean
// - fn = func(...):string
// - enum = "one"|"two"|"three"
// - or: primary (| primary)+
// - optional = primary?
// - table = table<string, string>
// - array = primary[]
let ty = filter(|x: &char| !x.is_whitespace()).repeated().collect();

let scope = choice((
keyword("public").to(Scope::Public),
keyword("protected").to(Scope::Protected),
Expand All @@ -196,21 +48,130 @@ impl Lexer {
)))
.ignored();

let union_literal = just('\'')
.ignore_then(filter(|c| c != &'\'').repeated())
.then_ignore(just('\''))
.collect();

let variant = just('|')
.then_ignore(space)
.ignore_then(
just('\'')
.ignore_then(filter(|c| c != &'\'').repeated())
.then_ignore(just('\''))
.collect(),
)
.ignore_then(union_literal)
.then(
space
.ignore_then(just('#').ignore_then(space).ignore_then(comment))
.or_not(),
)
.map(|(t, d)| TagType::Variant(t, d));

let optional = just('?').or_not().map(|c| match c {
Some(_) => TypeVal::Opt as fn(_, _) -> _,
None => TypeVal::Req as fn(_, _) -> _,
});

let name = filter(|x: &char| x.is_alphanumeric() || C.contains(x))
.repeated()
.collect();

let ty = recursive(|inner| {
let comma = just(',').padded();
let colon = just(':').padded();

let any = just("any").to(Ty::Any);
let unknown = just("unknown").to(Ty::Unknown);
let nil = just("nil").to(Ty::Nil);
let boolean = just("boolean").to(Ty::Boolean);
let string = just("string").to(Ty::String);
let num = just("number").to(Ty::Number);
let int = just("integer").to(Ty::Integer);
let function = just("function").to(Ty::Function);
let thread = just("thread").to(Ty::Thread);
let userdata = just("userdata").to(Ty::Userdata);
let lightuserdata = just("lightuserdata").to(Ty::Lightuserdata);

/// Extends the base type parser `p` with the two postfix forms a type can take:
///
/// 1. any number of `[]` suffixes, each wrapping the type parsed so far in
///    `Ty::Array`;
/// 2. any number of `| <type>` alternatives (parsed with `inner`), folded
///    left-to-right into nested `Ty::Union` pairs.
#[inline]
fn array_union(
    p: impl Parser<char, Ty, Error = Simple<char>>,
    inner: impl Parser<char, Ty, Error = Simple<char>>,
) -> impl Parser<char, Ty, Error = Simple<char>> {
    // Zero or more `[]` suffixes directly after the base type.
    let brackets = just("[]").repeated();
    // Zero or more `| <type>` alternatives; whitespace around `|` is allowed.
    let alternatives = just('|').padded().ignore_then(inner).repeated();

    p.then(brackets)
        .foldl(|element, _| Ty::Array(Box::new(element)))
        // NOTE: A flat `Ty::Union(Vec<Ty>)` would have been preferable, but nested
        // binary unions are better than nothing.
        .then(alternatives)
        .foldl(|lhs, rhs| Ty::Union(Box::new(lhs), Box::new(rhs)))
}

let list_like = ident()
.padded()
.then(optional)
.then(
colon
.ignore_then(inner.clone())
.or_not()
// NOTE: if param type is missing then LLS treats it as `any`
.map(|x| x.unwrap_or(Ty::Any)),
)
.map(|((n, attr), t)| attr(n, t))
.separated_by(comma)
.allow_trailing();

let fun = just("fun")
.ignore_then(
list_like
.clone()
.delimited_by(just('(').then(whitespace()), whitespace().then(just(')'))),
)
.then(
colon
.ignore_then(inner.clone().separated_by(comma))
.or_not(),
)
.map(|(param, ret)| Ty::Fun(param, ret));

let table = just("table")
.ignore_then(
just('<')
.ignore_then(inner.clone().map(Box::new))
.then_ignore(comma)
.then(inner.clone().map(Box::new))
.then_ignore(just('>'))
.or_not(),
)
.map(Ty::Table);

let dict = list_like
.delimited_by(just('{').then(whitespace()), whitespace().then(just('}')))
.map(Ty::Dict);

let ty_name = name.map(Ty::Ref);

let parens = inner
.clone()
.delimited_by(just('(').padded(), just(')').padded());

// Union of string literals: '"g@"'|'"g@$"'
let string_literal = union_literal.map(Ty::Ref);

choice((
array_union(any, inner.clone()),
array_union(unknown, inner.clone()),
array_union(nil, inner.clone()),
array_union(boolean, inner.clone()),
array_union(string, inner.clone()),
array_union(num, inner.clone()),
array_union(int, inner.clone()),
array_union(function, inner.clone()),
array_union(thread, inner.clone()),
array_union(userdata, inner.clone()),
array_union(lightuserdata, inner.clone()),
array_union(fun, inner.clone()),
array_union(table, inner.clone()),
array_union(dict, inner.clone()),
array_union(parens, inner.clone()),
array_union(string_literal, inner.clone()),
array_union(ty_name, inner),
))
});

let tag = just('@').ignore_then(choice((
private.to(TagType::Skip),
just("toc")
Expand All @@ -219,7 +180,7 @@ impl Lexer {
.map(TagType::Toc),
just("mod")
.then_ignore(space)
.ignore_then(ty)
.ignore_then(name)
.then(desc)
.map(|(name, desc)| TagType::Module(name, desc)),
just("divider")
Expand All @@ -232,14 +193,14 @@ impl Lexer {
))),
just("param")
.ignore_then(space)
.ignore_then(ty) // I am using `ty` here because param can have `?`
.ignore_then(ident().then(optional))
.then_ignore(space)
.then(ty)
.then(ty.clone())
.then(desc)
.map(|((name, ty), desc)| TagType::Param { name, ty, desc }),
.map(|(((name, opt), ty), desc)| TagType::Param(opt(name, ty), desc)),
just("return")
.ignore_then(space)
.ignore_then(ty)
.ignore_then(ty.clone())
.then(choice((
newline().to((None, None)),
space.ignore_then(choice((
Expand All @@ -250,14 +211,14 @@ impl Lexer {
.map(|(ty, (name, desc))| TagType::Return { ty, name, desc }),
just("class")
.ignore_then(space)
.ignore_then(ident())
.ignore_then(name)
.map(TagType::Class),
just("field")
.ignore_then(space.ignore_then(scope).or_not())
.then_ignore(space)
.then(ident())
.then_ignore(space)
.then(ty)
.then(ty.clone())
.then(desc)
.map(|(((scope, name), ty), desc)| TagType::Field {
scope: scope.unwrap_or(Scope::Public),
Expand All @@ -267,8 +228,8 @@ impl Lexer {
}),
just("alias")
.ignore_then(space)
.ignore_then(ident())
.then(space.ignore_then(ty).or_not())
.ignore_then(name)
.then(space.ignore_then(ty.clone()).or_not())
.map(|(name, ty)| TagType::Alias(name, ty)),
just("type")
.ignore_then(space)
Expand Down Expand Up @@ -350,6 +311,5 @@ impl Lexer {
.padded()
.map_with_span(|t, r| (t, r))
.repeated()
.parse(src)
}
}
Loading