diff --git a/Cargo.toml b/Cargo.toml index 79b6d48..9077734 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,13 @@ [workspace] -members = ["byteyarn", "buf-trait", "ilex", "ilex/attr", "twie"] +members = [ + "allman", + "byteyarn", + "buf-trait", + "gilded", "gilded/attr", + "ilex", "ilex/attr", + "proc2decl", + "twie", +] resolver = "2" [workspace.package] diff --git a/README.md b/README.md index cb95104..3c61296 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,15 @@ depend on each other. - ⛩ī¸ [`ilex`](https://github.com/mcy/strings/tree/main/ilex) - The last lexer I ever want to write. +- 🗒ī¸ [`allman`](https://github.com/mcy/strings/tree/main/allman) - A DOM for + code formatters. + +- 👑 [`gilded`](https://github.com/mcy/strings/tree/main/gilded) - How I learned + to stop worrying and love golden testing. + +- đŸ’ĸ [`proc2decl`](https://github.com/mcy/strings/tree/main/proc2decl) - Proc + macros suck! + --- All libraries are Apache-2.0 licensed. diff --git a/allman/Cargo.toml b/allman/Cargo.toml new file mode 100644 index 0000000..8060578 --- /dev/null +++ b/allman/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "allman" +version = "0.1.0" +description = "source code formatting and line reflowing toolkit" + +edition.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +keywords.workspace = true +license.workspace = true + +[dependencies] +byteyarn = { path = "../byteyarn" } + +unicode-width = "0.2.0" \ No newline at end of file diff --git a/allman/README.md b/allman/README.md new file mode 100644 index 0000000..aa9e33c --- /dev/null +++ b/allman/README.md @@ -0,0 +1,42 @@ +# allman + +`allman` - A code formatting and line reflowing toolkit. 🗒ī¸đŸ–‹ī¸ + +`allman::Doc` is a DOM-like structure that specifies how indentation, +like breaking, and reflowing should be handled. It is a tree of `Tag`s +that dictate layout information for the source code to format. 
+ +For example, the Allman brace style (for which this crate is named) can +be implemented as follows: + +```rust +// flat: fn foo() { ... } +// +// broken: +// fn foo() +// { +// // ... +// } +Doc::new() + .tag("fn") + .tag(Tag::Space) + .tag("foo") + .tag("(").tag(")") + .tag_with(Tag::Group(40), |doc| { + doc + .tag_if(Tag::Space, If::Flat) + .tag_if(Tag::Break(1), If::Broken) + .tag("{") + .tag_if(Tag::Space, If::Flat) + .tag_if(Tag::Break(1), If::Broken) + .tag_with(Tag::Indent(2), |doc| { + // Brace contents here... + }) + .tag_if(Tag::Space, If::Flat) + .tag_if(Tag::Break(1), If::Broken) + .tag("}"); + }); +``` + +When calling `Doc::render()`, the layout algorithm will determine whether +`Tag::Group`s should be "broken", i.e., laid out with newlines inside. diff --git a/allman/src/layout.rs b/allman/src/layout.rs new file mode 100644 index 0000000..4cf6ac9 --- /dev/null +++ b/allman/src/layout.rs @@ -0,0 +1,113 @@ +//! Layout algorithm implementation. +//! +//! The only thing the layout algorithm *actually* has to decide is whether each +//! group breaks or not. The algorithm is as follows. +//! +//! 1. Measure the width of each element recursively. Elements which span +//! multiple lines are treated as being of infinite width. +//! +//! 2. Mark groups as broken recursively: for each group, if at its current +//! position, it would overflow the maximum column length, break it, and +//! recurse into it. + +use unicode_width::UnicodeWidthStr; + +use crate::Cursor; +use crate::Doc; +use crate::If; +use crate::Measure; +use crate::Options; +use crate::Tag; +use crate::TagInfo; + +impl Doc<'_> { + pub(crate) fn do_layout(&self, opts: &Options) { + for (t, c) in self.cursor() { + measure(t, c); + } + + LayoutState { opts, indent: 0, column: 0 }.do_layout(self.cursor()); + } +} + +struct LayoutState<'a> { + opts: &'a Options, + + /// The column to start the next line at. + indent: usize, + + /// The next column that we would be writing at. 
+ column: usize, +} + +impl LayoutState<'_> { + /// Advances state for rendering a tag within a broken group. + fn do_layout(&mut self, cursor: Cursor) { + for (tag, cursor) in cursor { + let cond = tag.cond != Some(If::Flat); + + let mut m = tag.measure.get(); + m.column = self.column; + match &tag.tag { + Tag::Text(text) => match text.rfind("\n") { + Some(nl) => self.column = self.indent + text[nl..].width(), + None => self.column += m.width.unwrap(), + }, + + Tag::Space => self.column += 1, + Tag::Break(0) => {} + Tag::Break(_) => self.column = self.indent, + + Tag::Group(max) => { + let mut width = + m.width.filter(|w| self.column + w <= self.opts.max_columns); + + if width.is_some_and(|w| w > *max) { + width = None; + } + + if let Some(w) = width { + // Don't need to do layout here: everything already fits. + self.column += w; + } else { + m.width = None; + + self.do_layout(cursor); + } + } + + Tag::Indent(columns) => { + if cond { + let prev = self.indent; + self.indent = self.indent.saturating_add_signed(*columns); + self.do_layout(cursor); + self.indent = prev; + } + } + } + tag.measure.set(m); + } + } +} + +/// Calculates the width of each element if it was laid out in one line. +fn measure(tag: &TagInfo, cursor: Cursor) { + let tag_width = match &tag.tag { + _ if tag.cond == Some(If::Broken) => Some(0), + + Tag::Text(text) => (!text.contains("\n")).then(|| text.width()), + Tag::Space => Some(1), + Tag::Break(_) => None, + + _ => Some(0), + }; + + let width = cursor + .map(|(t, c)| { + measure(t, c); + t.measure.get().width + }) + .fold(tag_width, |a, b| a?.checked_add(b?)); + + tag.measure.set(Measure { width, column: 0 }); +} diff --git a/allman/src/lib.rs b/allman/src/lib.rs new file mode 100644 index 0000000..439457c --- /dev/null +++ b/allman/src/lib.rs @@ -0,0 +1,301 @@ +//! `allman` - A code formatting and line reflowing toolkit. 🗒ī¸đŸ–‹ī¸ +//! +//! [`allman::Doc`][Doc] is a DOM-like structure that specifies how indentation, +//! 
line breaking, and reflowing should be handled. It is a tree of [`Tag`]s +//! that dictate layout information for the source code to format. +//! +//! For example, the Allman brace style (for which this crate is named) can +//! be implemented as follows: +//! +//! ``` +//! # use allman::*; +//! // flat: fn foo() { ... } +//! // +//! // broken: +//! // fn foo() +//! // { +//! // // ... +//! // } +//! Doc::new() +//! .tag("fn") +//! .tag(Tag::Space) +//! .tag("foo") +//! .tag("(").tag(")") +//! .tag_with(Tag::Group(40), |doc| { +//! doc +//! .tag_if(Tag::Space, If::Flat) +//! .tag_if(Tag::Break(1), If::Broken) +//! .tag("{") +//! .tag_if(Tag::Space, If::Flat) +//! .tag_if(Tag::Break(1), If::Broken) +//! .tag_with(Tag::Indent(2), |doc| { +//! // Brace contents here... +//! }) +//! .tag_if(Tag::Space, If::Flat) +//! .tag_if(Tag::Break(1), If::Broken) +//! .tag("}"); +//! }); +//! ``` +//! +//! When calling [`Doc::render()`], the layout algorithm will determine whether +//! [`Tag::Group`]s should be "broken", i.e., laid out with newlines inside. + +use core::slice; +use std::cell::Cell; +use std::fmt; +use std::io; + +use byteyarn::YarnBox; + +mod layout; +mod render; + +/// A source code document, which can be rendered as formatted text. +/// +/// A [`Doc`] is analogous to an HTML DOM, which is text along with markup for +/// laying out that text. The difference being that rather than being converted +/// into raster graphics by a browser engine, a [`Doc`] is rendered as a text +/// file. +#[derive(Clone, Default)] +pub struct Doc<'text> { + /// This is a flattened tree: each node specifies how many elements after it + /// make up its children. The `Cursor` type implements walking this tree. + tags: Vec>, +} + +/// A condition that can be applied to a tag. +/// +/// If a condition is set on a tag, and the condition is false, the tag is +/// treated as a no-op: its contents are not printed.
+#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum If { + /// True when the containing group is printed on one line. + Flat, + /// True when the containing group does not fit on one line. + Broken, +} + +/// Options for [`Doc::render()`]. +pub struct Options { + /// The maximum number of columns in a line. + pub max_columns: usize, +} + +impl<'text> Doc<'text> { + /// Returns a new, empty document. + pub fn new() -> Self { + Self::default() + } + + /// Renders this document to the given writer. + pub fn render( + &self, + out: &mut dyn io::Write, + options: &Options, + ) -> io::Result<()> { + self.do_layout(options); + render::Printer::new(out).render(self.cursor(), options, true) + } + + /// Inserts a new self-closing tag into this doc. + pub fn tag(&mut self, tag: impl Into>) -> &mut Self { + self.tag_if_with(tag, None, |_| {}) + } + + /// Inserts a new tag into this doc. The given closure can be used to insert + /// tags into it. + /// + /// # Panics + /// + /// Panics if children are inserted and [`Tag::can_have_children()`] is false. + pub fn tag_with( + &mut self, + tag: impl Into>, + body: impl FnOnce(&mut Self), + ) -> &mut Self { + self.tag_if_with(tag, None, body) + } + + /// Inserts a new tag into this doc, with an optional condition. + pub fn tag_if( + &mut self, + tag: impl Into>, + cond: impl Into>, + ) -> &mut Self { + self.tag_if_with(tag, cond, |_| {}) + } + + /// Inserts a new tag into this doc, with an optional condition. The given + /// closure can be used to insert tags into it. + /// + /// # Panics + /// + /// Panics if children are inserted and [`Tag::can_have_children()`] is false. 
+ pub fn tag_if_with( + &mut self, + tag: impl Into>, + cond: impl Into>, + body: impl FnOnce(&mut Self), + ) -> &mut Self { + let tag = tag.into(); + let compound = tag.can_have_children(); + + let consolidate = matches!( + (&tag, self.tags.last().map(|t| &t.tag)), + (Tag::Space, Some(Tag::Space)) + ); + + let idx = self.tags.len(); + self.tags.push(TagInfo { + tag, + len: 0, + cond: cond.into(), + measure: Cell::default(), + }); + body(self); + + let len = self.tags.len() - idx - 1; + assert!( + compound || len == 0, + "inserted children for {:?}", + &self.tags[idx].tag + ); + + if consolidate { + self.tags.pop(); + } + + self.tags[idx].len = len; + self + } + + fn cursor(&self) -> Cursor { + Cursor { iter: self.tags.iter() } + } +} + +#[derive(Clone, Debug)] +struct TagInfo<'text> { + tag: Tag<'text>, + len: usize, + cond: Option, + + measure: Cell, +} + +#[derive(Copy, Clone, Default, Debug)] +struct Measure { + /// The number of columns this tag takes up when it is formatted on one line. + /// + /// None if its width should be treated as infinite. + width: Option, + column: usize, +} + +/// An element of a [`Doc`]. +#[derive(Clone, PartialEq, Eq, Debug)] +pub enum Tag<'text> { + /// Verbatim text. Line breaks inside of this text cause any groups that + /// contain it to be broken. + Text(YarnBox<'text, str>), + + /// Inserts a space, except if it would end a line. This is intended for + /// ensuring lines do not have trailing whitespace. [`Tag::Text`] containing + /// a space can be used to force a space at the end of a line. + /// + /// Consecutive space tags are consolidated into one. + Space, + + /// Inserts the given number of newlines, and breaks the surrounding group. + /// + /// Consecutive breaks are consolidated into one. A `Break(0)` can be used + /// to force a break without inserting an actual newline. + Break(usize), + + /// A sequence of tags that may either be rendered as one line, or broken into + /// multiple lines if it does not fit. 
+ /// + /// The group will also break itself if it is wider than the given width; + /// use [`usize::MAX`] to disable this. + Group(usize), + + /// Change indentation by the given number of columns. + Indent(isize), +} + +impl Tag<'_> { + /// Returns whether or not this tag can contain child tags. + pub fn can_have_children(&self) -> bool { + matches!(self, Self::Group(..) | Self::Indent(..)) + } +} + +impl<'text, Y: Into>> From for Tag<'text> { + fn from(yarn: Y) -> Self { + Self::Text(yarn.into()) + } +} + +/// A cursor over a piece of a [`Doc`]. +struct Cursor<'a> { + iter: slice::Iter<'a, TagInfo<'a>>, +} + +impl<'a> Iterator for Cursor<'a> { + type Item = (&'a TagInfo<'a>, Cursor<'a>); + + fn next(&mut self) -> Option { + let next = self.iter.next()?; + if next.len == 0 { + // Fast path that avoids an extra bounds check. + return Some((next, Cursor { iter: [].iter() })); + } + + let (contents, rest) = self.iter.as_slice().split_at(next.len); + self.iter = rest.iter(); + Some((next, Cursor { iter: contents.iter() })) + } +} + +impl fmt::Debug for Doc<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fn fmt( + indent: usize, + cursor: Cursor, + f: &mut fmt::Formatter, + ) -> fmt::Result { + for (tag, cursor) in cursor { + write!(f, "{:<1$}", "\n", indent + 1)?; + match &tag.tag { + Tag::Text(y) => write!(f, "{y:?}")?, + Tag::Space => write!(f, "")?, + Tag::Break(n) => write!(f, "")?, + Tag::Group(w) => { + if cursor.iter.as_slice().is_empty() { + write!(f, "")?; + continue; + } + + write!(f, "")?; + fmt(indent + 2, cursor, f)?; + write!(f, "")?; + } + Tag::Indent(c) => { + if cursor.iter.as_slice().is_empty() { + write!(f, "")?; + continue; + } + + write!(f, "")?; + fmt(indent + 2, cursor, f)?; + write!(f, "")?; + } + } + } + write!(f, "{:<1$}", "\n", indent - 2 + 1)?; + Ok(()) + } + + fmt(0, self.cursor(), f) + } +} diff --git a/allman/src/render.rs b/allman/src/render.rs new file mode 100644 index 0000000..60c83a8 --- /dev/null +++ 
b/allman/src/render.rs @@ -0,0 +1,139 @@ +use std::io; +use std::io::Write; +use std::mem; + +use crate::If; +use crate::Options; +use crate::Tag; + +/// An indentation-aware pretty-printer. +pub struct Printer<'a> { + out: &'a mut dyn io::Write, + indent: usize, + space: bool, + newlines: usize, +} + +impl<'a> Printer<'a> { + /// Returns a new printer with the given output and options. + pub fn new(out: &'a mut dyn io::Write) -> Self { + Self { + out, + indent: 0, + space: false, + newlines: 0, + } + } + + /// Updates the indentation level with the given diff. + pub fn with_indent( + &mut self, + diff: isize, + body: impl FnOnce(&mut Self) -> R, + ) -> R { + let prev = self.indent; + self.indent = self.indent.saturating_add_signed(diff); + let r = body(self); + self.indent = prev; + r + } + + /// Writes indentation, if necessary. + pub fn write_indent(&mut self) -> io::Result<()> { + if mem::take(&mut self.newlines) == 0 { + return Ok(()); + } + + self.write_spaces(self.indent) + } + + /// Writes len ASCII spaces to the output. + pub fn write_spaces(&mut self, mut len: usize) -> io::Result<()> { + const SPACES: &[u8; 32] = b" "; + + while len > SPACES.len() { + self.out.write_all(SPACES)?; + len -= SPACES.len(); + } + self.out.write_all(&SPACES[..len])?; + Ok(()) + } + + pub fn render( + &mut self, + cursor: crate::Cursor, + _options: &Options, + parent_is_broken: bool, + ) -> io::Result<()> { + for (tag, cursor) in cursor { + let cond = match tag.cond { + Some(If::Broken) => parent_is_broken, + Some(If::Flat) => !parent_is_broken, + None => true, + }; + + match &tag.tag { + Tag::Text(text) => { + if cond { + write!(self, "{text}")?; + } + } + + Tag::Space => self.space |= cond, + Tag::Break(n) => { + if cond { + for _ in self.newlines..*n { + writeln!(self)?; + } + } + } + + Tag::Group(..) 
=> { + let m = tag.measure.get(); + self.render(cursor, _options, m.width.is_none())?; + } + + Tag::Indent(columns) => { + if cond { + self.with_indent(*columns, |p| { + p.render(cursor, _options, parent_is_broken) + })?; + } + } + } + } + + Ok(()) + } +} + +impl io::Write for Printer<'_> { + fn write(&mut self, buf: &[u8]) -> io::Result { + if buf.is_empty() { + return Ok(0); + } + + if mem::take(&mut self.space) && !buf.starts_with(b"\n") { + self.write_all(b" ")?; + } + + for line in buf.split_inclusive(|&b| b == b'\n') { + if line == b"\n" { + self.newlines += 1; + self.out.write_all(line)?; + continue; + } + + self.write_indent()?; + self.out.write_all(line)?; + if line.ends_with(b"\n") { + self.newlines = 1; + } + } + Ok(buf.len()) + } + + fn flush(&mut self) -> io::Result<()> { + self.out.flush() + } +} diff --git a/gilded/Cargo.toml b/gilded/Cargo.toml new file mode 100644 index 0000000..6faa6b8 --- /dev/null +++ b/gilded/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "gilded" +version = "0.1.0" +description = "Dead simple golden tests" + +edition.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +keywords.workspace = true +license.workspace = true + +[dependencies] +gilded-attr = { path = "attr" } + +allman = { path = "../allman" } +byteyarn = { path = "../byteyarn" } + +camino = "1.1.9" +diffy = "0.4.0" +nu-glob = "0.101.0" +unicode-width = "0.2.0" diff --git a/gilded/README.md b/gilded/README.md new file mode 100644 index 0000000..97a10e2 --- /dev/null +++ b/gilded/README.md @@ -0,0 +1,64 @@ +# gilded + +`gilded` - Easy-peesy golden testing. 👑 + +## Why Golden Testing? + +A "golden test" is a test that transforms data in some way, and validates it +by diffing it against an expected result: the "golden". 
+ +This is especially useful for testing scenarios that consume an input file +(say, a source code file, for testing a compiler) and generate structured, +diffable textual output (such as JSON or CSV data, or even a `Debug`). + +Golden tests are best for cases where the output must be deterministic, and +where capturing fine-grained detail is valuable. + +Because they simply compare the result to an expected value byte-for-byte, +changes can quickly regenerate the test output by using the output of the +test itself. Diffs can be examined in code review directly. + +This crate also provides the `doc::Doc` type, enabling quick-and-dirty +construction of highly readable structured tree data for golden outputs. + +## Defining a Test + +A `gilded` test is defined like so: + +```rust +#[gilded::test("testdata/**/*.txt")] +fn my_test(test: &gilded::Test) { + // ... +} +``` + +`my_test` will be run as a separate unit test for every file (relative to +the crate root) which matches the glob passed to the attribute. The input +file's path and contents can be accessed through the `Test` accessors. + +To specify golden outputs, use `Test::outputs()`. This specifies the +file extension for the golden, and its computed contents. The extension is +used to construct the path of the result. If the input is `foo/bar.txt`, and +the extension for this output is `csv`, the output will be read/written to +`foo/bar.csv`. + +Panicking within the test body will fail the test as normal, tests should +not contain output assertions; those are handled by the framework. + +## Generating Goldens + +Once the test is created, simply set the `GILDED_REGENERATE` environment +variable: `GILDED_REGENERATE=1 cargo test`. + +To regenerate a specific test, simply pass its name as a filter to the test. 
+See `cargo test -- --help` for available flags. + +Regenerating goldens will cause a `GILDED_CHANGED` file to be created at the +crate root, which will cause all `gilded` tests in the crate to fail until +it is deleted. Deleting it forces the user to acknowledge that goldens have +been regenerated, to avoid blindly committing them. + +## Known Issues + +Golden tests can run under MIRI but have extremely large overhead. For the +time being, they are `#[cfg]`'d out in MIRI mode. diff --git a/gilded/attr/Cargo.toml b/gilded/attr/Cargo.toml new file mode 100644 index 0000000..5bc4467 --- /dev/null +++ b/gilded/attr/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "gilded-attr" +version = "0.1.0" +edition = "2021" + +[dependencies] +proc2decl = { path = "../../proc2decl" } + +[lib] +path = "lib.rs" +proc-macro = true \ No newline at end of file diff --git a/gilded/attr/lib.rs b/gilded/attr/lib.rs new file mode 100644 index 0000000..3ac56f6 --- /dev/null +++ b/gilded/attr/lib.rs @@ -0,0 +1,11 @@ +//! Implementation detail of `gilded`. + +proc2decl::fs_bridge! { + /// Turns a function into a golden test suite. + /// + /// See the [crate documentation][crate] for more information on how to use + /// this attribute. + /// + /// [crate]: https://docs.rs/gilded + macro #[test] => gilded::__test__; +} diff --git a/gilded/src/doc/json.rs b/gilded/src/doc/json.rs new file mode 100644 index 0000000..214ce0b --- /dev/null +++ b/gilded/src/doc/json.rs @@ -0,0 +1,116 @@ +//! Output implementation for JSON.
+ +use std::fmt; + +use allman::If; +use allman::Tag; +use byteyarn::YarnRef; + +use crate::doc::Doc; +use crate::doc::Elem; +use crate::doc::Options; + +pub fn build<'t>(options: &Options, doc: &Doc<'t>, out: &mut allman::Doc<'t>) { + let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); + if is_array { + out.tag_with(Tag::Group(options.max_array_width), |out| { + out + .tag("[") + .tag_with(Tag::Indent(options.tab_width as isize), |out| { + for (i, (_, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag(","); + out.tag_if(Tag::Space, If::Flat); + } + out.tag_if("\n", If::Broken); + value(options, entry, out); + } + }) + .tag_if("\n", If::Broken) + .tag("]"); + }); + } else { + out.tag_with(Tag::Group(options.max_object_width), |out| { + out + .tag("{") + .tag_with(Tag::Indent(options.tab_width as isize), |out| { + for (i, (key, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag(","); + out.tag_if(Tag::Space, If::Flat); + } + out + .tag_if("\n", If::Broken) + .tag( + Escape(key.as_deref().unwrap_or_default().as_bytes()) + .to_string(), + ) + .tag(":") + .tag(Tag::Space); + value(options, entry, out); + } + }) + .tag_if("\n", If::Broken) + .tag("}"); + }); + } +} + +fn value<'t>(options: &Options, v: &Elem<'t>, out: &mut allman::Doc<'t>) { + match v { + Elem::Bool(v) => { + out.tag(v.to_string()); + } + Elem::Int(v) => { + out.tag(v.to_string()); + } + Elem::UInt(v) => { + out.tag(v.to_string()); + } + Elem::Fp(v) => { + out.tag(v.to_string()); + } + Elem::String(v) => { + out.tag(Escape(v).to_string()); + } + Elem::Doc(v) => build(options, v, out), + } +} + +/// A displayable that prints the given data as a JSON string. 
+pub struct Escape<'a>(&'a [u8]); + +impl fmt::Display for Escape<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "\"")?; + for chunk in YarnRef::new(self.0).utf8_chunks() { + let chunk = match chunk { + Ok(s) => s, + Err(e) => { + for b in e { + write!(f, "<{b:02x}>")?; + } + continue; + } + }; + + for c in chunk.chars() { + match c { + '\n' => write!(f, "\\n")?, + '\r' => write!(f, "\\r")?, + '\t' => write!(f, "\\t")?, + '\\' => write!(f, "\\\\")?, + '\"' => write!(f, "\\\"")?, + c if !c.is_control() => write!(f, "{c}")?, + c => { + for u in c.encode_utf16(&mut [0, 0]) { + write!(f, "\\u{u:04x}")?; + } + } + } + } + } + + write!(f, "\"") + } +} diff --git a/gilded/src/doc/mod.rs b/gilded/src/doc/mod.rs new file mode 100644 index 0000000..7d3874d --- /dev/null +++ b/gilded/src/doc/mod.rs @@ -0,0 +1,204 @@ +//! Readable test output generating from tree-structured data. + +use std::io; +use std::io::Write; + +use byteyarn::YarnBox; + +mod json; +mod yaml; + +/// A tree-shaped document that can be pretty-printed, for generating goldens. +/// +/// Golden tests that output tree-shaped data can use `Doc` to generate +/// diff-friendly, readable output. +#[derive(Clone)] +pub struct Doc<'a> { + entries: Vec<(Option>, Elem<'a>)>, +} + +/// The format output to use when rendering a document. +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum Format { + /// Output as YAML. + Yaml, + /// Output as JSON. + Json, +} + +impl Default for Format { + fn default() -> Self { + Self::Yaml + } +} + +/// Options for rendering a [`Doc`] as a string. +pub struct Options { + // The format to output in; defaults to YAML. + pub format: Format, + // The number of spaces to use for indentation. + pub tab_width: usize, + + // The maximum number of columns to have before wrapping occurs. + pub max_columns: usize, + // The maximum number of columns for a one-line array. 
+ pub max_array_width: usize, + // The maximum number of columns for a one-line object. + pub max_object_width: usize, +} + +impl Default for Options { + fn default() -> Self { + Self { + format: Format::default(), + tab_width: 2, + max_columns: 80, + max_array_width: 50, + max_object_width: 40, + } + } +} + +/// A type which can be an element of a [`Doc`]. +/// +/// All of the primitive number types and types which convert to `YarnBox<[u8]>` +/// can be used as `Doc` values. `Option` for `T: DocValue` can also be +/// used, and will only be inserted if it is `Some`. +pub trait Value<'a> { + fn append_to(self, doc: &mut Doc<'a>); +} + +impl<'a> Doc<'a> { + /// Returns a new, empty `Doc`. + pub fn new() -> Self { + Self { entries: Vec::new() } + } + + /// Returns a new `Doc` with a single entry. + pub fn single( + name: impl Into>, + value: impl Value<'a>, + ) -> Self { + Self::new().entry(name, value) + } + + /// Appends a sequence of values to this document. + pub fn push(mut self, elements: impl IntoIterator>) -> Self { + for e in elements { + e.append_to(&mut self); + } + self + } + + /// Appends an entry with the given name to this document. + pub fn entry( + mut self, + name: impl Into>, + value: impl Value<'a>, + ) -> Self { + let prev = self.entries.len(); + value.append_to(&mut self); + if prev < self.entries.len() { + self.entries.last_mut().unwrap().0 = Some(name.into()); + } + self + } + + /// Appends an entry which is an array with the given elements. + pub fn array( + self, + name: impl Into>, + elements: impl IntoIterator>, + ) -> Self { + self.entry(name, Self::new().push(elements)) + } + + // Converts this document into a string, using the given options. + pub fn to_string(&self, options: &Options) -> String { + let mut out = Vec::new(); + let _ = self.render(&mut out, options); + String::from_utf8(out).unwrap() + } + + /// Converts this document into a string, writing it to the given output with + /// the given options. 
+ pub fn render( + &self, + out: &mut dyn Write, + options: &Options, + ) -> io::Result<()> { + let mut doc = allman::Doc::new(); + + match options.format { + Format::Yaml => yaml::build( + yaml::Args { options, root: true, in_list: false }, + self, + &mut doc, + ), + Format::Json => json::build(options, self, &mut doc), + } + + doc.render(out, &allman::Options { max_columns: options.max_columns }) + } +} + +impl Default for Doc<'_> { + fn default() -> Self { + Self::new() + } +} + +#[derive(Clone)] +enum Elem<'a> { + Bool(bool), + Int(i128), + UInt(u128), + Fp(f64), + String(YarnBox<'a>), + Doc(Doc<'a>), +} + +impl<'a, T: Value<'a>> Value<'a> for Option { + fn append_to(self, doc: &mut Doc<'a>) { + if let Some(v) = self { + v.append_to(doc) + } + } +} +impl<'a> Value<'a> for Doc<'a> { + fn append_to(self, doc: &mut Doc<'a>) { + doc.entries.push((None, Elem::Doc(self))) + } +} + +macro_rules! impl_from { + ($({$($T:ty),*} => $V:ident,)*) => {$($( + impl<'a> Value<'a> for $T { + fn append_to(self, doc: &mut Doc<'a>) { + doc.entries.push((None, Elem::$V(self as _))) + } + } + )*)*} +} + +impl_from! { + {bool} => Bool, + {i8, i16, i32, i64, i128, isize} => Int, + {u8, u16, u32, u64, u128, usize} => UInt, + {f32, f64} => Fp, +} + +macro_rules! impl_from_yarn { + ($(for<$lt:lifetime> $($T:ty),* => $U:ty,)*) => {$($( + impl<$lt> Value<$lt> for $T { + fn append_to(self, doc: &mut Doc<$lt>) { + doc.entries.push((None, Elem::String(<$U>::from(self).into_bytes()))) + } + } + )*)*} +} + +impl_from_yarn! { + for<'a> &'a [u8], Vec, YarnBox<'a, [u8]> => YarnBox<'a, [u8]>, + for<'a> char, &'a str, String, YarnBox<'a, str> => YarnBox<'a, str>, +} diff --git a/gilded/src/doc/yaml.rs b/gilded/src/doc/yaml.rs new file mode 100644 index 0000000..68e5105 --- /dev/null +++ b/gilded/src/doc/yaml.rs @@ -0,0 +1,174 @@ +//! Output implementation for YAML. 
+ +use std::fmt; + +use allman::If; +use allman::Tag; +use byteyarn::YarnRef; + +use crate::doc::Doc; +use crate::doc::Elem; +use crate::doc::Options; + +pub struct Args<'a> { + pub root: bool, + pub in_list: bool, + pub options: &'a Options, +} + +pub fn build<'t>(args: Args, doc: &'t Doc<'t>, out: &mut allman::Doc<'t>) { + let is_array = doc.entries.iter().all(|(k, _)| k.is_none()); + if is_array { + out.tag_with(Tag::Group(args.options.max_array_width), |out| { + out.tag_if("[", If::Flat); + if !args.root { + out.tag_if(Tag::Break(1), If::Broken); + } + for (i, (_, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag_if(",", If::Flat); + out.tag_if(Tag::Space, If::Flat); + } + + out.tag_if("-", If::Broken); + out.tag_if(Tag::Space, If::Broken); + //out.tag_with(Tag::Indent(args.options.tab_width as isize), |out| { + value(Args { root: false, in_list: true, ..args }, entry, out); + //}); + + out.tag_if(Tag::Break(1), If::Broken); + } + out.tag_if("]", If::Flat); + }); + } else { + out.tag_with(Tag::Group(args.options.max_object_width), |out| { + let in_map = !args.root && !args.in_list; + if in_map { + out.tag_if(Tag::Break(1), If::Broken); + } + out + .tag_if("{", If::Flat) + .tag_with(Tag::Indent(args.options.tab_width as isize), |out| { + for (i, (key, entry)) in doc.entries.iter().enumerate() { + if i > 0 { + out.tag_if(",", If::Flat); + out.tag_if(Tag::Space, If::Flat); + } + + let key_bytes = key.as_deref().unwrap_or_default().as_bytes(); + let ident = is_ident(key_bytes); + + if let Some(ident) = ident { + out.tag(ident.to_box()); + + let mut entry = entry; + while let Elem::Doc(d) = entry { + let [(Some(k), v)] = d.entries.as_slice() else { break }; + let Some(ident) = is_ident(k.as_bytes()) else { break }; + + out.tag(".").tag(ident.to_box()); + entry = v; + } + } else { + out.tag(Escape(key_bytes).to_string()); + } + out.tag(":").tag(Tag::Space); + + value(Args { root: false, in_list: false, ..args }, entry, out); + 
out.tag_if(Tag::Break(1), If::Broken); + } + }) + .tag_if("}", If::Flat); + }); + } +} + +fn value<'t>(args: Args, v: &'t Elem<'t>, out: &mut allman::Doc<'t>) { + match v { + Elem::Bool(v) => { + out.tag(v.to_string()); + } + Elem::Int(v) => { + out.tag(v.to_string()); + } + Elem::UInt(v) => { + out.tag(v.to_string()); + } + Elem::Fp(v) => { + out.tag(v.to_string()); + } + Elem::String(v) => { + if is_raw_string(v.as_ref()) { + out.tag("|").tag(Tag::Break(1)).tag_with( + Tag::Indent(args.options.tab_width as isize), + |out| { + out.tag(v.as_ref().to_utf8().unwrap().to_box()); + }, + ); + return; + } + out.tag(Escape(v).to_string()); + } + Elem::Doc(v) => build(args, v, out), + } +} + +/// A displayable that prints the given data as a JSON string. +pub struct Escape<'a>(&'a [u8]); + +impl fmt::Display for Escape<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "\"")?; + for chunk in YarnRef::new(self.0).utf8_chunks() { + let chunk = match chunk { + Ok(s) => s, + Err(e) => { + for b in e { + write!(f, "\\x{b:02x}")?; + } + continue; + } + }; + + for c in chunk.chars() { + match c { + '\0' => write!(f, "\\0")?, + '\n' => write!(f, "\\n")?, + '\r' => write!(f, "\\r")?, + '\t' => write!(f, "\\t")?, + '\\' => write!(f, "\\\\")?, + '\"' => write!(f, "\\\"")?, + c if !c.is_control() => write!(f, "{c}")?, + c => { + for u in c.encode_utf16(&mut [0, 0]) { + write!(f, "\\u{u:04x}")?; + } + } + } + } + } + + write!(f, "\"") + } +} + +fn is_raw_string(data: YarnRef<[u8]>) -> bool { + data.to_utf8().is_ok_and(|s| { + s.contains("\n") && s.chars().all(|c| c == '\n' || !c.is_control()) + }) +} + +fn is_ident(data: &[u8]) -> Option> { + fn is_start(c: char) -> bool { + c.is_alphabetic() || c == '_' || c == '-' + } + fn is_continue(c: char) -> bool { + is_start(c) || c.is_numeric() + } + + let s = YarnRef::from(data).to_utf8().ok()?; + + let mut chars = s.chars(); + let is_ident = chars.next().is_some_and(is_start) && chars.all(is_continue); + 
is_ident.then_some(s) +} diff --git a/gilded/src/lib.rs b/gilded/src/lib.rs new file mode 100644 index 0000000..a62a632 --- /dev/null +++ b/gilded/src/lib.rs @@ -0,0 +1,427 @@ +//! `gilded` - Easy-peesy golden testing. 👑 +//! +//! # Why Golden Testing? +//! +//! A "golden test" is a test that transforms data in some way, and validates it +//! by diffing it against an expected result: the "golden". +//! +//! This is especially useful for testing scenarios that consume an input file +//! (say, a source code file, for testing a compiler) and generate structured, +//! diffable textual output (such as JSON or CSV data, or even a `Debug`). +//! +//! Golden tests are best for cases where the output must be deterministic, and +//! where capturing fine-grained detail is valuable. +//! +//! Because they simply compare the result to an expected value byte-for-byte, +//! changes can quickly regenerate the test output by using the output of the +//! test itself. Diffs can be examined in code review directly. +//! +//! This crate also provides the [`doc::Doc`] type, enabling quick-and-dirty +//! construction of highly readable structured tree data for golden outputs. +//! +//! # Defining a Test +//! +//! A `gilded` test is defined like so: +//! +//! ``` +//! #[gilded::test("testdata/**/*.txt")] +//! fn my_test(test: &gilded::Test) { +//! // ... +//! } +//! ``` +//! +//! `my_test` will be run as a separate unit test for every file (relative to +//! the crate root) which matches the glob passed to the attribute. The input +//! file's path and contents can be accessed through the [`Test`] accessors. +//! +//! To specify golden outputs, use [`Test::outputs()`]. This specifies the +//! file extension for the golden, and its computed contents. The extension is +//! used to construct the path of the result. If the input is `foo/bar.txt`, and +//! the extension for this output is `csv`, the output will be read/written to +//! `foo/bar.csv`. +//! +//! 
Panicking within the test body will fail the test as normal; tests should
+//! not contain output assertions, as those are handled by the framework.
+//!
+//! # Generating Goldens
+//!
+//! Once the test is created, simply set the `GILDED_REGENERATE` environment
+//! variable: `GILDED_REGENERATE=1 cargo test`.
+//!
+//! To regenerate a specific test, simply pass its name as a filter to the test.
+//! See `cargo test -- --help` for available flags.
+//!
+//! Regenerating goldens will cause a `GILDED_CHANGED` file to be created at the
+//! crate root, which will cause all `gilded` tests in the crate to fail until
+//! it is deleted. Deleting it forces the user to acknowledge that goldens have
+//! been regenerated, to avoid blindly committing them.
+ #[doc(hidden)] + pub fn new( + name: &'static str, + glob: &'static str, + crate_root: &'static str, + location: &'static Location<'static>, + run: fn(&Test), + paths: &[&'static str], + ) -> Suite { + let crate_root = Path::new(crate_root); + + let common_prefix = paths.first().copied().map(|mut common_prefix| { + common_prefix = Utf8Path::new(common_prefix) + .parent() + .map(Utf8Path::as_str) + .unwrap_or(""); + + let sep = std::path::MAIN_SEPARATOR; + for path in &paths[1..] { + let common = common_prefix.split_inclusive(sep); + let chunks = path.split_inclusive(sep); + + let len = common + .zip(chunks) + .take_while(|(a, b)| a == b) + .map(|(a, _)| a.len()) + .sum(); + + common_prefix = &common_prefix[..len]; + } + + common_prefix = common_prefix.trim_end_matches(sep); + common_prefix + }); + + let mut suite = Suite { + name, + glob, + crate_root, + location, + run, + test_root: Utf8Path::new(common_prefix.unwrap_or_default()), + poisoned: false, + }; + + if suite.inputs_have_changed() { + // Poke the mtime of the file that contains the #[gilded::test], and then + // fail it. + // + // Alas, finding this file may be non-trivial, since the Location path + // will be rooted at the Cargo workspace, but the cwd will be the manifest + // dir of the crate we are testing. + let mut cwd = env::current_dir().unwrap(); + let mut test_file = cwd.join(suite.location.file()); + while !test_file.exists() { + cwd.pop(); + test_file = cwd.join(suite.location.file()); + } + + // Bump the mtime. + File::open(test_file) + .unwrap() + .set_modified(SystemTime::now()) + .unwrap(); + + let _ = write!( + io::stderr(), // Dodge stderr capture. + "\nerror: #[gilded::test] inputs for `{}` are out of date; rerun the test to pick up updated inputs\n\n", + suite.name, + ); + suite.poisoned = true; + } + + suite + } + + /// Executes a test in this test suite with the given data. Panics to signal + /// test failure. 
+ /// + /// This is the function called in the body of a generated test function. + #[doc(hidden)] + #[track_caller] + pub fn run(&'static self, path: &'static str, text: &'static [u8]) { + if self.poisoned { + std::process::exit(128); + } + + let path = Utf8Path::new(path); + let file = self.crate_root.join(path); + let lock = self.crate_root.join("GILDED_CHANGED"); + let lock_name = "GILDED_CHANGED"; + + // TODO: make sure this is normalized to being a Unix path on Windows. + let name = path.strip_prefix(self.test_root).unwrap(); + + let test = Test { + suite: self, + path: name, + text, + outputs: Default::default(), + }; + (self.run)(&test); + + let regen = env::var_os(REGENERATE).is_some(); + assert!( + regen || !lock.exists(), + "golden files have changed: verify changes and then delete {lock_name}", + ); + if regen { + eprintln!("{}", lock.display()); + fs::write(lock, "delete this file to confirm changes to golden tests\n") + .unwrap() + } + + let outputs = test.outputs.borrow(); + let outputs = outputs + .as_ref() + .expect("test function failed to call Test::outputs()"); + + let mut failed = false; + for (extn, text) in outputs { + let file = file.with_extension(extn); + let name = name.with_extension(extn); + + if regen { + if text.is_empty() { + if file.exists() { + fs::remove_file(file).unwrap(); + } + } else { + fs::write(file, text).unwrap(); + } + + continue; + } + + let mut want = String::new(); + if file.exists() { + want = fs::read_to_string(file).unwrap() + } + + if text == &*want { + continue; + } + + let fmt = diffy::PatchFormatter::new().with_color(); + let patch = diffy::create_patch(text, &want); + let patch = fmt.fmt_patch(&patch); + eprintln!("mismatch for {name}:\n{patch}\n"); + failed = true; + } + + assert!(!failed, "golden output did not match test output"); + assert!( + !regen, + "golden files have changed: verify changes and then delete {lock_name}", + ) + } + + /// Checks for files that ostensibly belong to this suite which have 
changed, + /// by comparing the mtime of the files with the mtime of the executable. + fn inputs_have_changed(&self) -> bool { + let this = env::current_exe().expect("argv[0] missing for test binary"); + let built_at = this.metadata().unwrap().modified().unwrap(); + + nu_glob::glob(self.glob) + .unwrap() + .filter_map(Result::ok) + .any(|path| { + let mtime = path.metadata().unwrap().modified().unwrap(); + mtime > built_at + }) + } +} + +/// A handle for a single golden test case. +pub struct Test<'t> { + suite: &'t Suite, + path: &'t Utf8Path, + text: &'t [u8], + + #[allow(clippy::type_complexity)] + outputs: RefCell>>, +} + +impl<'t> Test<'t> { + /// Returns the test suite this test case belongs to. + pub fn suite(&self) -> &'t Suite { + self.suite + } + + /// Returns a path for the test input. + /// + /// This path will be unique among test outputs, and will be the same + /// regardless of platform. However, it need not correspond to the actual + /// path used to read and write the test data. + pub fn path(&self) -> &'t Utf8Path { + self.path + } + + /// Returns the textual content of the test input. + pub fn text(&self) -> &'t [u8] { + self.text + } + + /// Declares the outputs for this test. + /// + /// A test may have many results, each of which has the same path as the input + /// with an extra extension. For example, for a `foo.txt` input, the output + /// might be `foo.txt.stderr`, in which case `extension` would be `stderr`. + /// + /// Returns output functions for test, one for each output. They should be + /// called with the result of the test. + /// + /// # Panics + /// + /// The test must call this function exactly; calling it more than once or not + /// at all will cause the test to panic. 
+ pub fn outputs<'a, const N: usize>( + &'a self, + extensions: [&str; N], + ) -> [impl FnOnce(String) + 'a; N] { + let outputs: RefMut> = self + .outputs + .try_borrow_mut() + .expect("called Test::outputs() more than once"); + assert!(outputs.is_none(), "called Test::outputs() more than once"); + + let outputs: RefMut<[_; N]> = RefMut::map(outputs, |o| { + o.insert(extensions.map(|extn| (extn.into(), String::new())).into()) + .as_mut() + .try_into() + .unwrap() + }); + + split(outputs).map(|mut slot| move |value| slot.1 = value) + } +} + +fn split(orig: RefMut<[T; N]>) -> [RefMut; N] { + let mut orig: Option> = Some(orig); + [(); N].map(|_| { + let (elem, rest) = + RefMut::map_split(orig.take().unwrap(), |s| s.split_first_mut().unwrap()); + orig = Some(rest); + elem + }) +} + +/// Implementation macro for `#[gilded::test]`. +#[doc(hidden)] +#[macro_export] +macro_rules! __test__ { + ( + #[test($glob:literal)] + $(#[$attr:meta])* + fn $name:ident($($args:tt)*) { $($body:tt)* } + $($tt:tt)* + ) => { + #[cfg(test)] + mod $name { + use super::*; + pub static __SUITE__: ::std::sync::LazyLock<$crate::Suite> = + ::std::sync::LazyLock::new(|| $crate::Suite::new( + stringify!($name), + $glob, + env!("CARGO_MANIFEST_DIR"), + ::std::panic::Location::caller(), + |$($args)*| -> () { $($body)* }, + &$crate::__test__!(@paths[] $($tt)*), + )); + + $crate::__test__! { @tests $(#[$attr])* $($tt)* } + } + }; + + ( + @tests + $(#[$attr:meta])* + $mod:ident { $(inner:tt)* } + $($outer:tt)* + ) => { + mod $mod { + use super::__SUITE__; + $crate::__test__! { @tests $(#[$attr])* $(inner)* } + } + $crate::__test__! { @tests $(#[$attr])* $(outer)* } + }; + + ( + @tests + $(#[$attr:meta])* + $test:ident($path:expr, $text:expr) + $($tt:tt)* + ) => { + $(#[$attr])* + #[::std::prelude::rust_2021::test] + #[cfg_attr(miri, ignore)] + fn $test() { __SUITE__.run($path, $text) } + $crate::__test__! 
{ @tests $(#[$attr])* $($tt)* } + }; + + (@tests $(#[$attr:meta])*) => {}; + + ( + @paths[$($e:expr,)*] + $mod:ident { $(inner:tt)* } + $($outer:tt)* + ) => { + $crate::__test__!(@paths[$($e,)*] $(inner)* $(outer)*) + }; + + ( + @paths[$($e:expr,)*] + $test:ident($path:expr, $text:expr) + $($tt:tt)* + ) => { + $crate::__test__!(@paths[$($e,)* $path,] $($tt)*) + }; + + (@paths $e:expr) => { $e }; +} diff --git a/ilex/Cargo.toml b/ilex/Cargo.toml index 2eb82d2..78ac86d 100644 --- a/ilex/Cargo.toml +++ b/ilex/Cargo.toml @@ -12,6 +12,7 @@ license.workspace = true [dependencies] byteyarn = { version = "0.5", path = "../byteyarn" } +gilded = { path = "../gilded" } twie = { version = "0.5", path = "../twie" } ilex-attr = { version = "0.5.0", path = "attr" } @@ -19,6 +20,7 @@ ilex-attr = { version = "0.5.0", path = "attr" } annotate-snippets = "0.10.0" camino = "1.1.6" num-traits = "0.2.17" +ptree = "0.5.2" similar-asserts = "1.5.0" regex-syntax = "0.8.2" regex-automata = "0.4.3" # Bless Andrew for his patience. diff --git a/ilex/attr/Cargo.toml b/ilex/attr/Cargo.toml index f178e94..959f80d 100644 --- a/ilex/attr/Cargo.toml +++ b/ilex/attr/Cargo.toml @@ -13,3 +13,6 @@ license.workspace = true [lib] path = "lib.rs" proc-macro = true + +[dependencies] +proc2decl = { path = "../../proc2decl" } \ No newline at end of file diff --git a/ilex/attr/lib.rs b/ilex/attr/lib.rs index 97cd792..dc9d55d 100644 --- a/ilex/attr/lib.rs +++ b/ilex/attr/lib.rs @@ -1,93 +1,6 @@ //! Implementation detail of `ilex`. -use proc_macro::Delimiter; -use proc_macro::Group; -use proc_macro::Ident; -use proc_macro::Punct; -use proc_macro::Spacing; -use proc_macro::Span; use proc_macro::TokenStream; -use proc_macro::TokenTree; - -/// Generates a lexer spec struct. -/// -/// This macro generates the type of struct described in the -/// [crate documentation][crate]. The syntax is as follows. -/// -/// ```ignore -/// use ilex::rule::Keyword; -/// use ilex::Lexeme; -/// -/// /// My cool spec. 
-/// #[ilex::spec] -/// struct MySpec { -/// #[named("...")] -/// #[rule(/* ... */)] -/// dollar: Lexeme = "$", -/// } -/// ``` -/// -/// The type of each field must be a [`Lexeme`] with a [`Rule`] type as its -/// parameter. There are two special attributes that can follow. -/// -/// - `#[named]` makes the rule into a *named* rule. This name can be used by -/// diagnostics, and corresponds to calling `Spec::named_rule()`. -/// -/// - `#[rule]` is the value to use to construct the rule, which must be -/// `Into`, where `R` is the type inside `Lexeme` (so, above, the rule -/// value must be `Into`). By default, this value is the name of the -/// rule, to make the common case of declaring a keyword as simple as writing -/// `nullptr: Lexeme`, assuming Rust itself doesn't already use that -/// keyword. -/// -/// Note that *order matters* for the fields: when breaking a tie between two -/// potential tokens of the same length, the first one in the struct will win. -/// In practice, this means you should put keywords before identifiers. -/// -/// Additionally, the following functions will be defined for the `MySpec` type. -/// -/// ``` -/// # struct Spec; -/// # struct MySpec; -/// # fn norun(_: i32) { -/// impl MySpec { -/// /// Gets the global instance of this spec. -/// pub fn get() -> &'static Self { -/// // ... -/// # todo!() -/// } -/// -/// /// Gets the actual compiled spec. -/// pub fn spec(&self) -> &Spec { -/// // ... -/// # todo!() -/// } -/// } -/// # } -/// ``` -/// -// God cross-trait links suck. -/// [`Lexeme`]: https://docs.rs/ilex/latest/ilex/struct.Lexeme.html -/// [`Rule`]: https://docs.rs/ilex/latest/ilex/rule/trait.Rule.html -/// [crate]: https://docs.rs/ilex -#[proc_macro_attribute] -pub fn spec(_attr: TokenStream, item: TokenStream) -> TokenStream { - // This is implemented as a decl macro, because that's easier to - // understand and debug than proc macros. I hate proc macros so much. 
- let span = Span::call_site(); - let macro_call: [TokenTree; 8] = [ - Punct::new(':', Spacing::Joint).into(), - Punct::new(':', Spacing::Alone).into(), - Ident::new("ilex", span).into(), - Punct::new(':', Spacing::Joint).into(), - Punct::new(':', Spacing::Alone).into(), - Ident::new("__spec__", span).into(), - Punct::new('!', Spacing::Alone).into(), - Group::new(Delimiter::Brace, item).into(), - ]; - - macro_call.into_iter().collect() -} // This helper exists only to make the #[spec] field attributes inert. #[doc(hidden)] @@ -95,3 +8,68 @@ pub fn spec(_attr: TokenStream, item: TokenStream) -> TokenStream { pub fn derive(_: TokenStream) -> TokenStream { TokenStream::new() } + +proc2decl::bridge! { + /// Generates a lexer spec struct. + /// + /// This macro generates the type of struct described in the + /// [crate documentation][crate]. The syntax is as follows. + /// + /// ```ignore + /// use ilex::rule::Keyword; + /// use ilex::Lexeme; + /// + /// /// My cool spec. + /// #[ilex::spec] + /// struct MySpec { + /// #[named("...")] + /// #[rule(/* ... */)] + /// dollar: Lexeme = "$", + /// } + /// ``` + /// + /// The type of each field must be a [`Lexeme`] with a [`Rule`] type as its + /// parameter. There are two special attributes that can follow. + /// + /// - `#[named]` makes the rule into a *named* rule. This name can be used by + /// diagnostics, and corresponds to calling `Spec::named_rule()`. + /// + /// - `#[rule]` is the value to use to construct the rule, which must be + /// `Into`, where `R` is the type inside `Lexeme` (so, above, the rule + /// value must be `Into`). By default, this value is the name of the + /// rule, to make the common case of declaring a keyword as simple as writing + /// `nullptr: Lexeme`, assuming Rust itself doesn't already use that + /// keyword. + /// + /// Note that *order matters* for the fields: when breaking a tie between two + /// potential tokens of the same length, the first one in the struct will win. 
+ /// In practice, this means you should put keywords before identifiers. + /// + /// Additionally, the following functions will be defined for the `MySpec` type. + /// + /// ``` + /// # struct Spec; + /// # struct MySpec; + /// # fn norun(_: i32) { + /// impl MySpec { + /// /// Gets the global instance of this spec. + /// pub fn get() -> &'static Self { + /// // ... + /// # todo!() + /// } + /// + /// /// Gets the actual compiled spec. + /// pub fn spec(&self) -> &Spec { + /// // ... + /// # todo!() + /// } + /// } + /// # } + /// ``` + /// + // God cross-trait links suck. + /// [`Lexeme`]: https://docs.rs/ilex/latest/ilex/struct.Lexeme.html + /// [`Rule`]: https://docs.rs/ilex/latest/ilex/rule/trait.Rule.html + /// [crate]: https://docs.rs/ilex + macro #[spec] => ilex::__spec__; +} diff --git a/ilex/src/file/context.rs b/ilex/src/file/context.rs index 5d774e7..b83acf4 100644 --- a/ilex/src/file/context.rs +++ b/ilex/src/file/context.rs @@ -101,6 +101,28 @@ impl Context { self.file(idx).unwrap() } + /// Adds a new file to this source context, validating that it is valid + /// UTF-8. + pub fn new_file_from_bytes<'a>( + &self, + path: impl Into<&'a Utf8Path>, + text: impl Into>, + report: &Report, + ) -> Result { + let path = path.into(); + let text = String::from_utf8(text.into()).map_err(|e| { + let n = e.utf8_error().valid_up_to(); + let b = e.as_bytes()[n]; + + report + .error(f!("input file `{path}` was not valid UTF-8")) + .note(f!("encountered non-UTF-8 byte {b:#02x} at offset {n}")); + report.fatal().unwrap() + })?; + + Ok(self.new_file(path, text)) + } + /// Adds a new file to this source context by opening `name` and reading it /// from the file system. 
pub fn open_file<'a>( @@ -118,12 +140,7 @@ impl Context { } }; - let Ok(utf8) = String::from_utf8(bytes) else { - report.error(f!("input file `{path}` was not valid UTF-8")); - return report.fatal(); - }; - - Ok(self.new_file(path, utf8)) + self.new_file_from_bytes(path, bytes, report) } /// Gets the `idx`th file in this source context. diff --git a/ilex/src/lib.rs b/ilex/src/lib.rs index 06f2088..493fb38 100644 --- a/ilex/src/lib.rs +++ b/ilex/src/lib.rs @@ -59,9 +59,9 @@ //! can. This will make it easier for you to just pin a version and avoid //! thinking about this problem. //! -//! Diagnostics are completely unstable. Don't try to parse them, don't write -//! golden tests against them. If you must, use [`testing::check_report()`] so -//! that you can regenerate them. +//! Diagnostics are completely unstable. Don't try to parse them, and if you +//! need to test them, using something like [`gilded`](https://docs.rs/gilded) +//! to make it easy to regenerate when the output changes. //! //! # Quick Start //! @@ -263,10 +263,8 @@ pub mod fp; pub mod ice; pub mod report; pub mod rule; -pub mod testing; pub mod token; -#[cfg(not(test))] pub use { crate::{ file::Context, diff --git a/ilex/src/report/mod.rs b/ilex/src/report/mod.rs index 2c19d0e..50463fb 100644 --- a/ilex/src/report/mod.rs +++ b/ilex/src/report/mod.rs @@ -141,21 +141,6 @@ impl Report { render::finish(self, sink) } - pub(crate) fn write_out_for_test(&self) -> String { - eprintln!("{}", self.fatal::<()>().unwrap_err()); - let mut sink = String::new(); - render::render_fmt( - self, - &Options { - color: false, - show_report_locations: false, - }, - &mut sink, - ) - .unwrap(); - sink - } - pub(crate) fn new(ctx: &Context, opts: Options) -> Self { Self { ctx: ctx.copy(), diff --git a/ilex/src/spec.rs b/ilex/src/spec.rs index 9139843..ea49840 100644 --- a/ilex/src/spec.rs +++ b/ilex/src/spec.rs @@ -297,6 +297,7 @@ impl Lexeme { #[macro_export] macro_rules! 
__spec__ { ( + #[spec] $(#[$meta:meta])* $vis:vis struct $name:ident {$( $(#[$($fmeta:tt)*])* diff --git a/ilex/src/testing/mod.rs b/ilex/src/testing/mod.rs deleted file mode 100644 index 8528d9b..0000000 --- a/ilex/src/testing/mod.rs +++ /dev/null @@ -1,532 +0,0 @@ -//! Lexer testing helpers. -//! -//! This type provides testing-oriented matchers for matching on a -//! [`TokenStream`][`crate::token::Stream`]. -//! -//! These matchers are intended for writing *tests*. To write a parser, you\ -//! should use [`Cursor`][crate::token::Cursor] instead. - -use byteyarn::Yarn; -use std::env; -use std::fmt; -use std::fs; -use std::ops::Range; -use std::path::Path; - -use crate::file::Span; -use crate::file::Spanned; -use crate::report::Report; -use crate::rule; -use crate::spec::Lexeme; -use crate::token; -use crate::token::Content; -use crate::token::Sign; - -mod recognize; -use recognize::Kind; - -/// Checks that `report` contains the expected diagnostics in `path`, verbatim. -/// -/// If the contents do not match, it will print a diff to stderr and panic. -/// -/// If the `ILEX_REGENERATE` env var is set, instead of reading the file and -/// performing the check, it will write the expected contents to the file, -/// allowing for easy generation of test data. -#[track_caller] -pub fn check_report(report: &Report, path: &(impl AsRef + ?Sized)) { - let path = path.as_ref(); - let got = report.write_out_for_test(); - let want = if env::var("ILEX_REGENERATE").is_ok() { - if let Some(parent) = path.parent() { - fs::create_dir_all(parent).unwrap(); - } - fs::write(path, got).unwrap(); - return; - } else { - fs::read_to_string(path).unwrap() - }; - - eprintln!("checking against {}...", path.display()); - similar_asserts::assert_eq!(got, want); -} - -/// Checks that `report` contains no diagnostics. -/// -/// If it does, it will print them to stderr and panic. 
-#[track_caller] -pub fn check_report_ok(report: &Report) { - if let Err(e) = report.fatal_or(()) { - e.panic(); - } -} - -/// A matcher for a token stream. -/// -/// For usage examples, see the `ilex/tests` directory. -pub struct Matcher { - stream: Vec, -} - -impl Matcher { - /// Creates a new matcher. - pub fn new() -> Self { - Self { stream: Vec::new() } - } - - /// Adds a new expected token for this matcher, from a lexeme and an argument. - /// - /// What is allowed for `arg` for a particular rule type is specified by - /// the [`Match`] trait. You can even define your own! - pub fn then1, A1>( - mut self, - lexeme: Lexeme, - a1: A1, - ) -> Self { - R::add_token(&mut self, lexeme, (a1,)); - self - } - - /// Adds a new expected token for this matcher, from a lexeme and two - /// arguments. - /// - /// What is allowed for `arg` for a particular rule type is specified by - /// the [`Match`] trait. You can even define your own! - pub fn then2, A1, A2>( - mut self, - lexeme: Lexeme, - a1: A1, - a2: A2, - ) -> Self { - R::add_token(&mut self, lexeme, (a1, a2)); - self - } - - /// Like [`Matcher::then1()`], but adds a prefix matcher too. - pub fn prefix1, A1>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - ) -> Self { - self.then1(lexeme, a1).prefix(prefix) - } - - /// Like [`Matcher::then2()`], but adds a prefix matcher too. - pub fn prefix2, A1, A2>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - a2: A2, - ) -> Self { - self.then2(lexeme, a1, a2).prefix(prefix) - } - - /// Like [`Matcher::then1()`], but adds a suffix matcher too. - pub fn suffix1, A1>( - self, - lexeme: Lexeme, - a1: A1, - suffix: impl Into, - ) -> Self { - self.then1(lexeme, a1).suffix(suffix) - } - - /// Like [`Matcher::then2()`], but adds a suffix matcher too. 
- pub fn suffix2, A1, A2>( - self, - lexeme: Lexeme, - a1: A1, - a2: A2, - suffix: impl Into, - ) -> Self { - self.then2(lexeme, a1, a2).suffix(suffix) - } - - /// Like [`Matcher::then1()`], but adds a prefix matcher and a suffix matcher too. - pub fn affix1, A1>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - suffix: impl Into, - ) -> Self { - self.then1(lexeme, a1).prefix(prefix).suffix(suffix) - } - - /// Like [`Matcher::then2()`], but adds a prefix matcher and a suffix matcher too. - pub fn affix2, A1, A2>( - self, - lexeme: Lexeme, - prefix: impl Into, - a1: A1, - a2: A2, - suffix: impl Into, - ) -> Self { - self.then2(lexeme, a1, a2).prefix(prefix).suffix(suffix) - } - - /// Adds an EOF matcher. - /// - /// Every token stream ends with an EOF token, so you always need to include - /// one. - pub fn eof(mut self) -> Self { - self.stream.push(recognize::Matcher { - which: Some(Lexeme::eof().any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Eof, - }); - self - } - - /// Matches `cursor` against this matcher, and panics if it doesn't. - #[track_caller] - pub fn assert_matches<'lex>( - &self, - that: impl IntoIterator>, - ) { - self.matches(that).unwrap() - } - - /// Sets an expectation for the overall span of the most recently added - /// token matcher. - /// - /// # Panics - /// - /// Panics if none of the matcher-adding methods has been called yet. - pub fn span(mut self, text: impl Into) -> Self { - self.stream.last_mut().unwrap().span = text.into(); - self - } - - /// Adds some expected comments to the most recently added token matcher. - /// - /// # Panics - /// - /// Panics if none of the matcher-adding methods has been called yet. - pub fn comments(mut self, iter: I) -> Self - where - I: IntoIterator, - I::Item: Into, - { - self - .stream - .last_mut() - .unwrap() - .comments - .extend(iter.into_iter().map(Into::into)); - self - } - - /// Matches `cursor` against this matcher. 
- /// - /// If matching fails, returns an error describing why. - pub fn matches<'lex>( - &self, - that: impl IntoIterator>, - ) -> Result<(), impl fmt::Debug> { - struct DebugBy(String); - impl fmt::Debug for DebugBy { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(&self.0) - } - } - - let mut state = recognize::MatchState::new(); - recognize::zip_eq( - "token streams", - &mut state, - &self.stream, - that, - |state, ours, theirs| ours.recognizes(state, theirs), - ); - state.finish().map_err(DebugBy) - } - - /// Sets the prefix for the most recently added token matcher. - /// - /// # Panics - /// - /// Panics if [`Matcher::then()`] has not been called yet, or if the most - /// recent matcher is not for [`rule::Ident`], [`rule::Digital`], or - /// [`rule::Quoted`], - fn prefix(mut self, text: impl Into) -> Self { - match &mut self.stream.last_mut().unwrap().kind { - Kind::Ident { prefix, .. } | Kind::Quoted { prefix, .. } => { - *prefix = Some(text.into()); - } - Kind::Digital { digits, .. } => digits[0].prefix = Some(text.into()), - _ => panic!("cannot set prefix on this matcher"), - } - - self - } - - /// Sets the prefix for the most recently added token matcher. - /// - /// # Panics - /// - /// Panics if [`Matcher::then()`] has not been called yet, or if the most - /// recent matcher is not for [`rule::Ident`], [`rule::Digital`], or - /// [`rule::Quoted`], - fn suffix(mut self, text: impl Into) -> Self { - match &mut self.stream.last_mut().unwrap().kind { - Kind::Ident { suffix, .. } - | Kind::Quoted { suffix, .. } - | Kind::Digital { suffix, .. } => { - *suffix = Some(text.into()); - } - _ => panic!("cannot set suffix on this matcher"), - } - - self - } -} - -impl Default for Matcher { - fn default() -> Self { - Self::new() - } -} - -/// A matcher for a chunk of text from the input source. -/// -/// This is slightly more general than a span, since it can specify the content -/// of the text and the offsets separately, and optionally. 
`Text` values are -/// intended to *recognize* various spans. -/// -/// `&str` and `Range` are both convertible to `Text`. -#[derive(Clone)] -pub struct Text { - text: Option, - range: Option>, -} - -impl Text { - /// Returns a matcher that recognizes all spans. - pub fn any() -> Self { - Text { text: None, range: None } - } - - /// Returns a matcher that recognizes spans with the given text. - pub fn new(text: impl Into) -> Self { - Text { text: Some(text.into()), range: None } - } - - /// Returns a matcher that recognizes spans with the given byte range. - pub fn range(range: Range) -> Self { - Text { text: None, range: Some(range) } - } - - /// Returns a matcher that recognizes spans with the given byte range and - /// text. - pub fn text_and_range(text: impl Into, range: Range) -> Self { - Text { - text: Some(text.into()), - range: Some(range), - } - } - - /// Returns whether this span recognizes a particular span. - fn recognizes(&self, span: Span) -> bool { - self.text.as_ref().is_none_or(|text| text == span.text()) - && !self.range.as_ref().is_some_and(|range| { - let r = span.span(); - range != &(r.start()..r.end()) - }) - } -} - -impl> From for Text { - fn from(value: Y) -> Self { - Text::new(value) - } -} - -impl fmt::Debug for Text { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match (&self.text, &self.range) { - (Some(text), Some(range)) => write!(f, "{text:?} @ {range:?}"), - (Some(text), None) => fmt::Debug::fmt(text, f), - (None, Some(range)) => write!(f, " @ {range:?}"), - (None, None) => f.write_str(""), - } - } -} - -/// Records a way in which a matcher can be added for a particular token rule. -/// -/// See [`Matcher::then1()`]. -pub trait Match: rule::Rule { - /// Adds a new token to `matcher`. 
- fn add_token(matcher: &mut Matcher, lexeme: Lexeme, arg: Arg); -} - -impl> Match<(T,)> for rule::Keyword { - fn add_token(matcher: &mut Matcher, lexeme: Lexeme, (arg,): (T,)) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: arg.into(), - comments: Vec::new(), - kind: Kind::Keyword, - }) - } -} - -impl Match<((Open, Close), Matcher)> for rule::Bracket -where - Open: Into, - Close: Into, -{ - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - ((open, close), contents): ((Open, Close), Matcher), - ) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Delimited { - tokens: contents.stream, - delims: (open.into(), close.into()), - }, - }) - } -} - -impl> Match<(T,)> for rule::Ident { - fn add_token(matcher: &mut Matcher, lexeme: Lexeme, (arg,): (T,)) { - let arg = arg.into(); - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Ident { name: arg, prefix: None, suffix: None }, - }) - } -} - -/// A complex digital token matcher. -/// -/// This type is used the matcher argument type for complex digital rules, such -/// as those that have signs and exponents. -#[derive(Default)] -pub struct DigitalMatcher { - chunks: Vec, -} - -impl DigitalMatcher { - /// Creates a new matcher, with the given radix and digit blocks for the - /// mantissa. - pub fn new>( - radix: u8, - digits: impl IntoIterator, - ) -> Self { - Self { - chunks: vec![recognize::DigitalMatcher { - radix, - sign: None, - digits: digits.into_iter().map(Into::into).collect(), - prefix: None, - }], - } - } - - /// Sets the sign for the most recently added chunk of digits. - pub fn sign(self, sign: Sign) -> Self { - self.sign_span(sign, Text::any()) - } - - /// Sets the sign (and sign span) for the most recently added chunk of digits. 
- pub fn sign_span(mut self, sign: Sign, span: impl Into) -> Self { - self - .chunks - .last_mut() - .unwrap() - .sign - .get_or_insert_with(|| (sign, span.into())); - self - } - - /// Adds an expected exponent. - /// - /// The exponent must be in the given radix, delimited by the given prefix, - /// and have the given digits. - pub fn exp>( - mut self, - radix: u8, - prefix: impl Into, - digits: impl IntoIterator, - ) -> Self { - self.chunks.push(recognize::DigitalMatcher { - radix, - sign: None, - digits: digits.into_iter().map(Into::into).collect(), - prefix: Some(prefix.into()), - }); - self - } -} - -impl Match<(u8, Digits)> for rule::Digital -where - Digits: IntoIterator, - Digits::Item: Into, -{ - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - (radix, digits): (u8, Digits), - ) { - Self::add_token(matcher, lexeme, (DigitalMatcher::new(radix, digits),)); - } -} - -impl Match<(DigitalMatcher,)> for rule::Digital { - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - digits: (DigitalMatcher,), - ) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Digital { digits: digits.0.chunks, suffix: None }, - }) - } -} - -impl From<&'static str> for Content { - fn from(value: &'static str) -> Self { - Content::lit(value) - } -} - -impl Match<((Open, Close), Iter)> for rule::Quoted -where - Open: Into, - Close: Into, - Iter: IntoIterator, - Iter::Item: Into>, -{ - fn add_token( - matcher: &mut Matcher, - lexeme: Lexeme, - ((open, close), content): ((Open, Close), Iter), - ) { - matcher.stream.push(recognize::Matcher { - which: Some(lexeme.any()), - span: Text::any(), - comments: Vec::new(), - kind: Kind::Quoted { - content: content.into_iter().map(Into::into).collect(), - delims: (open.into(), close.into()), - prefix: None, - suffix: None, - }, - }) - } -} diff --git a/ilex/src/testing/recognize.rs b/ilex/src/testing/recognize.rs deleted file mode 100644 index 
dd7d495..0000000 --- a/ilex/src/testing/recognize.rs +++ /dev/null @@ -1,354 +0,0 @@ -//! Visitor code for token matching. -//! -//! This code is not very pretty or fast, since it's meant to generate -//! diagnostics in lexer/parser unit tests. - -use std::fmt; -use std::fmt::DebugStruct; -use std::fmt::Display; - -use crate::f; -use crate::file::Spanned; -use crate::rule; -use crate::spec::Lexeme; -use crate::testing::Text; -use crate::token; -use crate::token::Any; -use crate::token::Sign; -use crate::token::Token; - -pub struct Matcher { - pub which: Option>, - pub span: Text, - pub comments: Vec, - pub kind: Kind, -} - -pub enum Kind { - Eof, - Keyword, - Ident { - name: Text, - prefix: Option, - suffix: Option, - }, - Quoted { - content: Vec>, - delims: (Text, Text), - prefix: Option, - suffix: Option, - }, - Digital { - digits: Vec, - suffix: Option, - }, - Delimited { - delims: (Text, Text), - tokens: Vec, - }, -} - -#[derive(Debug)] -pub struct DigitalMatcher { - pub radix: u8, - pub sign: Option<(Sign, Text)>, - pub digits: Vec, - pub prefix: Option, -} - -impl Matcher { - pub fn recognizes(&self, state: &mut MatchState, tok: token::Any) { - state.match_spans("token span", &self.span, Spanned::span(&tok)); - - zip_eq("comments", state, &self.comments, tok.comments(), |state, t, s| { - state.match_spans("comment", t, s); - }); - - match (&self.kind, tok) { - (Kind::Eof, Any::Eof(..)) | (Kind::Keyword, Any::Keyword(..)) => {} - (Kind::Ident { name, prefix, suffix }, Any::Ident(tok)) => { - state.match_spans("identifier name", name, tok.name()); - state.match_options("prefix", prefix.as_ref(), tok.prefix()); - state.match_options("suffix", suffix.as_ref(), tok.suffix()); - } - (Kind::Quoted { delims, content, prefix, suffix }, Any::Quoted(tok)) => { - let [open, close] = tok.delimiters(); - state.match_spans("open quote", &delims.0, open); - state.match_spans("close quote", &delims.1, close); - state.match_options("prefix", prefix.as_ref(), tok.prefix()); - 
state.match_options("suffix", suffix.as_ref(), tok.suffix()); - - zip_eq( - "string contents", - state, - content, - tok.raw_content(), - |state, ours, theirs| match (ours, theirs) { - (token::Content::Lit(t), token::Content::Lit(s)) => { - state.match_spans("string content", t, s) - } - (token::Content::Esc(t, ours), token::Content::Esc(s, theirs)) => { - state.match_spans("string escape", t, s); - state.match_options("escape data", ours.as_ref(), theirs); - } - _ => state.error("mismatched string content types"), - }, - ); - } - (Kind::Digital { digits, suffix }, Any::Digital(tok)) => { - let recognize = |state: &mut MatchState, - mch: &DigitalMatcher, - tok: token::Digital| { - if mch.radix != tok.radix() { - state.error(f!( - "wrong radix; want {:?}, got {:?}", - mch.radix, - tok.radix() - )); - } - state.match_any_options( - "sign", - mch.sign.as_ref().map(|(s, _)| s), - tok.sign(), - |&a, b| a == b, - ); - state.match_options( - "sign span", - mch.sign.as_ref().map(|(_, sp)| sp), - tok.sign_span(), - ); - state.match_options("prefix", mch.prefix.as_ref(), tok.prefix()); - zip_eq( - "digit blocks", - state, - &mch.digits, - tok.digit_blocks(), - |state, t, s| { - state.match_spans("digit block", t, s); - }, - ); - }; - - recognize(state, &digits[0], tok); - zip_eq( - "exponent list", - state, - &digits[1..], - tok.exponents(), - |state, t, s| { - recognize(state, t, s); - }, - ); - - state.match_options("suffix", suffix.as_ref(), tok.suffix()); - } - (Kind::Delimited { delims, tokens }, Any::Bracket(tok)) => { - state.match_spans("open delimiter", &delims.0, tok.open()); - state.match_spans("close delimiter", &delims.1, tok.close()); - - zip_eq( - "bracket contents", - state, - tokens, - tok.contents(), - |state, ours, theirs| ours.recognizes(state, theirs), - ); - } - _ => state.error("mismatched token types"), - } - } -} - -impl fmt::Debug for Matcher { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let print_spans = - 
matches!(std::env::var("ILEX_SPANS").as_deref(), Ok("ranges" | "text")); - - let req_field = |d: &mut DebugStruct, name, span| { - if print_spans { - d.field(name, span); - } - }; - - let opt_field = |d: &mut DebugStruct, name, span: &Option| { - if print_spans && span.is_some() { - d.field(name, span.as_ref().unwrap()); - } - }; - - let vec_field = |d: &mut DebugStruct, name, spans: &Vec| { - if !spans.is_empty() { - d.field(name, spans); - } - }; - - let name = match &self.kind { - Kind::Eof => "Eof", - Kind::Keyword => "Keyword", - Kind::Ident { .. } => "Ident", - Kind::Quoted { .. } => "Quoted", - Kind::Digital { .. } => "Digital", - Kind::Delimited { .. } => "Delimited", - }; - - let name = match self.which { - Some(l) => format!("{name}({})", l.index()), - None => name.into(), - }; - - let mut d = f.debug_struct(&name); - - match &self.kind { - Kind::Ident { name, prefix, suffix } => { - req_field(&mut d, "name", name); - opt_field(&mut d, "prefix", prefix); - opt_field(&mut d, "suffix", suffix); - } - Kind::Quoted { content, delims, prefix, suffix } => { - d.field("content", content); - req_field(&mut d, "open", &delims.0); - req_field(&mut d, "close", &delims.1); - opt_field(&mut d, "prefix", prefix); - opt_field(&mut d, "suffix", suffix); - } - Kind::Digital { digits, suffix } => { - d.field("digits", digits); - opt_field(&mut d, "suffix", suffix); - } - Kind::Delimited { delims, tokens } => { - req_field(&mut d, "open", &delims.0); - req_field(&mut d, "close", &delims.1); - d.field("tokens", tokens); - } - _ => {} - }; - - req_field(&mut d, "span", &self.span); - vec_field(&mut d, "comments", &self.comments); - d.finish() - } -} - -pub struct MatchState { - errors: String, - stack: Vec, - error_count: usize, -} - -impl MatchState { - pub fn new() -> Self { - Self { - errors: String::new(), - stack: Vec::new(), - error_count: 0, - } - } - - fn error(&mut self, msg: impl Display) { - use std::fmt::Write; - - self.error_count += 1; - if self.error_count > 10 
{ - return; - } - - self.errors.push_str("at stream"); - for i in &self.stack { - let _ = write!(self.errors, "[{}]", i); - } - let _ = writeln!(self.errors, ": {msg}"); - } - - fn match_spans<'s>( - &mut self, - what: &str, - text: &Text, - span: impl Spanned<'s>, - ) { - let span = span.span(); - if !text.recognizes(span) { - self.error(f!("wrong {what}; want {:?}, got {:?}", text, span)); - } - } - - fn match_options<'s>( - &mut self, - what: &str, - text: Option<&Text>, - span: Option>, - ) { - let span = span.map(|s| s.span()); - if text.is_none() && span.is_none() { - return; - } - - if !text.zip(span).is_some_and(|(t, s)| t.recognizes(s)) { - self.error(f!("wrong {what}; want {:?}, got {:?}", text, span)); - } - } - - fn match_any_options( - &mut self, - what: &str, - text: Option, - span: Option, - eq: impl FnOnce(&T, &U) -> bool, - ) { - if text.is_none() && span.is_none() { - return; - } - - if !text - .as_ref() - .zip(span.as_ref()) - .is_some_and(|(t, s)| eq(t, s)) - { - self.error(f!("wrong {what}; want {:?}, got {:?}", text, span)); - } - } - - pub fn finish(mut self) -> Result<(), String> { - use std::fmt::Write; - - if self.error_count > 10 { - let _ = - writeln!(self.errors, "... 
and {} more errors", self.error_count - 1); - } - - if self.error_count > 0 { - return Err(self.errors); - } - Ok(()) - } -} - -pub fn zip_eq( - what: &str, - state: &mut MatchState, - ours: Ours, - theirs: Theirs, - mut cb: impl FnMut(&mut MatchState, Ours::Item, Theirs::Item), -) { - let mut ours = ours.into_iter(); - let mut theirs = theirs.into_iter(); - state.stack.push(0); - loop { - let ours = ours.next(); - let theirs = theirs.next(); - if ours.is_none() && theirs.is_none() { - state.stack.pop(); - break; - } - - if let (Some(ours), Some(theirs)) = (ours, theirs) { - cb(state, ours, theirs); - - *state.stack.last_mut().unwrap() += 1; - continue; - } - - let popped = state.stack.pop().unwrap(); - state.error(f!("{what} had unequal lengths (got to {popped})")); - break; - } -} diff --git a/ilex/src/token/mod.rs b/ilex/src/token/mod.rs index 92c2742..2988559 100644 --- a/ilex/src/token/mod.rs +++ b/ilex/src/token/mod.rs @@ -35,6 +35,7 @@ use crate::Never; use crate::WrongKind; mod stream; +pub mod summary; pub use stream::switch::switch; pub use stream::switch::Switch; diff --git a/ilex/src/token/summary.rs b/ilex/src/token/summary.rs new file mode 100644 index 0000000..5a2fed1 --- /dev/null +++ b/ilex/src/token/summary.rs @@ -0,0 +1,101 @@ +//! Implementation of `Stream::summary()`. + +use gilded::doc::Doc; + +use crate::file::Span; +use crate::file::Spanned; +use crate::token::Any; +use crate::token::Cursor; +use crate::token::Stream; + +use crate::token::Sign; +use crate::token::Token; + +use super::Content; + +impl Stream<'_> { + /// Returns a string that summarizes the contents of this token stream. + pub fn summary(&self) -> String { + self.cursor().summary().to_string(&Default::default()) + } +} + +impl<'a> Cursor<'a> { + fn summary(&self) -> Doc<'a> { + Doc::new().push({ *self }.map(|token| { + let doc = Doc::new() + .entry("lexeme", token.lexeme().index()) + .entry("span", span2doc(token.span())); + + match token { + Any::Eof(..) 
=> Doc::single("eof", doc), + Any::Keyword(..) => Doc::single("keyword", doc), + Any::Bracket(tok) => Doc::single( + "bracket", + doc + .array("delims", tok.delimiters().into_iter().map(span2doc)) + .entry("contents", tok.contents().summary()), + ), + + Any::Ident(tok) => Doc::single( + "ident", + doc + .entry("prefix", tok.prefix().map(span2doc)) + .entry("suffix", tok.suffix().map(span2doc)) + .entry("name", span2doc(tok.name())), + ), + + Any::Digital(tok) => Doc::single( + "ident", + doc + .entry("prefix", tok.prefix().map(span2doc)) + .entry("suffix", tok.suffix().map(span2doc)) + .entry("radix", tok.radix()) + .entry("sign", tok.sign().map(sign2str)) + .array("blocks", tok.digit_blocks().map(span2doc)) + .array( + "exponents", + tok.exponents().map(|exp| { + Doc::new() + .entry("span", span2doc(exp.span())) + .entry("prefix", exp.prefix().map(span2doc)) + .entry("radix", exp.radix()) + .entry("sign", exp.sign().map(sign2str)) + .array("blocks", exp.digit_blocks().map(span2doc)) + }), + ), + ), + + Any::Quoted(tok) => Doc::single( + "quoted", + doc + .entry("prefix", tok.prefix().map(span2doc)) + .entry("suffix", tok.suffix().map(span2doc)) + .array("delims", tok.delimiters().into_iter().map(span2doc)) + .array( + "contents", + tok.raw_content().map(|c| match c { + Content::Lit(lit) => Doc::single("text", span2doc(lit)), + Content::Esc(esc, data) => Doc::new() + .entry("esc", span2doc(esc)) + .entry("data", data.map(span2doc)), + }), + ), + ), + } + })) + } +} + +fn span2doc(span: Span) -> Doc { + Doc::new() + .array("span", [span.start(), span.end()]) + .entry("text", span.text()) +} + +fn sign2str(s: Sign) -> &'static str { + match s { + Sign::Pos => "+", + Sign::Neg => "-", + } +} diff --git a/ilex/tests/greedy.rs b/ilex/tests/greedy.rs deleted file mode 100644 index 306b2f0..0000000 --- a/ilex/tests/greedy.rs +++ /dev/null @@ -1,52 +0,0 @@ -// This test verifies that lexing is greedy in *most* cases. 
- -use ilex::rule::*; -use ilex::testing::Matcher; - -#[test] -fn greedy() { - let mut spec = ilex::Spec::builder(); - let rust_like = spec.rule(Quoted::with(Bracket::rust_style( - "#%", - ("poisonous", "["), - ("]", ">"), - ))); - - let cpp_like = spec.rule(Quoted::with(Bracket::cxx_style( - Ident::new(), - ("R\"", "("), - (")", "\""), - ))); - - let array = spec.rule(Bracket::from(("[", "]"))); - let poison = spec.rule(Keyword::new("poison")); - let ident = spec.rule(Ident::new()); - - let spec = spec.compile(); - - let text = r#" - poison - poisonous - poisonous[xyz]> - poisonous#%#%[xyz]#%#%> - poisonous [xyz] - R"cc(some c++)" )cc" - "#; - - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let tokens = ctx.new_file("test.file", text).lex(&spec, &report).unwrap(); - eprintln!("stream: {tokens:#?}"); - - Matcher::new() - .then1(poison, "poison") - .then1(ident, "poisonous") - .then2(rust_like, ("poisonous[", "]>"), ["xyz"]) - .then2(rust_like, ("poisonous#%#%[", "]#%#%>"), ["xyz"]) - .then1(ident, "poisonous") - .then2(array, ("[", "]"), Matcher::new().then1(ident, "xyz")) - .then2(cpp_like, ("R\"cc(", ")cc\""), ["some c++)\" "]) - .eof() - .assert_matches(&tokens); -} diff --git a/ilex/tests/greedy/greedy.tokens.yaml b/ilex/tests/greedy/greedy.tokens.yaml new file mode 100644 index 0000000..3bfeb32 --- /dev/null +++ b/ilex/tests/greedy/greedy.tokens.yaml @@ -0,0 +1,52 @@ +- keyword: + lexeme: 3 + span: {span: [0, 6], text: "poison"} +- ident: + lexeme: 4 + span: {span: [7, 16], text: "poisonous"} + name: {span: [7, 16], text: "poisonous"} +- quoted: + lexeme: 0 + span: + span: [17, 32] + text: "poisonous[xyz]>" + delims: + - {span: [17, 27], text: "poisonous["} + - {span: [30, 32], text: "]>"} + contents: [{text: {span: [27, 30], text: "xyz"}}] +- quoted: + lexeme: 0 + span: + span: [33, 56] + text: "poisonous#%#%[xyz]#%#%>" + delims: + - {span: [33, 47], text: "poisonous#%#%["} + - {span: [50, 56], 
text: "]#%#%>"} + contents: [{text: {span: [47, 50], text: "xyz"}}] +- ident: + lexeme: 4 + span: {span: [57, 66], text: "poisonous"} + name: {span: [57, 66], text: "poisonous"} +- bracket: + lexeme: 2 + span: {span: [67, 72], text: "[xyz]"} + delims: + - {span: [67, 68], text: "["} + - {span: [71, 72], text: "]"} + contents: + - ident: + lexeme: 4 + span: {span: [68, 71], text: "xyz"} + name: {span: [68, 71], text: "xyz"} +- quoted: + lexeme: 1 + span: + span: [73, 93] + text: "R\"cc(some c++)\" )cc\"" + delims: + - {span: [73, 78], text: "R\"cc("} + - {span: [89, 93], text: ")cc\""} + contents: [{text: {span: [78, 89], text: "some c++)\" "}}] +- eof: + lexeme: 2147483647 + span: {span: [93, 93], text: ""} diff --git a/ilex/tests/greedy/greedy.txt b/ilex/tests/greedy/greedy.txt new file mode 100644 index 0000000..2c058ce --- /dev/null +++ b/ilex/tests/greedy/greedy.txt @@ -0,0 +1,6 @@ +poison +poisonous +poisonous[xyz]> +poisonous#%#%[xyz]#%#%> +poisonous [xyz] +R"cc(some c++)" )cc" \ No newline at end of file diff --git a/ilex/tests/greedy/main.rs b/ilex/tests/greedy/main.rs new file mode 100644 index 0000000..c80989c --- /dev/null +++ b/ilex/tests/greedy/main.rs @@ -0,0 +1,45 @@ +use ilex::rule::*; +use ilex::Context; +use ilex::Lexeme; + +#[gilded::test("tests/greedy/*.txt")] +fn greedy(test: &gilded::Test) { + // This test verifies that lexing is greedy in *most* cases. 
+ + #[ilex::spec] + struct Greedy { + #[rule(Quoted::with(Bracket::rust_style( + "#%", + ("poisonous", "["), + ("]", ">"), + )))] + rust_like: Lexeme, + + #[rule(Quoted::with(Bracket::cxx_style( + Ident::new(), + ("R\"", "("), + (")", "\""), + )))] + cpp_like: Lexeme, + + #[rule("[", "]")] + array: Lexeme, + + poison: Lexeme, + + #[rule(Ident::new())] + ident: Lexeme, + } + + let ctx = Context::new(); + let report = ctx.new_report(); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); + match file.lex(Greedy::get().spec(), &report) { + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), + } +} diff --git a/ilex/tests/json.rs b/ilex/tests/json.rs deleted file mode 100644 index 567ea89..0000000 --- a/ilex/tests/json.rs +++ /dev/null @@ -1,335 +0,0 @@ -use core::fmt; -use std::fmt::Write; - -use ilex::fp::Fp64; -use ilex::report::Expected; -use ilex::report::Report; -use ilex::rule::*; -use ilex::testing::DigitalMatcher; -use ilex::testing::Matcher; -use ilex::token; -use ilex::token::Content as C; -use ilex::token::Cursor; -use ilex::Lexeme; - -#[ilex::spec] -struct JsonSpec { - #[rule(",")] - comma: Lexeme, - - #[rule(":")] - colon: Lexeme, - - #[rule("true")] - true_: Lexeme, - - #[rule("false")] - false_: Lexeme, - - #[rule("null")] - null: Lexeme, - - #[named] - #[rule("[", "]")] - array: Lexeme, - - #[named] - #[rule("{", "}")] - object: Lexeme, - - #[named] - #[rule(Quoted::new('"') - .invalid_escape(r"\") - .escapes([ - "\\\"", r"\\", r"\/", - r"\b", r"\f", r"\n", r"\t", r"\r", - ]) - .fixed_length_escape(r"\u", 4))] - string: Lexeme, - - #[named] - #[rule(Digital::new(10) - .minus() - .point_limit(0..2) - .exponents(["e", "E"], Digits::new(10).plus().minus()))] - number: Lexeme, -} - -const SOME_JSON: &str = r#" -{ - "keywords": [null, true, false], - "string": "abcdefg", - "number": 42, - "int": 42.0, - "frac": 0.42, - "neg": 
-42, - "exp": 42e+42, - "nest": { - "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" - } -} -"#; - -#[test] -fn check_tokens() { - let json = JsonSpec::get(); - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let tokens = ctx - .new_file("", SOME_JSON) - .lex(json.spec(), &report) - .unwrap(); - eprintln!("stream: {tokens:#?}"); - - Matcher::new() - .then2( - json.object, - ("{", "}"), - Matcher::new() - .then2(json.string, ('"', '"'), ["keywords"]) - .then1(json.colon, ":") - .then2( - json.array, - ("[", "]"), - Matcher::new() - .then1(json.null, "null") - .then1(json.comma, ",") - .then1(json.true_, "true") - .then1(json.comma, ",") - .then1(json.false_, "false"), - ) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["string"]) - .then1(json.colon, ":") - .then2(json.string, ('"', '"'), ["abcdefg"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["number"]) - .then1(json.colon, ":") - .then2(json.number, 10, ["42"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["int"]) - .then1(json.colon, ":") - .then2(json.number, 10, ["42", "0"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["frac"]) - .then1(json.colon, ":") - .then2(json.number, 10, ["0", "42"]) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["neg"]) - .then1(json.colon, ":") - .then1( - json.number, - DigitalMatcher::new(10, ["42"]).sign_span(Sign::Neg, "-"), - ) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["exp"]) - .then1(json.colon, ":") - .then1( - json.number, - DigitalMatcher::new(10, ["42"]) - .exp(10, "e", ["42"]) - .sign_span(Sign::Pos, "+"), - ) - .then1(json.comma, ",") - // - .then2(json.string, ('"', '"'), ["nest"]) - .then1(json.colon, ":") - .then2( - json.object, - ("{", "}"), - Matcher::new() - .then2(json.string, ('"', '"'), [C::lit("escapes"), C::esc(r"\n")]) - .then1(json.colon, ":") - .then2( - json.string, - 
('"', '"'), - [ - C::esc("\\\""), - C::esc(r"\\"), - C::esc(r"\/"), - C::esc(r"\b"), - C::esc(r"\f"), - C::esc(r"\n"), - C::esc(r"\t"), - C::esc(r"\r"), - C::esc_with_data(r"\u", "0000"), - C::esc_with_data(r"\u", "1234"), - C::esc_with_data(r"\u", "ffff"), - ], - ), - ), - ) - .eof() - .assert_matches(&tokens); -} - -#[derive(Clone, Debug, PartialEq)] -enum Json { - Null, - Num(f64), - Bool(bool), - Str(String), - Arr(Vec), - Obj(Vec<(String, Json)>), -} - -#[test] -fn parse_test() { - use similar_asserts::assert_eq; - - let value = parse("null").unwrap(); - assert_eq!(value, Json::Null); - - let value = parse("[null, true, false]").unwrap(); - assert_eq!( - value, - Json::Arr(vec![Json::Null, Json::Bool(true), Json::Bool(false)]) - ); - - let value = parse(SOME_JSON).unwrap(); - assert_eq!( - value, - Json::Obj(vec![ - ( - "keywords".into(), - Json::Arr(vec![Json::Null, Json::Bool(true), Json::Bool(false)]) - ), - ("string".into(), Json::Str("abcdefg".into())), - ("number".into(), Json::Num(42.0)), - ("int".into(), Json::Num(42.0)), - ("frac".into(), Json::Num(0.42)), - ("neg".into(), Json::Num(-42.0)), - ("exp".into(), Json::Num(42e42)), - ( - "nest".into(), - Json::Obj(vec![( - "escapes\n".into(), - Json::Str("\"\\/\u{8}\u{c}\n\t\r\0ሴ\u{ffff}".into()) - )]) - ), - ]) - ); -} - -fn parse(data: &str) -> Result { - struct Error(String); - impl fmt::Debug for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_char('\n')?; - f.write_str(&self.0) - } - } - - let json = JsonSpec::get(); - - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let stream = ctx - .new_file("", data) - .lex(json.spec(), &report) - .map_err(|e| Error(e.to_string()))?; - let value = parse0(&report, json, &mut stream.cursor()); - - report.fatal_or(value).map_err(|e| Error(e.to_string())) -} - -fn parse0(report: &Report, json: &JsonSpec, cursor: &mut Cursor) -> Json { - let quote2str = |str: token::Quoted| -> 
String { - str.to_utf8(|key, data, buf| { - let char = match key.text() { - "\\\"" => '\"', - r"\\" => '\\', - r"\/" => '/', - r"\b" => '\x08', - r"\f" => '\x0c', - r"\n" => '\n', - r"\t" => '\t', - r"\r" => '\r', - // This is sloppy about surrogates but this is just an example. - r"\u" => { - let data = data.unwrap(); - let code = - u16::from_str_radix(data.text(), 16).unwrap_or_else(|_| { - report.builtins(json.spec()).expected( - [Expected::Name("hex-encoded u16".into())], - data.text(), - data, - ); - 0 - }); - for c in char::decode_utf16([code]) { - buf.push(c.unwrap_or('đŸ˜ĸ')) - } - return; - } - esc => panic!("{}", esc), - }; - buf.push(char); - }) - }; - - let value = token::switch() - .case(json.null, |_, _| Json::Null) - .case(json.false_, |_, _| Json::Bool(false)) - .case(json.true_, |_, _| Json::Bool(true)) - .case(json.string, |str: token::Quoted, _| Json::Str(quote2str(str))) - .case(json.number, |num: token::Digital, _| { - Json::Num(num.to_float::(.., report).unwrap().to_hard()) - }) - .case(json.array, |array: token::Bracket, _| { - let mut trailing = None; - let vec = array - .contents() - .delimited(json.comma, |c| Some(parse0(report, json, c))) - .map(|(e, c)| { - trailing = c; - e - }) - .collect(); - - if let Some(comma) = trailing { - report - .error("trailing commas are not allowed in JSON") - .saying(comma, "remove this comma"); - } - - Json::Arr(vec) - }) - .case(json.object, |object: token::Bracket, _| { - let mut trailing = None; - let vec = object - .contents() - .delimited(json.comma, |c| { - let key = c - .take(json.string, report) - .map(|q| quote2str(q)) - .unwrap_or("đŸ˜ĸ".into()); - c.take(json.colon, report); - let value = parse0(report, json, c); - Some((key, value)) - }) - .map(|(e, c)| { - trailing = c; - e - }) - .collect(); - - if let Some(comma) = trailing { - report - .error("trailing commas are not allowed in JSON") - .saying(comma, "remove this comma"); - } - - Json::Obj(vec) - }) - .take(cursor, report); - 
value.unwrap_or(Json::Null) -} diff --git a/ilex/tests/json/array.ast.txt b/ilex/tests/json/array.ast.txt new file mode 100644 index 0000000..c74a872 --- /dev/null +++ b/ilex/tests/json/array.ast.txt @@ -0,0 +1,11 @@ +Arr( + [ + Null, + Bool( + true, + ), + Bool( + false, + ), + ], +) \ No newline at end of file diff --git a/ilex/tests/json/array.json b/ilex/tests/json/array.json new file mode 100644 index 0000000..db2c3f1 --- /dev/null +++ b/ilex/tests/json/array.json @@ -0,0 +1 @@ +[null, true, false] \ No newline at end of file diff --git a/ilex/tests/json/array.tokens.yaml b/ilex/tests/json/array.tokens.yaml new file mode 100644 index 0000000..6886c09 --- /dev/null +++ b/ilex/tests/json/array.tokens.yaml @@ -0,0 +1,27 @@ +- bracket: + lexeme: 5 + span: + span: [0, 19] + text: "[null, true, false]" + delims: + - {span: [0, 1], text: "["} + - {span: [18, 19], text: "]"} + contents: + - keyword: + lexeme: 4 + span: {span: [1, 5], text: "null"} + - keyword: + lexeme: 0 + span: {span: [5, 6], text: ","} + - keyword: + lexeme: 2 + span: {span: [7, 11], text: "true"} + - keyword: + lexeme: 0 + span: {span: [11, 12], text: ","} + - keyword: + lexeme: 3 + span: {span: [13, 18], text: "false"} +- eof: + lexeme: 2147483647 + span: {span: [19, 19], text: ""} diff --git a/ilex/tests/json/main.rs b/ilex/tests/json/main.rs new file mode 100644 index 0000000..3c23146 --- /dev/null +++ b/ilex/tests/json/main.rs @@ -0,0 +1,183 @@ +use ilex::fp::Fp64; +use ilex::report::Expected; +use ilex::report::Report; +use ilex::rule::*; +use ilex::token; +use ilex::token::Cursor; +use ilex::Context; +use ilex::Lexeme; + +#[ilex::spec] +struct JsonSpec { + #[rule(",")] + comma: Lexeme, + + #[rule(":")] + colon: Lexeme, + + #[rule("true")] + true_: Lexeme, + + #[rule("false")] + false_: Lexeme, + + #[rule("null")] + null: Lexeme, + + #[named] + #[rule("[", "]")] + array: Lexeme, + + #[named] + #[rule("{", "}")] + object: Lexeme, + + #[named] + #[rule(Quoted::new('"') + .invalid_escape(r"\") + 
.escapes([ + "\\\"", r"\\", r"\/", + r"\b", r"\f", r"\n", r"\t", r"\r", + ]) + .fixed_length_escape(r"\u", 4))] + string: Lexeme, + + #[named] + #[rule(Digital::new(10) + .minus() + .point_limit(0..2) + .exponents(["e", "E"], Digits::new(10).plus().minus()))] + number: Lexeme, +} + +#[gilded::test("tests/json/*.json")] +fn check_tokens(test: &gilded::Test) { + let ctx = Context::new(); + let report = ctx.new_report(); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, ast, stderr] = + test.outputs(["tokens.yaml", "ast.txt", "stderr"]); + + let stream = match file.lex(JsonSpec::get().spec(), &report) { + Ok(stream) => stream, + Err(fatal) => { + stderr(fatal.to_string()); + return; + } + }; + + tokens(stream.summary()); + + let json = parse(&report, JsonSpec::get(), &mut stream.cursor()); + ast(format!("{json:#?}")); + + if let Err(fatal) = report.fatal_or(()) { + stderr(fatal.to_string()); + } +} + +#[derive(Clone, Debug, PartialEq)] +enum Json { + Null, + Num(f64), + Bool(bool), + Str(String), + Arr(Vec), + Obj(Vec<(String, Json)>), +} + +fn parse(report: &Report, json: &JsonSpec, cursor: &mut Cursor) -> Json { + let quote2str = |str: token::Quoted| -> String { + str.to_utf8(|key, data, buf| { + let char = match key.text() { + "\\\"" => '\"', + r"\\" => '\\', + r"\/" => '/', + r"\b" => '\x08', + r"\f" => '\x0c', + r"\n" => '\n', + r"\t" => '\t', + r"\r" => '\r', + // This is sloppy about surrogates but this is just an example. 
+ r"\u" => { + let data = data.unwrap(); + let code = + u16::from_str_radix(data.text(), 16).unwrap_or_else(|_| { + report.builtins(json.spec()).expected( + [Expected::Name("hex-encoded u16".into())], + data.text(), + data, + ); + 0 + }); + for c in char::decode_utf16([code]) { + buf.push(c.unwrap_or('đŸ˜ĸ')) + } + return; + } + esc => panic!("{}", esc), + }; + buf.push(char); + }) + }; + + let value = token::switch() + .case(json.null, |_, _| Json::Null) + .case(json.false_, |_, _| Json::Bool(false)) + .case(json.true_, |_, _| Json::Bool(true)) + .case(json.string, |str: token::Quoted, _| Json::Str(quote2str(str))) + .case(json.number, |num: token::Digital, _| { + Json::Num(num.to_float::(.., report).unwrap().to_hard()) + }) + .case(json.array, |array: token::Bracket, _| { + let mut trailing = None; + let vec = array + .contents() + .delimited(json.comma, |c| Some(parse(report, json, c))) + .map(|(e, c)| { + trailing = c; + e + }) + .collect(); + + if let Some(comma) = trailing { + report + .error("trailing commas are not allowed in JSON") + .saying(comma, "remove this comma"); + } + + Json::Arr(vec) + }) + .case(json.object, |object: token::Bracket, _| { + let mut trailing = None; + let vec = object + .contents() + .delimited(json.comma, |c| { + let key = c + .take(json.string, report) + .map(|q| quote2str(q)) + .unwrap_or("đŸ˜ĸ".into()); + c.take(json.colon, report); + let value = parse(report, json, c); + Some((key, value)) + }) + .map(|(e, c)| { + trailing = c; + e + }) + .collect(); + + if let Some(comma) = trailing { + report + .error("trailing commas are not allowed in JSON") + .saying(comma, "remove this comma"); + } + + Json::Obj(vec) + }) + .take(cursor, report); + value.unwrap_or(Json::Null) +} diff --git a/ilex/tests/json/null.ast.txt b/ilex/tests/json/null.ast.txt new file mode 100644 index 0000000..5f6f79d --- /dev/null +++ b/ilex/tests/json/null.ast.txt @@ -0,0 +1 @@ +Null \ No newline at end of file diff --git a/ilex/tests/json/null.json 
b/ilex/tests/json/null.json new file mode 100644 index 0000000..ec747fa --- /dev/null +++ b/ilex/tests/json/null.json @@ -0,0 +1 @@ +null \ No newline at end of file diff --git a/ilex/tests/json/null.tokens.yaml b/ilex/tests/json/null.tokens.yaml new file mode 100644 index 0000000..5746ee7 --- /dev/null +++ b/ilex/tests/json/null.tokens.yaml @@ -0,0 +1,6 @@ +- keyword: + lexeme: 4 + span: {span: [0, 4], text: "null"} +- eof: + lexeme: 2147483647 + span: {span: [4, 4], text: ""} diff --git a/ilex/tests/json/obj.ast.txt b/ilex/tests/json/obj.ast.txt new file mode 100644 index 0000000..fe75b3d --- /dev/null +++ b/ilex/tests/json/obj.ast.txt @@ -0,0 +1,67 @@ +Obj( + [ + ( + "keywords", + Arr( + [ + Null, + Bool( + true, + ), + Bool( + false, + ), + ], + ), + ), + ( + "string", + Str( + "abcdefg", + ), + ), + ( + "number", + Num( + 42.0, + ), + ), + ( + "int", + Num( + 42.0, + ), + ), + ( + "frac", + Num( + 0.42, + ), + ), + ( + "neg", + Num( + -42.0, + ), + ), + ( + "exp", + Num( + 4.2e43, + ), + ), + ( + "nest", + Obj( + [ + ( + "escapes\n", + Str( + "\"\\/\u{8}\u{c}\n\t\r\0ሴ\u{ffff}", + ), + ), + ], + ), + ), + ], +) \ No newline at end of file diff --git a/ilex/tests/json/obj.json b/ilex/tests/json/obj.json new file mode 100644 index 0000000..4b0bf1a --- /dev/null +++ b/ilex/tests/json/obj.json @@ -0,0 +1,12 @@ +{ + "keywords": [null, true, false], + "string": "abcdefg", + "number": 42, + "int": 42.0, + "frac": 0.42, + "neg": -42, + "exp": 42e+42, + "nest": { + "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" + } +} \ No newline at end of file diff --git a/ilex/tests/json/obj.tokens.yaml b/ilex/tests/json/obj.tokens.yaml new file mode 100644 index 0000000..26b25dd --- /dev/null +++ b/ilex/tests/json/obj.tokens.yaml @@ -0,0 +1,245 @@ +- bracket: + lexeme: 6 + span: + span: [0, 209] + text: | + { + "keywords": [null, true, false], + "string": "abcdefg", + "number": 42, + "int": 42.0, + "frac": 0.42, + "neg": -42, + "exp": 42e+42, + "nest": { + "escapes\n": 
"\"\\\/\b\f\n\t\r\u0000\u1234\uffff" + } + } + delims: + - {span: [0, 1], text: "{"} + - {span: [208, 209], text: "}"} + contents: + - quoted: + lexeme: 7 + span: {span: [4, 14], text: "\"keywords\""} + delims: + - {span: [4, 5], text: "\""} + - {span: [13, 14], text: "\""} + contents: [{text: {span: [5, 13], text: "keywords"}}] + - keyword: + lexeme: 1 + span: {span: [14, 15], text: ":"} + - bracket: + lexeme: 5 + span: + span: [16, 35] + text: "[null, true, false]" + delims: + - {span: [16, 17], text: "["} + - {span: [34, 35], text: "]"} + contents: + - keyword: + lexeme: 4 + span: {span: [17, 21], text: "null"} + - keyword: + lexeme: 0 + span: {span: [21, 22], text: ","} + - keyword: + lexeme: 2 + span: {span: [23, 27], text: "true"} + - keyword: + lexeme: 0 + span: {span: [27, 28], text: ","} + - keyword: + lexeme: 3 + span: {span: [29, 34], text: "false"} + - keyword: + lexeme: 0 + span: {span: [35, 36], text: ","} + - quoted: + lexeme: 7 + span: {span: [39, 47], text: "\"string\""} + delims: + - {span: [39, 40], text: "\""} + - {span: [46, 47], text: "\""} + contents: [{text: {span: [40, 46], text: "string"}}] + - keyword: + lexeme: 1 + span: {span: [47, 48], text: ":"} + - quoted: + lexeme: 7 + span: {span: [49, 58], text: "\"abcdefg\""} + delims: + - {span: [49, 50], text: "\""} + - {span: [57, 58], text: "\""} + contents: [{text: {span: [50, 57], text: "abcdefg"}}] + - keyword: + lexeme: 0 + span: {span: [58, 59], text: ","} + - quoted: + lexeme: 7 + span: {span: [62, 70], text: "\"number\""} + delims: + - {span: [62, 63], text: "\""} + - {span: [69, 70], text: "\""} + contents: [{text: {span: [63, 69], text: "number"}}] + - keyword: + lexeme: 1 + span: {span: [70, 71], text: ":"} + - ident: + lexeme: 8 + span: {span: [72, 74], text: "42"} + radix: 10 + blocks: [{span: [72, 74], text: "42"}] + exponents: [] + - keyword: + lexeme: 0 + span: {span: [74, 75], text: ","} + - quoted: + lexeme: 7 + span: {span: [78, 83], text: "\"int\""} + delims: + - {span: 
[78, 79], text: "\""} + - {span: [82, 83], text: "\""} + contents: [{text: {span: [79, 82], text: "int"}}] + - keyword: + lexeme: 1 + span: {span: [83, 84], text: ":"} + - ident: + lexeme: 8 + span: {span: [85, 89], text: "42.0"} + radix: 10 + blocks: + - {span: [85, 87], text: "42"} + - {span: [88, 89], text: "0"} + exponents: [] + - keyword: + lexeme: 0 + span: {span: [89, 90], text: ","} + - quoted: + lexeme: 7 + span: {span: [93, 99], text: "\"frac\""} + delims: + - {span: [93, 94], text: "\""} + - {span: [98, 99], text: "\""} + contents: [{text: {span: [94, 98], text: "frac"}}] + - keyword: + lexeme: 1 + span: {span: [99, 100], text: ":"} + - ident: + lexeme: 8 + span: {span: [101, 105], text: "0.42"} + radix: 10 + blocks: + - {span: [101, 102], text: "0"} + - {span: [103, 105], text: "42"} + exponents: [] + - keyword: + lexeme: 0 + span: {span: [105, 106], text: ","} + - quoted: + lexeme: 7 + span: {span: [109, 114], text: "\"neg\""} + delims: + - {span: [109, 110], text: "\""} + - {span: [113, 114], text: "\""} + contents: [{text: {span: [110, 113], text: "neg"}}] + - keyword: + lexeme: 1 + span: {span: [114, 115], text: ":"} + - ident: + lexeme: 8 + span: {span: [116, 119], text: "-42"} + radix: 10 + sign: "-" + blocks: [{span: [117, 119], text: "42"}] + exponents: [] + - keyword: + lexeme: 0 + span: {span: [119, 120], text: ","} + - quoted: + lexeme: 7 + span: {span: [123, 128], text: "\"exp\""} + delims: + - {span: [123, 124], text: "\""} + - {span: [127, 128], text: "\""} + contents: [{text: {span: [124, 127], text: "exp"}}] + - keyword: + lexeme: 1 + span: {span: [128, 129], text: ":"} + - ident: + lexeme: 8 + span: {span: [130, 136], text: "42e+42"} + radix: 10 + blocks: [{span: [130, 132], text: "42"}] + exponents: + - span: {span: [130, 136], text: "42e+42"} + prefix: {span: [132, 133], text: "e"} + radix: 10 + sign: "+" + blocks: [{span: [134, 136], text: "42"}] + - keyword: + lexeme: 0 + span: {span: [136, 137], text: ","} + - quoted: + lexeme: 7 + 
span: {span: [140, 146], text: "\"nest\""} + delims: + - {span: [140, 141], text: "\""} + - {span: [145, 146], text: "\""} + contents: [{text: {span: [141, 145], text: "nest"}}] + - keyword: + lexeme: 1 + span: {span: [146, 147], text: ":"} + - bracket: + lexeme: 6 + span: + span: [148, 207] + text: | + { + "escapes\n": "\"\\\/\b\f\n\t\r\u0000\u1234\uffff" + } + delims: + - {span: [148, 149], text: "{"} + - {span: [206, 207], text: "}"} + contents: + - quoted: + lexeme: 7 + span: + span: [154, 165] + text: "\"escapes\\n\"" + delims: + - {span: [154, 155], text: "\""} + - {span: [164, 165], text: "\""} + contents: + - text: {span: [155, 162], text: "escapes"} + - {esc: {span: [162, 164], text: "\\n"}} + - keyword: + lexeme: 1 + span: {span: [165, 166], text: ":"} + - quoted: + lexeme: 7 + span: + span: [167, 203] + text: "\"\\\"\\\\\\/\\b\\f\\n\\t\\r\\u0000\\u1234\\uffff\"" + delims: + - {span: [167, 168], text: "\""} + - {span: [202, 203], text: "\""} + contents: + - {esc: {span: [168, 170], text: "\\\""}} + - {esc: {span: [170, 172], text: "\\\\"}} + - {esc: {span: [172, 174], text: "\\/"}} + - {esc: {span: [174, 176], text: "\\b"}} + - {esc: {span: [176, 178], text: "\\f"}} + - {esc: {span: [178, 180], text: "\\n"}} + - {esc: {span: [180, 182], text: "\\t"}} + - {esc: {span: [182, 184], text: "\\r"}} + - esc: {span: [184, 186], text: "\\u"} + data: {span: [186, 190], text: "0000"} + - esc: {span: [190, 192], text: "\\u"} + data: {span: [192, 196], text: "1234"} + - esc: {span: [196, 198], text: "\\u"} + data: {span: [198, 202], text: "ffff"} +- eof: + lexeme: 2147483647 + span: {span: [209, 209], text: ""} diff --git a/ilex/tests/llvm.rs b/ilex/tests/llvm.rs deleted file mode 100644 index 34e2585..0000000 --- a/ilex/tests/llvm.rs +++ /dev/null @@ -1,284 +0,0 @@ -use ilex::rule::*; -use ilex::testing::Matcher; -use ilex::token::Content as C; -use ilex::Lexeme; - -#[ilex::spec] -struct Llvm { - #[rule(";")] - comment: Lexeme, - - #[rule('(', ')')] - parens: Lexeme, 
- #[rule('[', ']')] - brackets: Lexeme, - #[rule('<', '>')] - vector: Lexeme, - #[rule('{', '}')] - braces: Lexeme, - #[rule("<{", "}>")] - packed: Lexeme, - #[rule("!{", "}")] - meta: Lexeme, - - #[rule(',')] - comma: Lexeme, - #[rule('=')] - equal: Lexeme, - #[rule('*')] - star: Lexeme, - #[rule('x')] - times: Lexeme, - - br: Lexeme, - call: Lexeme, - icmp: Lexeme, - #[rule("eq")] - icmp_eq: Lexeme, - ret: Lexeme, - unreachable: Lexeme, - - constant: Lexeme, - declare: Lexeme, - define: Lexeme, - global: Lexeme, - - label: Lexeme, - null: Lexeme, - ptr: Lexeme, - #[rule(Digital::new(10).prefix("i"))] - int: Lexeme, - void: Lexeme, - - private: Lexeme, - unnamed_addr: Lexeme, - nocapture: Lexeme, - nounwind: Lexeme, - - #[named] - #[rule(Quoted::new('"') - .fixed_length_escape(r"\", 2) - .prefixes(["", "c"]))] - string: Lexeme, - - #[named("identifier")] - #[rule(Ident::new() - .ascii_only() - .extra_starts(".0123456789".chars()) - .suffix(":"))] - label_ident: Lexeme, - - #[named("identifier")] - #[rule(Ident::new() - .ascii_only() - .extra_starts(".0123456789".chars()) - .prefixes(["!", "@", "%"]))] - bare: Lexeme, - - #[named("quoted identifier")] - #[rule(Quoted::new('"') - .fixed_length_escape(r"\", 2) - .prefixes(["!", "@", "%"]))] - quoted: Lexeme, - - #[named = "number"] - #[rule(Digital::new(10) - .minus() - .point_limit(0..2) - .exponents(["e", "E"], Digits::new(10).plus().minus()))] - dec: Lexeme, - - #[named = "number"] - #[rule(Digital::new(16).minus().prefix("0x"))] - hex: Lexeme, -} - -#[test] -fn llvm() { - let text = r#" - ; Declare the string constant as a global constant. - @.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" - - ; External declaration of the puts function - declare i32 @"non trivial name"(ptr nocapture) nounwind - - ; Definition of main function - define i32 @main(i32 %0, ptr %1) { - ; Call puts function to write out the string to stdout. 
- call i32 @"non trivial name"(ptr @.str) - ret i32 0 - } - - ; Named metadata - !0 = !{i32 42, null, !"string"} - !foo = !{!0} - @glb = global i8 0 - - define void @f(ptr %a) { - %c = icmp eq ptr %a, @glb - br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a - BB_EXIT: - call void @exit() - unreachable - BB_CONTINUE: - ret void - } - "#; - - let llvm = Llvm::get(); - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); - let report = ctx.new_report(); - let tokens = ctx - .new_file("test.file", text) - .lex(llvm.spec(), &report) - .unwrap(); - eprintln!("stream: {tokens:#?}"); - - Matcher::new() - .prefix1(llvm.bare, "@", ".str") - .comments(["; Declare the string constant as a global constant.\n"]) - .then1(llvm.equal, "=") - .then1(llvm.private, "private") - .then1(llvm.unnamed_addr, "unnamed_addr") - .then1(llvm.constant, "constant") - .then2( - llvm.brackets, - ("[", "]"), - Matcher::new() - .then2(llvm.dec, 10, ["13"]) - .then1(llvm.times, "x") - .prefix2(llvm.int, "i", 10, ["8"]), - ) - .prefix2( - llvm.string, - "c", - ('"', '"'), - [ - C::lit("hello world"), - C::esc_with_data(r"\", "0A"), - C::esc_with_data(r"\", "00"), - ], - ) - // - .then1(llvm.declare, "declare") - .comments(["; External declaration of the puts function\n"]) - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix2(llvm.quoted, "@", ('"', '"'), ["non trivial name"]) - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .then1(llvm.ptr, "ptr") - .then1(llvm.nocapture, "nocapture"), - ) - .then1(llvm.nounwind, "nounwind") - // - .then1(llvm.define, "define") - .comments(["; Definition of main function\n"]) - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix1(llvm.bare, "@", "main") - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix1(llvm.bare, "%", "0") - .then1(llvm.comma, ",") - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "%", "1"), - ) - .then2( - llvm.braces, - ("{", "}"), - Matcher::new() - .then1(llvm.call, 
"call") - .comments(["; Call puts function to write out the string to stdout.\n"]) - .prefix2(llvm.int, "i", 10, ["32"]) - .prefix2(llvm.quoted, "@", ('"', '"'), ["non trivial name"]) - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "@", ".str"), - ) - .then1(llvm.ret, "ret") - .prefix2(llvm.int, "i", 10, ["32"]) - .then2(llvm.dec, 10, ["0"]), - ) - // - .prefix1(llvm.bare, "!", "0") - .comments(["; Named metadata\n"]) - .then1(llvm.equal, "=") - .then2( - llvm.meta, - ("!{", "}"), - Matcher::new() - .prefix2(llvm.dec, "i", 10, ["32"]) - .then2(llvm.dec, 10, ["42"]) - .then1(llvm.comma, ",") - .then1(llvm.null, "null") - .then1(llvm.comma, ",") - .prefix2(llvm.quoted, "!", ('"', '"'), ["string"]), - ) - .prefix1(llvm.bare, "!", "foo") - .then1(llvm.equal, "=") - .then2(llvm.meta, ("!{", "}"), Matcher::new().prefix1(llvm.bare, "!", "0")) - // - .prefix1(llvm.bare, "@", "glb") - .then1(llvm.equal, "=") - .then1(llvm.global, "global") - .prefix2(llvm.int, "i", 10, ["8"]) - .then2(llvm.dec, 10, ["0"]) - // - .then1(llvm.define, "define") - .then1(llvm.void, "void") - .prefix1(llvm.bare, "@", "f") - .then2( - llvm.parens, - ("(", ")"), - Matcher::new() - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "%", "a"), - ) - .then2( - llvm.braces, - ("{", "}"), - Matcher::new() - .prefix1(llvm.bare, "%", "c") - .then1(llvm.equal, "=") - .then1(llvm.icmp, "icmp") - .then1(llvm.icmp_eq, "eq") - .then1(llvm.ptr, "ptr") - .prefix1(llvm.bare, "%", "a") - .then1(llvm.comma, ",") - .prefix1(llvm.bare, "@", "glb") - // - .then1(llvm.br, "br") - .prefix2(llvm.int, "i", 10, ["1"]) - .prefix1(llvm.bare, "%", "c") - .then1(llvm.comma, ",") - .then1(llvm.label, "label") - .prefix1(llvm.bare, "%", "BB_EXIT") - .then1(llvm.comma, ",") - .then1(llvm.label, "label") - .prefix1(llvm.bare, "%", "BB_CONTINUE") - // - .suffix1(llvm.label_ident, "BB_EXIT", ":") - .comments(["; escapes %a\n"]) - // - .then1(llvm.call, "call") - .then1(llvm.void, 
"void") - .prefix1(llvm.bare, "@", "exit") - .then2(llvm.parens, ("(", ")"), Matcher::new()) - // - .then1(llvm.unreachable, "unreachable") - // - .suffix1(llvm.label_ident, "BB_CONTINUE", ":") - .then1(llvm.ret, "ret") - .then1(llvm.void, "void"), - ) - .eof() - .assert_matches(&tokens) -} diff --git a/ilex/tests/llvm/main.rs b/ilex/tests/llvm/main.rs new file mode 100644 index 0000000..18a1764 --- /dev/null +++ b/ilex/tests/llvm/main.rs @@ -0,0 +1,108 @@ +use ilex::rule::*; +use ilex::Context; +use ilex::Lexeme; + +#[ilex::spec] +struct Llvm { + #[rule(";")] + comment: Lexeme, + + #[rule('(', ')')] + parens: Lexeme, + #[rule('[', ']')] + brackets: Lexeme, + #[rule('<', '>')] + vector: Lexeme, + #[rule('{', '}')] + braces: Lexeme, + #[rule("<{", "}>")] + packed: Lexeme, + #[rule("!{", "}")] + meta: Lexeme, + + #[rule(',')] + comma: Lexeme, + #[rule('=')] + equal: Lexeme, + #[rule('*')] + star: Lexeme, + #[rule('x')] + times: Lexeme, + + br: Lexeme, + call: Lexeme, + icmp: Lexeme, + #[rule("eq")] + icmp_eq: Lexeme, + ret: Lexeme, + unreachable: Lexeme, + + constant: Lexeme, + declare: Lexeme, + define: Lexeme, + global: Lexeme, + + label: Lexeme, + null: Lexeme, + ptr: Lexeme, + #[rule(Digital::new(10).prefix("i"))] + int: Lexeme, + void: Lexeme, + + private: Lexeme, + unnamed_addr: Lexeme, + nocapture: Lexeme, + nounwind: Lexeme, + + #[named] + #[rule(Quoted::new('"') + .fixed_length_escape(r"\", 2) + .prefixes(["", "c"]))] + string: Lexeme, + + #[named("identifier")] + #[rule(Ident::new() + .ascii_only() + .extra_starts(".0123456789".chars()) + .suffix(":"))] + label_ident: Lexeme, + + #[named("identifier")] + #[rule(Ident::new() + .ascii_only() + .extra_starts(".0123456789".chars()) + .prefixes(["!", "@", "%"]))] + bare: Lexeme, + + #[named("quoted identifier")] + #[rule(Quoted::new('"') + .fixed_length_escape(r"\", 2) + .prefixes(["!", "@", "%"]))] + quoted: Lexeme, + + #[named = "number"] + #[rule(Digital::new(10) + .minus() + .point_limit(0..2) + 
.exponents(["e", "E"], Digits::new(10).plus().minus()))] + dec: Lexeme, + + #[named = "number"] + #[rule(Digital::new(16).minus().prefix("0x"))] + hex: Lexeme, +} + +#[gilded::test("tests/llvm/*.ll")] +fn llvm(test: &gilded::Test) { + let ctx = Context::new(); + let report = ctx.new_report(); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); + match file.lex(Llvm::get().spec(), &report) { + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), + } +} diff --git a/ilex/tests/llvm/smoke.ll b/ilex/tests/llvm/smoke.ll new file mode 100644 index 0000000..089954e --- /dev/null +++ b/ilex/tests/llvm/smoke.ll @@ -0,0 +1,27 @@ +; Declare the string constant as a global constant. +@.str = private unnamed_addr constant [13 x i8] c"hello world\0A\00" + +; External declaration of the puts function +declare i32 @"non trivial name"(ptr nocapture) nounwind + +; Definition of main function +define i32 @main(i32 %0, ptr %1) { + ; Call puts function to write out the string to stdout. 
+ call i32 @"non trivial name"(ptr @.str) + ret i32 0 +} + +; Named metadata +!0 = !{i32 42, null, !"string"} +!foo = !{!0} +@glb = global i8 0 + +define void @f(ptr %a) { + %c = icmp eq ptr %a, @glb + br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a +BB_EXIT: + call void @exit() + unreachable +BB_CONTINUE: + ret void +} \ No newline at end of file diff --git a/ilex/tests/llvm/smoke.tokens.yaml b/ilex/tests/llvm/smoke.tokens.yaml new file mode 100644 index 0000000..3400539 --- /dev/null +++ b/ilex/tests/llvm/smoke.tokens.yaml @@ -0,0 +1,453 @@ +- ident: + lexeme: 32 + span: {span: [52, 57], text: "@.str"} + prefix: {span: [52, 53], text: "@"} + name: {span: [53, 57], text: ".str"} +- keyword: + lexeme: 8 + span: {span: [58, 59], text: "="} +- keyword: + lexeme: 26 + span: {span: [60, 67], text: "private"} +- keyword: + lexeme: 27 + span: {span: [68, 80], text: "unnamed_addr"} +- keyword: + lexeme: 17 + span: {span: [81, 89], text: "constant"} +- bracket: + lexeme: 2 + span: {span: [90, 99], text: "[13 x i8]"} + delims: + - {span: [90, 91], text: "["} + - {span: [98, 99], text: "]"} + contents: + - ident: + lexeme: 34 + span: {span: [91, 93], text: "13"} + radix: 10 + blocks: [{span: [91, 93], text: "13"}] + exponents: [] + - keyword: + lexeme: 10 + span: {span: [94, 95], text: "x"} + - ident: + lexeme: 24 + span: {span: [96, 98], text: "i8"} + prefix: {span: [96, 97], text: "i"} + radix: 10 + blocks: [{span: [97, 98], text: "8"}] + exponents: [] +- quoted: + lexeme: 30 + span: + span: [100, 120] + text: "c\"hello world\\0A\\00\"" + prefix: {span: [100, 101], text: "c"} + delims: + - {span: [101, 102], text: "\""} + - {span: [119, 120], text: "\""} + contents: + - text: {span: [102, 113], text: "hello world"} + - esc: {span: [113, 114], text: "\\"} + data: {span: [114, 116], text: "0A"} + - esc: {span: [116, 117], text: "\\"} + data: {span: [117, 119], text: "00"} +- keyword: + lexeme: 18 + span: {span: [166, 173], text: "declare"} +- ident: + lexeme: 24 + 
span: {span: [174, 177], text: "i32"} + prefix: {span: [174, 175], text: "i"} + radix: 10 + blocks: [{span: [175, 177], text: "32"}] + exponents: [] +- quoted: + lexeme: 33 + span: + span: [178, 197] + text: "@\"non trivial name\"" + prefix: {span: [178, 179], text: "@"} + delims: + - {span: [179, 180], text: "\""} + - {span: [196, 197], text: "\""} + contents: + - text: + span: [180, 196] + text: "non trivial name" +- bracket: + lexeme: 1 + span: + span: [197, 212] + text: "(ptr nocapture)" + delims: + - {span: [197, 198], text: "("} + - {span: [211, 212], text: ")"} + contents: + - keyword: + lexeme: 23 + span: {span: [198, 201], text: "ptr"} + - keyword: + lexeme: 28 + span: {span: [202, 211], text: "nocapture"} +- keyword: + lexeme: 29 + span: {span: [213, 221], text: "nounwind"} +- keyword: + lexeme: 19 + span: {span: [253, 259], text: "define"} +- ident: + lexeme: 24 + span: {span: [260, 263], text: "i32"} + prefix: {span: [260, 261], text: "i"} + radix: 10 + blocks: [{span: [261, 263], text: "32"}] + exponents: [] +- ident: + lexeme: 32 + span: {span: [264, 269], text: "@main"} + prefix: {span: [264, 265], text: "@"} + name: {span: [265, 269], text: "main"} +- bracket: + lexeme: 1 + span: + span: [269, 285] + text: "(i32 %0, ptr %1)" + delims: + - {span: [269, 270], text: "("} + - {span: [284, 285], text: ")"} + contents: + - ident: + lexeme: 24 + span: {span: [270, 273], text: "i32"} + prefix: {span: [270, 271], text: "i"} + radix: 10 + blocks: [{span: [271, 273], text: "32"}] + exponents: [] + - ident: + lexeme: 32 + span: {span: [274, 276], text: "%0"} + prefix: {span: [274, 275], text: "%"} + name: {span: [275, 276], text: "0"} + - keyword: + lexeme: 7 + span: {span: [276, 277], text: ","} + - keyword: + lexeme: 23 + span: {span: [278, 281], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [282, 284], text: "%1"} + prefix: {span: [282, 283], text: "%"} + name: {span: [283, 284], text: "1"} +- bracket: + lexeme: 4 + span: + span: [286, 401] + text: | + 
{ + ; Call puts function to write out the string to stdout. + call i32 @"non trivial name"(ptr @.str) + ret i32 0 + } + delims: + - {span: [286, 287], text: "{"} + - {span: [400, 401], text: "}"} + contents: + - keyword: + lexeme: 12 + span: {span: [348, 352], text: "call"} + - ident: + lexeme: 24 + span: {span: [353, 356], text: "i32"} + prefix: {span: [353, 354], text: "i"} + radix: 10 + blocks: [{span: [354, 356], text: "32"}] + exponents: [] + - quoted: + lexeme: 33 + span: + span: [357, 376] + text: "@\"non trivial name\"" + prefix: {span: [357, 358], text: "@"} + delims: + - {span: [358, 359], text: "\""} + - {span: [375, 376], text: "\""} + contents: + - text: + span: [359, 375] + text: "non trivial name" + - bracket: + lexeme: 1 + span: {span: [376, 387], text: "(ptr @.str)"} + delims: + - {span: [376, 377], text: "("} + - {span: [386, 387], text: ")"} + contents: + - keyword: + lexeme: 23 + span: {span: [377, 380], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [381, 386], text: "@.str"} + prefix: {span: [381, 382], text: "@"} + name: {span: [382, 386], text: ".str"} + - keyword: + lexeme: 15 + span: {span: [390, 393], text: "ret"} + - ident: + lexeme: 24 + span: {span: [394, 397], text: "i32"} + prefix: {span: [394, 395], text: "i"} + radix: 10 + blocks: [{span: [395, 397], text: "32"}] + exponents: [] + - ident: + lexeme: 34 + span: {span: [398, 399], text: "0"} + radix: 10 + blocks: [{span: [398, 399], text: "0"}] + exponents: [] +- ident: + lexeme: 32 + span: {span: [420, 422], text: "!0"} + prefix: {span: [420, 421], text: "!"} + name: {span: [421, 422], text: "0"} +- keyword: + lexeme: 8 + span: {span: [423, 424], text: "="} +- bracket: + lexeme: 6 + span: + span: [425, 451] + text: "!{i32 42, null, !\"string\"}" + delims: + - {span: [425, 427], text: "!{"} + - {span: [450, 451], text: "}"} + contents: + - ident: + lexeme: 24 + span: {span: [427, 430], text: "i32"} + prefix: {span: [427, 428], text: "i"} + radix: 10 + blocks: [{span: [428, 430], 
text: "32"}] + exponents: [] + - ident: + lexeme: 34 + span: {span: [431, 433], text: "42"} + radix: 10 + blocks: [{span: [431, 433], text: "42"}] + exponents: [] + - keyword: + lexeme: 7 + span: {span: [433, 434], text: ","} + - keyword: + lexeme: 22 + span: {span: [435, 439], text: "null"} + - keyword: + lexeme: 7 + span: {span: [439, 440], text: ","} + - quoted: + lexeme: 33 + span: {span: [441, 450], text: "!\"string\""} + prefix: {span: [441, 442], text: "!"} + delims: + - {span: [442, 443], text: "\""} + - {span: [449, 450], text: "\""} + contents: [{text: {span: [443, 449], text: "string"}}] +- ident: + lexeme: 32 + span: {span: [452, 456], text: "!foo"} + prefix: {span: [452, 453], text: "!"} + name: {span: [453, 456], text: "foo"} +- keyword: + lexeme: 8 + span: {span: [457, 458], text: "="} +- bracket: + lexeme: 6 + span: {span: [459, 464], text: "!{!0}"} + delims: + - {span: [459, 461], text: "!{"} + - {span: [463, 464], text: "}"} + contents: + - ident: + lexeme: 32 + span: {span: [461, 463], text: "!0"} + prefix: {span: [461, 462], text: "!"} + name: {span: [462, 463], text: "0"} +- ident: + lexeme: 32 + span: {span: [465, 469], text: "@glb"} + prefix: {span: [465, 466], text: "@"} + name: {span: [466, 469], text: "glb"} +- keyword: + lexeme: 8 + span: {span: [470, 471], text: "="} +- keyword: + lexeme: 20 + span: {span: [472, 478], text: "global"} +- ident: + lexeme: 24 + span: {span: [479, 481], text: "i8"} + prefix: {span: [479, 480], text: "i"} + radix: 10 + blocks: [{span: [480, 481], text: "8"}] + exponents: [] +- ident: + lexeme: 34 + span: {span: [482, 483], text: "0"} + radix: 10 + blocks: [{span: [482, 483], text: "0"}] + exponents: [] +- keyword: + lexeme: 19 + span: {span: [485, 491], text: "define"} +- keyword: + lexeme: 25 + span: {span: [492, 496], text: "void"} +- ident: + lexeme: 32 + span: {span: [497, 499], text: "@f"} + prefix: {span: [497, 498], text: "@"} + name: {span: [498, 499], text: "f"} +- bracket: + lexeme: 1 + span: {span: 
[499, 507], text: "(ptr %a)"} + delims: + - {span: [499, 500], text: "("} + - {span: [506, 507], text: ")"} + contents: + - keyword: + lexeme: 23 + span: {span: [500, 503], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [504, 506], text: "%a"} + prefix: {span: [504, 505], text: "%"} + name: {span: [505, 506], text: "a"} +- bracket: + lexeme: 4 + span: + span: [508, 666] + text: | + { + %c = icmp eq ptr %a, @glb + br i1 %c, label %BB_EXIT, label %BB_CONTINUE ; escapes %a + BB_EXIT: + call void @exit() + unreachable + BB_CONTINUE: + ret void + } + delims: + - {span: [508, 509], text: "{"} + - {span: [665, 666], text: "}"} + contents: + - ident: + lexeme: 32 + span: {span: [512, 514], text: "%c"} + prefix: {span: [512, 513], text: "%"} + name: {span: [513, 514], text: "c"} + - keyword: + lexeme: 8 + span: {span: [515, 516], text: "="} + - keyword: + lexeme: 13 + span: {span: [517, 521], text: "icmp"} + - keyword: + lexeme: 14 + span: {span: [522, 524], text: "eq"} + - keyword: + lexeme: 23 + span: {span: [525, 528], text: "ptr"} + - ident: + lexeme: 32 + span: {span: [529, 531], text: "%a"} + prefix: {span: [529, 530], text: "%"} + name: {span: [530, 531], text: "a"} + - keyword: + lexeme: 7 + span: {span: [531, 532], text: ","} + - ident: + lexeme: 32 + span: {span: [533, 537], text: "@glb"} + prefix: {span: [533, 534], text: "@"} + name: {span: [534, 537], text: "glb"} + - keyword: + lexeme: 11 + span: {span: [540, 542], text: "br"} + - ident: + lexeme: 24 + span: {span: [543, 545], text: "i1"} + prefix: {span: [543, 544], text: "i"} + radix: 10 + blocks: [{span: [544, 545], text: "1"}] + exponents: [] + - ident: + lexeme: 32 + span: {span: [546, 548], text: "%c"} + prefix: {span: [546, 547], text: "%"} + name: {span: [547, 548], text: "c"} + - keyword: + lexeme: 7 + span: {span: [548, 549], text: ","} + - keyword: + lexeme: 21 + span: {span: [550, 555], text: "label"} + - ident: + lexeme: 32 + span: {span: [556, 564], text: "%BB_EXIT"} + prefix: {span: [556, 
557], text: "%"} + name: {span: [557, 564], text: "BB_EXIT"} + - keyword: + lexeme: 7 + span: {span: [564, 565], text: ","} + - keyword: + lexeme: 21 + span: {span: [566, 571], text: "label"} + - ident: + lexeme: 32 + span: {span: [572, 584], text: "%BB_CONTINUE"} + prefix: {span: [572, 573], text: "%"} + name: {span: [573, 584], text: "BB_CONTINUE"} + - ident: + lexeme: 31 + span: {span: [598, 606], text: "BB_EXIT:"} + suffix: {span: [605, 606], text: ":"} + name: {span: [598, 605], text: "BB_EXIT"} + - keyword: + lexeme: 12 + span: {span: [609, 613], text: "call"} + - keyword: + lexeme: 25 + span: {span: [614, 618], text: "void"} + - ident: + lexeme: 32 + span: {span: [619, 624], text: "@exit"} + prefix: {span: [619, 620], text: "@"} + name: {span: [620, 624], text: "exit"} + - bracket: + lexeme: 1 + span: {span: [624, 626], text: "()"} + delims: + - {span: [624, 625], text: "("} + - {span: [625, 626], text: ")"} + contents: [] + - keyword: + lexeme: 16 + span: {span: [629, 640], text: "unreachable"} + - ident: + lexeme: 31 + span: {span: [641, 653], text: "BB_CONTINUE:"} + suffix: {span: [652, 653], text: ":"} + name: {span: [641, 652], text: "BB_CONTINUE"} + - keyword: + lexeme: 15 + span: {span: [656, 659], text: "ret"} + - keyword: + lexeme: 25 + span: {span: [660, 664], text: "void"} +- eof: + lexeme: 2147483647 + span: {span: [666, 666], text: ""} diff --git a/ilex/tests/numbers.rs b/ilex/tests/numbers/main.rs similarity index 64% rename from ilex/tests/numbers.rs rename to ilex/tests/numbers/main.rs index 97ec5d7..4272c3e 100644 --- a/ilex/tests/numbers.rs +++ b/ilex/tests/numbers/main.rs @@ -1,6 +1,8 @@ use ilex::fp::Fp64; +use ilex::report::Report; use ilex::rule::*; use ilex::token; +use ilex::Context; use ilex::Lexeme; #[ilex::spec] @@ -54,48 +56,43 @@ struct Numbers { dec: Lexeme, } -#[test] -fn numbers() { - let lex = Numbers::get(); - let text = r#" - 0, - -00, - -0.0, - 123.456e78, - 9e9, - -9e9, - +9e+9, - 9e-9, - -0777, - 0o777, - %1210, - 
0b0.0000000101, - 0o0.0024, - 0O1.01p01, - 0xfff.eep+10, - $DEADBEEF, - -0q0123.0123, - 3^a, - "#; - - let ctx = ilex::Context::new(); - let _u = ctx.use_for_debugging_spans(); +#[gilded::test("tests/numbers/*.txt")] +fn numbers(test: &gilded::Test) { + let ctx = Context::new(); let report = ctx.new_report(); - let tokens = ctx - .new_file("test.file", text) - .lex(lex.spec(), &report) + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) .unwrap(); - eprintln!("stream: {tokens:#?}"); - let mut cursor = tokens.cursor(); + let [tokens, fp64, stderr] = + test.outputs(["tokens.yaml", "fp64.txt", "stderr"]); + + match file.lex(Numbers::get().spec(), &report) { + Ok(stream) => { + tokens(stream.summary()); + match parse(Numbers::get(), stream.cursor(), &report) { + Ok(v) => fp64(format!("{v:#?}")), + Err(fatal) => stderr(fatal.to_string()), + } + } + + Err(fatal) => stderr(fatal.to_string()), + } +} + +fn parse( + lex: &Numbers, + mut cursor: ilex::token::Cursor, + report: &Report, +) -> Result, ilex::report::Fatal> { let numbers = cursor .delimited(lex.comma, |cursor| loop { let value = token::switch() .case(Lexeme::eof(), |_, _| Err(false)) .cases([lex.dec, lex.bin, lex.oct, lex.hex, lex.qua], |num, _| { - Ok(num.to_float::(.., &report).unwrap()) + Ok(num.to_float::(.., report).unwrap()) }) - .take(cursor, &report); + .take(cursor, report); match value { None => { cursor.back_up(1); @@ -108,33 +105,6 @@ fn numbers() { }) .map(|(v, _)| v) .collect::>(); - cursor.expect_finished(&report); - report.fatal_or(()).unwrap(); - - assert_eq!( - numbers, - [ - "0", - "-0", - "-0", - "123.456e78", - "9e9", - "-9e9", - "9e9", - "9e-9", - "-511", - "511", - "4", - "0.0048828125", - "0.0048828125", - "2.03125", - "4194232", - "3735928559", - "-27.10546875", - "3e10", - ] - .into_iter() - .map(Fp64::new) - .collect::>() - ); + cursor.expect_finished(report); + report.fatal_or(numbers) } diff --git a/ilex/tests/numbers/numbers.fp64.txt 
b/ilex/tests/numbers/numbers.fp64.txt new file mode 100644 index 0000000..19b5cdd --- /dev/null +++ b/ilex/tests/numbers/numbers.fp64.txt @@ -0,0 +1,20 @@ +[ + 0.0, + -0.0, + -0.0, + 1.23456e+80, + 9.0e+9, + -9.0e+9, + 9.0e+9, + 8.9999999999999995e-9, + -511.0, + 511.0, + 4.0, + 0.0048828125, + 0.0048828125, + 2.03125, + 4194232.0, + 3735928559.0, + -27.10546875, + 3.0e+10, +] \ No newline at end of file diff --git a/ilex/tests/numbers/numbers.tokens.yaml b/ilex/tests/numbers/numbers.tokens.yaml new file mode 100644 index 0000000..0a3cfe9 --- /dev/null +++ b/ilex/tests/numbers/numbers.tokens.yaml @@ -0,0 +1,234 @@ +- ident: + lexeme: 5 + span: {span: [0, 1], text: "0"} + radix: 10 + blocks: [{span: [0, 1], text: "0"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [1, 2], text: ","} +- ident: + lexeme: 4 + span: {span: [3, 6], text: "-00"} + prefix: {span: [3, 4], text: "-"} + radix: 8 + sign: "-" + blocks: [{span: [5, 6], text: "0"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [6, 7], text: ","} +- ident: + lexeme: 5 + span: {span: [8, 12], text: "-0.0"} + radix: 10 + sign: "-" + blocks: + - {span: [9, 10], text: "0"} + - {span: [11, 12], text: "0"} + exponents: [] +- keyword: + lexeme: 0 + span: {span: [12, 13], text: ","} +- ident: + lexeme: 5 + span: {span: [14, 24], text: "123.456e78"} + radix: 10 + blocks: + - {span: [14, 17], text: "123"} + - {span: [18, 21], text: "456"} + exponents: + - span: {span: [14, 24], text: "123.456e78"} + prefix: {span: [21, 22], text: "e"} + radix: 10 + blocks: [{span: [22, 24], text: "78"}] +- keyword: + lexeme: 0 + span: {span: [24, 25], text: ","} +- ident: + lexeme: 5 + span: {span: [26, 29], text: "9e9"} + radix: 10 + blocks: [{span: [26, 27], text: "9"}] + exponents: + - span: {span: [26, 29], text: "9e9"} + prefix: {span: [27, 28], text: "e"} + radix: 10 + blocks: [{span: [28, 29], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [29, 30], text: ","} +- ident: + lexeme: 5 + span: {span: [31, 35], text: 
"-9e9"} + radix: 10 + sign: "-" + blocks: [{span: [32, 33], text: "9"}] + exponents: + - span: {span: [31, 35], text: "-9e9"} + prefix: {span: [33, 34], text: "e"} + radix: 10 + blocks: [{span: [34, 35], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [35, 36], text: ","} +- ident: + lexeme: 5 + span: {span: [37, 42], text: "+9e+9"} + radix: 10 + sign: "+" + blocks: [{span: [38, 39], text: "9"}] + exponents: + - span: {span: [37, 42], text: "+9e+9"} + prefix: {span: [39, 40], text: "e"} + radix: 10 + sign: "+" + blocks: [{span: [41, 42], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [42, 43], text: ","} +- ident: + lexeme: 5 + span: {span: [44, 48], text: "9e-9"} + radix: 10 + blocks: [{span: [44, 45], text: "9"}] + exponents: + - span: {span: [44, 48], text: "9e-9"} + prefix: {span: [45, 46], text: "e"} + radix: 10 + sign: "-" + blocks: [{span: [47, 48], text: "9"}] +- keyword: + lexeme: 0 + span: {span: [48, 49], text: ","} +- ident: + lexeme: 4 + span: {span: [50, 55], text: "-0777"} + prefix: {span: [50, 51], text: "-"} + radix: 8 + sign: "-" + blocks: [{span: [52, 55], text: "777"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [55, 56], text: ","} +- ident: + lexeme: 4 + span: {span: [57, 62], text: "0o777"} + prefix: {span: [57, 59], text: "0o"} + radix: 8 + blocks: [{span: [59, 62], text: "777"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [62, 63], text: ","} +- ident: + lexeme: 1 + span: {span: [64, 69], text: "%1210"} + prefix: {span: [64, 65], text: "%"} + radix: 2 + blocks: [{span: [65, 66], text: "1"}] + exponents: + - span: {span: [64, 69], text: "%1210"} + prefix: {span: [66, 67], text: "2"} + radix: 2 + blocks: [{span: [67, 69], text: "10"}] +- keyword: + lexeme: 0 + span: {span: [69, 70], text: ","} +- ident: + lexeme: 1 + span: {span: [71, 85], text: "0b0.0000000101"} + prefix: {span: [71, 73], text: "0b"} + radix: 2 + blocks: + - {span: [73, 74], text: "0"} + - {span: [75, 85], text: "0000000101"} + exponents: [] +- 
keyword: + lexeme: 0 + span: {span: [85, 86], text: ","} +- ident: + lexeme: 4 + span: {span: [87, 95], text: "0o0.0024"} + prefix: {span: [87, 89], text: "0o"} + radix: 8 + blocks: + - {span: [89, 90], text: "0"} + - {span: [91, 95], text: "0024"} + exponents: [] +- keyword: + lexeme: 0 + span: {span: [95, 96], text: ","} +- ident: + lexeme: 4 + span: {span: [97, 106], text: "0O1.01p01"} + prefix: {span: [97, 99], text: "0O"} + radix: 8 + blocks: + - {span: [99, 100], text: "1"} + - {span: [101, 103], text: "01"} + exponents: + - span: {span: [97, 106], text: "0O1.01p01"} + prefix: {span: [103, 104], text: "p"} + radix: 10 + blocks: [{span: [104, 106], text: "01"}] +- keyword: + lexeme: 0 + span: {span: [106, 107], text: ","} +- ident: + lexeme: 2 + span: {span: [108, 120], text: "0xfff.eep+10"} + prefix: {span: [108, 110], text: "0x"} + radix: 16 + blocks: + - {span: [110, 113], text: "fff"} + - {span: [114, 116], text: "ee"} + exponents: + - span: {span: [108, 120], text: "0xfff.eep+10"} + prefix: {span: [116, 117], text: "p"} + radix: 10 + sign: "+" + blocks: [{span: [118, 120], text: "10"}] +- keyword: + lexeme: 0 + span: {span: [120, 121], text: ","} +- ident: + lexeme: 2 + span: {span: [122, 131], text: "$DEADBEEF"} + prefix: {span: [122, 123], text: "$"} + radix: 16 + blocks: [{span: [123, 131], text: "DEADBEEF"}] + exponents: [] +- keyword: + lexeme: 0 + span: {span: [131, 132], text: ","} +- ident: + lexeme: 3 + span: {span: [133, 145], text: "-0q0123.0123"} + prefix: {span: [133, 135], text: "-0"} + radix: 4 + sign: "-" + blocks: + - {span: [136, 140], text: "0123"} + - {span: [141, 145], text: "0123"} + exponents: [] +- keyword: + lexeme: 0 + span: {span: [145, 146], text: ","} +- ident: + lexeme: 5 + span: {span: [147, 150], text: "3^a"} + radix: 10 + blocks: [{span: [147, 148], text: "3"}] + exponents: + - span: {span: [147, 150], text: "3^a"} + prefix: {span: [148, 149], text: "^"} + radix: 16 + blocks: [{span: [149, 150], text: "a"}] +- keyword: + 
lexeme: 0 + span: {span: [150, 151], text: ","} +- eof: + lexeme: 2147483647 + span: {span: [151, 151], text: ""} diff --git a/ilex/tests/numbers/numbers.txt b/ilex/tests/numbers/numbers.txt new file mode 100644 index 0000000..6a9cd42 --- /dev/null +++ b/ilex/tests/numbers/numbers.txt @@ -0,0 +1,18 @@ +0, +-00, +-0.0, +123.456e78, +9e9, +-9e9, ++9e+9, +9e-9, +-0777, +0o777, +%1210, +0b0.0000000101, +0o0.0024, +0O1.01p01, +0xfff.eep+10, +$DEADBEEF, +-0q0123.0123, +3^a, \ No newline at end of file diff --git a/ilex/tests/ui/ambiguous.rs b/ilex/tests/ui/ambiguous.rs deleted file mode 100644 index bf61deb..0000000 --- a/ilex/tests/ui/ambiguous.rs +++ /dev/null @@ -1,176 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - #[rule("null")] - kw: Lexeme, - #[rule("-null")] - kw2: Lexeme, - #[rule(")null")] - kw3: Lexeme, - - #[rule(Comment::nesting(Bracket::rust_style( - "/", - ("-", ""), - ("", "-"), - )))] - cm: Lexeme, - #[rule(Comment::nesting(Bracket::cxx_style( - Ident::new().min_len(1), - ("--", ""), - ("", ""), - )))] - cm2: Lexeme, - #[rule(Bracket::cxx_style( - Ident::new(), - ("$", "["), - ("]", ""), - ))] - br: Lexeme, - #[rule(Ident::new() - .prefix("/") - .suffixes(["", "%q", "/"]))] - id: Lexeme, - #[rule(Digital::new(10) - .prefixes(["", "%"]) - .suffixes(["", "%", "q", "/"]))] - nm: Lexeme, - #[rule(Quoted::new("'") - .prefixes(["%", "q"]) - .suffixes(["", "%", "q"]))] - st: Lexeme, - #[rule(Quoted::with(Bracket::cxx_style( - Ident::new(), - ("q", "("), - (")", ""), - )))] - st2: Lexeme, -} - -#[test] -fn no_xid_after_kw() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "null nullable") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_kw.stdout"); -} - -#[test] -fn no_xid_after_br() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "$[] $null[]null 
$null[]nullable") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_br.stdout"); -} - -#[test] -fn no_xid_after_cm() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - "--null some stuff null --null some more stuff nullnull", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_cm.stdout"); -} - -#[test] -fn no_xid_after_id() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "/foo%q /null%q /foo%qua") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_id.stdout"); -} - -#[test] -fn no_xid_after_nm() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "%123 %123qua") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_nm.stdout"); -} - -#[test] -fn no_xid_after_st() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "q'xyz'q %'xyz'qua") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/no_xid_after_st.stdout"); -} - -#[test] -fn ambiguous_idents() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "/foo/bar/") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/ambiguous_idents.stdout"); -} - -#[test] -fn ambiguous_nums() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "1234%1234 1234/xyz") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/ambiguous_nums.stdout"); -} - -#[test] -fn symbols_after_comment() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - // Below, we expect -/ more comment /- to lex correctly, then lex a - // -null and a 
null, even though if it wasn't a comment, it would be - // ambiguous, because `--null null`` is also a valid comment. - "-/ comment /- null -/ more comment /--null null", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report_ok(&report); -} - -#[test] -fn symbols_after_quoted() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - // Below, we expect to lex a single quoted, even though `a]null` is a - // keyword. This is because searching for ambiguities stops just shy of - // the '. - "qnull(a)null", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report_ok(&report); -} diff --git a/ilex/tests/ui/goldens/ambiguous_idents.stdout b/ilex/tests/ui/ambiguous/idents.stderr similarity index 69% rename from ilex/tests/ui/goldens/ambiguous_idents.stdout rename to ilex/tests/ui/ambiguous/idents.stderr index cc36eb6..bb169ff 100644 --- a/ilex/tests/ui/goldens/ambiguous_idents.stdout +++ b/ilex/tests/ui/ambiguous/idents.stderr @@ -1,25 +1,28 @@ error: unexpected `b` in `/`-suffixed number - --> :1:6 + --> ambiguous/idents.txt:1:6 | 1 | /foo/bar/ | ^ | --- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in `/`-suffixed number - --> :1:7 + --> ambiguous/idents.txt:1:7 | 1 | /foo/bar/ | ^ | --- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `r` in `/`-suffixed number - --> :1:8 + --> ambiguous/idents.txt:1:8 | 1 | /foo/bar/ | ^ | --- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: aborting due to 3 errors diff --git a/ilex/tests/ui/ambiguous/idents.txt b/ilex/tests/ui/ambiguous/idents.txt new file mode 100644 index 0000000..f2f1d82 --- /dev/null +++ b/ilex/tests/ui/ambiguous/idents.txt @@ -0,0 +1 @@ +/foo/bar/ 
diff --git a/ilex/tests/ui/ambiguous/no_xid_after_br.stderr b/ilex/tests/ui/ambiguous/no_xid_after_br.stderr new file mode 100644 index 0000000..29382cb --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_br.stderr @@ -0,0 +1,9 @@ +error: unexpected closing `able` + --> ambiguous/no_xid_after_br.txt:1:28 + | +1 | $[] $null[]null $null[]nullable + | ^^^^ expected to be opened by `--able` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_br.txt b/ilex/tests/ui/ambiguous/no_xid_after_br.txt new file mode 100644 index 0000000..d680ddc --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_br.txt @@ -0,0 +1 @@ +$[] $null[]null $null[]nullable diff --git a/ilex/tests/ui/goldens/no_xid_after_cm.stdout b/ilex/tests/ui/ambiguous/no_xid_after_cm.stderr similarity index 77% rename from ilex/tests/ui/goldens/no_xid_after_cm.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_cm.stderr index 4025d78..b9181e7 100644 --- a/ilex/tests/ui/goldens/no_xid_after_cm.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_cm.stderr @@ -1,9 +1,10 @@ error: extraneous characters after `--null ... 
null` - --> :1:51 + --> ambiguous/no_xid_after_cm.txt:1:51 | 1 | --null some stuff null --null some more stuff nullnull | ^^^^ | -- help: maybe you meant to include a space here | + = note: reported at: ilex/src/rt/emit2.rs:779:10 error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_cm.txt b/ilex/tests/ui/ambiguous/no_xid_after_cm.txt new file mode 100644 index 0000000..84d7a9a --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_cm.txt @@ -0,0 +1 @@ +--null some stuff null --null some more stuff nullnull \ No newline at end of file diff --git a/ilex/tests/ui/ambiguous/no_xid_after_id.stderr b/ilex/tests/ui/ambiguous/no_xid_after_id.stderr new file mode 100644 index 0000000..7a15292 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_id.stderr @@ -0,0 +1,9 @@ +error: unexpected closing `ua` + --> ambiguous/no_xid_after_id.txt:1:22 + | +1 | /foo%q /null%q /foo%qua + | ^^ expected to be opened by `--ua` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_id.txt b/ilex/tests/ui/ambiguous/no_xid_after_id.txt new file mode 100644 index 0000000..c5ddb88 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_id.txt @@ -0,0 +1 @@ +/foo%q /null%q /foo%qua diff --git a/ilex/tests/ui/goldens/no_xid_after_kw.stdout b/ilex/tests/ui/ambiguous/no_xid_after_kw.stderr similarity index 63% rename from ilex/tests/ui/goldens/no_xid_after_kw.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_kw.stderr index 83c0803..9a3a571 100644 --- a/ilex/tests/ui/goldens/no_xid_after_kw.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_kw.stderr @@ -1,8 +1,9 @@ error: unexpected closing `nullable` - --> :1:6 + --> ambiguous/no_xid_after_kw.txt:1:6 | 1 | null nullable | ^^^^^^^^ expected to be opened by `--nullable` | + = note: reported at: ilex/src/rt/emit2.rs:254:22 error: aborting due to previous error diff --git 
a/ilex/tests/ui/ambiguous/no_xid_after_kw.txt b/ilex/tests/ui/ambiguous/no_xid_after_kw.txt new file mode 100644 index 0000000..65ed97a --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_kw.txt @@ -0,0 +1 @@ +null nullable diff --git a/ilex/tests/ui/goldens/no_xid_after_nm.stdout b/ilex/tests/ui/ambiguous/no_xid_after_nm.stderr similarity index 68% rename from ilex/tests/ui/goldens/no_xid_after_nm.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_nm.stderr index 788aa50..8456863 100644 --- a/ilex/tests/ui/goldens/no_xid_after_nm.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_nm.stderr @@ -1,25 +1,28 @@ error: unexpected `q` in `%`-prefixed number - --> :1:10 + --> ambiguous/no_xid_after_nm.txt:1:10 | 1 | %123 %123qua | ^ | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `u` in `%`-prefixed number - --> :1:11 + --> ambiguous/no_xid_after_nm.txt:1:11 | 1 | %123 %123qua | ^ | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in `%`-prefixed number - --> :1:12 + --> ambiguous/no_xid_after_nm.txt:1:12 | 1 | %123 %123qua | ^ | ------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: aborting due to 3 errors diff --git a/ilex/tests/ui/ambiguous/no_xid_after_nm.txt b/ilex/tests/ui/ambiguous/no_xid_after_nm.txt new file mode 100644 index 0000000..9023903 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_nm.txt @@ -0,0 +1 @@ +%123 %123qua diff --git a/ilex/tests/ui/goldens/no_xid_after_st.stdout b/ilex/tests/ui/ambiguous/no_xid_after_st.stderr similarity index 69% rename from ilex/tests/ui/goldens/no_xid_after_st.stdout rename to ilex/tests/ui/ambiguous/no_xid_after_st.stderr index 1183322..d136573 100644 --- 
a/ilex/tests/ui/goldens/no_xid_after_st.stdout +++ b/ilex/tests/ui/ambiguous/no_xid_after_st.stderr @@ -1,9 +1,10 @@ error: extraneous characters after `%'...'q` - --> :1:16 + --> ambiguous/no_xid_after_st.txt:1:16 | 1 | q'xyz'q %'xyz'qua | ^^ | -- help: maybe you meant to include a space here | + = note: reported at: ilex/src/rt/emit2.rs:779:10 error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/no_xid_after_st.txt b/ilex/tests/ui/ambiguous/no_xid_after_st.txt new file mode 100644 index 0000000..517c131 --- /dev/null +++ b/ilex/tests/ui/ambiguous/no_xid_after_st.txt @@ -0,0 +1 @@ +q'xyz'q %'xyz'qua \ No newline at end of file diff --git a/ilex/tests/ui/ambiguous/nums.stderr b/ilex/tests/ui/ambiguous/nums.stderr new file mode 100644 index 0000000..de384cd --- /dev/null +++ b/ilex/tests/ui/ambiguous/nums.stderr @@ -0,0 +1,9 @@ +error: unexpected closing `xyz` + --> ambiguous/nums.txt:1:16 + | +1 | 1234%1234 1234/xyz + | ^^^ expected to be opened by `--xyz` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: aborting due to previous error diff --git a/ilex/tests/ui/ambiguous/nums.txt b/ilex/tests/ui/ambiguous/nums.txt new file mode 100644 index 0000000..9b7ae1f --- /dev/null +++ b/ilex/tests/ui/ambiguous/nums.txt @@ -0,0 +1 @@ +1234%1234 1234/xyz diff --git a/ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml b/ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml new file mode 100644 index 0000000..e9a7299 --- /dev/null +++ b/ilex/tests/ui/ambiguous/symbols_after_comment.tokens.yaml @@ -0,0 +1,12 @@ +- keyword: + lexeme: 0 + span: {span: [14, 18], text: "null"} +- keyword: + lexeme: 1 + span: {span: [37, 42], text: "-null"} +- keyword: + lexeme: 0 + span: {span: [43, 47], text: "null"} +- eof: + lexeme: 2147483647 + span: {span: [48, 48], text: ""} diff --git a/ilex/tests/ui/ambiguous/symbols_after_comment.txt b/ilex/tests/ui/ambiguous/symbols_after_comment.txt new file mode 100644 index 0000000..f382698 --- /dev/null 
+++ b/ilex/tests/ui/ambiguous/symbols_after_comment.txt @@ -0,0 +1 @@ +-/ comment /- null -/ more comment /--null null diff --git a/ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml b/ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml new file mode 100644 index 0000000..e4c73d1 --- /dev/null +++ b/ilex/tests/ui/ambiguous/symbols_after_quoted.tokens.yaml @@ -0,0 +1,10 @@ +- quoted: + lexeme: 9 + span: {span: [0, 12], text: "qnull(a)null"} + delims: + - {span: [0, 6], text: "qnull("} + - {span: [7, 12], text: ")null"} + contents: [{text: {span: [6, 7], text: "a"}}] +- eof: + lexeme: 2147483647 + span: {span: [13, 13], text: ""} diff --git a/ilex/tests/ui/ambiguous/symbols_after_quoted.txt b/ilex/tests/ui/ambiguous/symbols_after_quoted.txt new file mode 100644 index 0000000..e0b43b3 --- /dev/null +++ b/ilex/tests/ui/ambiguous/symbols_after_quoted.txt @@ -0,0 +1 @@ +qnull(a)null diff --git a/ilex/tests/ui/digital.rs b/ilex/tests/ui/digital.rs deleted file mode 100644 index 0698517..0000000 --- a/ilex/tests/ui/digital.rs +++ /dev/null @@ -1,161 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[test] -fn digit_points() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -1/2/3e4/5 -1/2/3/4e4/5 -1/2e4/5 -1/2/3e4/5/6 -1/2/3e4 - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/digit_points.stdout"); -} - -#[test] -fn digit_separators() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -all_ok@_123_._456_e_789_._012_ -no_prefix@_123_._456_e_789_._012_ -no_suffix@_123_._456_e_789_._012_ -no_point@_123_._456_e_789_._012_ -no_exp@_123_._456_e_789_._012_ - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/digit_separators.stdout"); -} - -#[test] -fn missing_digits() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ 
= ctx - .new_file( - "", - " -0xdeadbeef -0x 0xf - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/missing_digits.stdout"); -} - -#[test] -fn invalid_digits() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -0o777 -0o8 -0o08 -0/0/aa11g - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/invalid_digits.stdout"); -} - -#[ilex::spec] -struct Spec { - #[rule(Digital::new(16).prefix("0x"))] - m1: Lexeme, - #[rule(Digital::new(8).prefix("0o"))] - m2: Lexeme, - - #[rule( Digital::new(10) - .point_limit(2..3) - .point('/') - .exponent("e", Digits::new(10).point_limit(1..2)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: true, - around_exp: true, - }))] - m0: Lexeme, - #[rule(Digital::new(10) - .prefix("all_ok@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: true, - around_exp: true, - }))] - n0: Lexeme, - #[rule( Digital::new(10) - .prefix("no_prefix@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: false, - suffix: true, - around_point: true, - around_exp: true, - }))] - n1: Lexeme, - #[rule(Digital::new(10) - .prefix("no_suffix@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: false, - around_point: true, - around_exp: true, - }))] - n2: Lexeme, - #[rule( Digital::new(10) - .prefix("no_point@") - .point_limit(0..3) - .exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: false, - around_exp: true, - }))] - n3: Lexeme, - #[rule(Digital::new(10) - .prefix("no_exp@") - .point_limit(0..3) - 
.exponent("e", Digits::new(10).point_limit(0..3)) - .separator_with("_", - SeparatorCornerCases { - prefix: true, - suffix: true, - around_point: true, - around_exp: false, - }))] - n4: Lexeme, -} diff --git a/ilex/tests/ui/goldens/invalid_digits.stdout b/ilex/tests/ui/digital/invalid.stderr similarity index 62% rename from ilex/tests/ui/goldens/invalid_digits.stdout rename to ilex/tests/ui/digital/invalid.stderr index d18e9f9..50b3962 100644 --- a/ilex/tests/ui/goldens/invalid_digits.stdout +++ b/ilex/tests/ui/digital/invalid.stderr @@ -1,41 +1,46 @@ error: unexpected `8` in `0o`-prefixed number - --> :3:3 + --> digital/invalid.txt:2:3 | -3 | 0o8 +2 | 0o8 | ^ | --- help: because this value is octal (base 8), digits should be within '0'..='7' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `8` in `0o`-prefixed number - --> :4:4 + --> digital/invalid.txt:3:4 | -4 | 0o08 +3 | 0o08 | ^ | ---- help: because this value is octal (base 8), digits should be within '0'..='7' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in number - --> :5:5 + --> digital/invalid.txt:4:5 | -5 | 0/0/aa11g +4 | 0/0/aa11g | ^ | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `a` in number - --> :5:6 + --> digital/invalid.txt:4:6 | -5 | 0/0/aa11g +4 | 0/0/aa11g | ^ | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: unexpected `g` in number - --> :5:9 + --> digital/invalid.txt:4:9 | -5 | 0/0/aa11g +4 | 0/0/aa11g | ^ | --------- help: because this value is decimal (base 10), digits should be within '0'..='9' | + = note: reported at: ilex/src/rt/emit2.rs:562:34 error: aborting due to 5 errors diff --git a/ilex/tests/ui/digital/invalid.txt b/ilex/tests/ui/digital/invalid.txt new file mode 100644 index 0000000..a425653 --- /dev/null +++ 
b/ilex/tests/ui/digital/invalid.txt @@ -0,0 +1,4 @@ +0o777 +0o8 +0o08 +0/0/aa11g diff --git a/ilex/tests/ui/goldens/missing_digits.stdout b/ilex/tests/ui/digital/missing.stderr similarity index 63% rename from ilex/tests/ui/goldens/missing_digits.stdout rename to ilex/tests/ui/digital/missing.stderr index 7898be4..ae4fa47 100644 --- a/ilex/tests/ui/goldens/missing_digits.stdout +++ b/ilex/tests/ui/digital/missing.stderr @@ -1,9 +1,10 @@ error: expected digits after `0x`, but found ` ` - --> :3:3 + --> digital/missing.txt:2:3 | -3 | 0x 0xf +2 | 0x 0xf | ^ expected digits after `0x` | ^^ because of this prefix | + = note: reported at: ilex/src/rt/emit2.rs:540:18 error: aborting due to previous error diff --git a/ilex/tests/ui/digital/missing.txt b/ilex/tests/ui/digital/missing.txt new file mode 100644 index 0000000..dd9d6d3 --- /dev/null +++ b/ilex/tests/ui/digital/missing.txt @@ -0,0 +1,2 @@ +0xdeadbeef +0x 0xf diff --git a/ilex/tests/ui/digital/points.stderr b/ilex/tests/ui/digital/points.stderr new file mode 100644 index 0000000..02acd80 --- /dev/null +++ b/ilex/tests/ui/digital/points.stderr @@ -0,0 +1,49 @@ +error: expected at least 2 `/`s + --> digital/points.txt:2:7 + | +2 | 1/2/3/4e4/5 + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: unrecognized character + --> digital/points.txt:2:6 + | +2 | 1/2/3/4e4/5 + | ^ + | + = note: reported at: ilex/src/rt/mod.rs:36:8 + +error: expected at least 2 `/`s + --> digital/points.txt:3:1 + | +3 | 1/2e4/5 + | ^^^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: expected at least 2 `/`s + --> digital/points.txt:4:11 + | +4 | 1/2/3e4/5/6 + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: unrecognized character + --> digital/points.txt:4:10 + | +4 | 1/2/3e4/5/6 + | ^ + | + = note: reported at: ilex/src/rt/mod.rs:36:8 + +error: expected at least 1 `/` + --> digital/points.txt:5:6 + | +5 | 1/2/3e4 + | ^^ + | + = note: reported at: ilex/src/rt/emit2.rs:523:16 + +error: 
aborting due to 6 errors diff --git a/ilex/tests/ui/digital/points.txt b/ilex/tests/ui/digital/points.txt new file mode 100644 index 0000000..b423a24 --- /dev/null +++ b/ilex/tests/ui/digital/points.txt @@ -0,0 +1,5 @@ +1/2/3e4/5 +1/2/3/4e4/5 +1/2e4/5 +1/2/3e4/5/6 +1/2/3e4 \ No newline at end of file diff --git a/ilex/tests/ui/digital/separators.stderr b/ilex/tests/ui/digital/separators.stderr new file mode 100644 index 0000000..ae70934 --- /dev/null +++ b/ilex/tests/ui/digital/separators.stderr @@ -0,0 +1,65 @@ +error: unexpected digit separator in `no_prefix@`-prefixed number + --> digital/separators.txt:2:11 + | +2 | no_prefix@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: unexpected digit separator in `no_suffix@`-prefixed number + --> digital/separators.txt:3:33 + | +3 | no_suffix@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:474:28 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:15 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:404:32 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:16 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:27 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:404:32 + +error: unexpected digit separator in `no_point@`-prefixed number + --> digital/separators.txt:4:28 + | +4 | no_point@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: unexpected digit separator in `no_exp@`-prefixed number + --> digital/separators.txt:5:19 + | +5 | no_exp@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:424:34 + +error: unexpected digit separator in 
`no_exp@`-prefixed number + --> digital/separators.txt:5:20 + | +5 | no_exp@_123_._456_e_789_._012_ + | ^ + | + = note: reported at: ilex/src/rt/emit2.rs:387:36 + +error: aborting due to 8 errors diff --git a/ilex/tests/ui/digital/separators.txt b/ilex/tests/ui/digital/separators.txt new file mode 100644 index 0000000..cc63efb --- /dev/null +++ b/ilex/tests/ui/digital/separators.txt @@ -0,0 +1,5 @@ +all_ok@_123_._456_e_789_._012_ +no_prefix@_123_._456_e_789_._012_ +no_suffix@_123_._456_e_789_._012_ +no_point@_123_._456_e_789_._012_ +no_exp@_123_._456_e_789_._012_ \ No newline at end of file diff --git a/ilex/tests/ui/eof.rs b/ilex/tests/ui/eof.rs deleted file mode 100644 index b254930..0000000 --- a/ilex/tests/ui/eof.rs +++ /dev/null @@ -1,157 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - #[rule("/*", "*/")] - c1: Lexeme, - - #[rule("[", "]")] - b1: Lexeme, - - #[rule("(", ")")] - b2: Lexeme, - - #[rule(Quoted::new("'"))] - q1: Lexeme, -} - -#[test] -fn eof_comment() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "/* ok /* nested */ */ /* /* not ok */") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/eof_comment.stdout"); -} - -#[test] -fn eof_comment_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -/* ok - /* nested */ */ -/* - /* not ok */ - - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/eof_comment_multiline.stdout", - ); -} - -#[test] -fn eof_bracket() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "[[[]]] [[]") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/eof_bracket.stdout"); -} - -#[test] -fn eof_bracket_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = 
ctx - .new_file( - "", - " -[ - [] -][ - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/eof_bracket_multiline.stdout", - ); -} - -#[test] -fn eof_quoted() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "'foo' '' 'bar") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/eof_quoted.stdout"); -} - -#[test] -fn eof_quoted_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -'foo' -'' -'bar - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/eof_quoted_multiline.stdout", - ); -} - -#[test] -fn mixed_brackets() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "[] () [) (] [(])") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/mixed_brackets.stdout"); -} - -#[test] -fn mixed_brackets_multiline() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file( - "", - " -[ - () -] -[ - ( - ] -) -[ - ) - ( -] - ", - ) - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/mixed_brackets_multiline.stdout", - ); -} diff --git a/ilex/tests/ui/goldens/eof_bracket.stdout b/ilex/tests/ui/eof/bracket.stderr similarity index 64% rename from ilex/tests/ui/goldens/eof_bracket.stdout rename to ilex/tests/ui/eof/bracket.stderr index 33a4b53..ea31961 100644 --- a/ilex/tests/ui/goldens/eof_bracket.stdout +++ b/ilex/tests/ui/eof/bracket.stderr @@ -1,9 +1,10 @@ error: expected closing `]`, but found - --> :1:11 + --> eof/bracket.txt:1:11 | -1 | [[[]]] [[] +1 | [[[]]] [[] | ^ expected `]` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:311:10 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/bracket.txt b/ilex/tests/ui/eof/bracket.txt new 
file mode 100644 index 0000000..b2e2a9c --- /dev/null +++ b/ilex/tests/ui/eof/bracket.txt @@ -0,0 +1 @@ +[[[]]] [[] diff --git a/ilex/tests/ui/goldens/eof_bracket_multiline.stdout b/ilex/tests/ui/eof/bracket_multiline.stderr similarity index 62% rename from ilex/tests/ui/goldens/eof_bracket_multiline.stdout rename to ilex/tests/ui/eof/bracket_multiline.stderr index 203c2f2..9d0148b 100644 --- a/ilex/tests/ui/goldens/eof_bracket_multiline.stdout +++ b/ilex/tests/ui/eof/bracket_multiline.stderr @@ -1,9 +1,10 @@ error: expected closing `]`, but found - --> :4:3 + --> eof/bracket_multiline.txt:3:3 | -4 | ][ +3 | ][ | ^ expected `]` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:311:10 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/bracket_multiline.txt b/ilex/tests/ui/eof/bracket_multiline.txt new file mode 100644 index 0000000..9435e2c --- /dev/null +++ b/ilex/tests/ui/eof/bracket_multiline.txt @@ -0,0 +1,3 @@ +[ + [] +][ \ No newline at end of file diff --git a/ilex/tests/ui/goldens/eof_comment.stdout b/ilex/tests/ui/eof/comment.stderr similarity index 76% rename from ilex/tests/ui/goldens/eof_comment.stdout rename to ilex/tests/ui/eof/comment.stderr index ef1288a..eebb64f 100644 --- a/ilex/tests/ui/goldens/eof_comment.stdout +++ b/ilex/tests/ui/eof/comment.stderr @@ -1,9 +1,10 @@ error: expected closing `*/`, but found - --> :1:38 + --> eof/comment.txt:1:38 | 1 | /* ok /* nested */ */ /* /* not ok */ | ^ expected `*/` here | -- help: previously opened here | + = note: reported at: ilex/src/rt/emit2.rs:306:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/comment.txt b/ilex/tests/ui/eof/comment.txt new file mode 100644 index 0000000..815cac1 --- /dev/null +++ b/ilex/tests/ui/eof/comment.txt @@ -0,0 +1 @@ +/* ok /* nested */ */ /* /* not ok */ \ No newline at end of file diff --git a/ilex/tests/ui/goldens/eof_comment_multiline.stdout b/ilex/tests/ui/eof/comment_multiline.stderr similarity 
index 59% rename from ilex/tests/ui/goldens/eof_comment_multiline.stdout rename to ilex/tests/ui/eof/comment_multiline.stderr index 88bcdcd..01a1c69 100644 --- a/ilex/tests/ui/goldens/eof_comment_multiline.stdout +++ b/ilex/tests/ui/eof/comment_multiline.stderr @@ -1,10 +1,11 @@ error: expected closing `*/`, but found - --> :5:15 + --> eof/comment_multiline.txt:4:15 | -4 | /* +3 | /* | -- help: previously opened here -5 | /* not ok */ +4 | /* not ok */ | ^ expected `*/` here | + = note: reported at: ilex/src/rt/emit2.rs:306:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/comment_multiline.txt b/ilex/tests/ui/eof/comment_multiline.txt new file mode 100644 index 0000000..d1f7ed4 --- /dev/null +++ b/ilex/tests/ui/eof/comment_multiline.txt @@ -0,0 +1,4 @@ +/* ok + /* nested */ */ +/* + /* not ok */ diff --git a/ilex/tests/ui/goldens/mixed_brackets.stdout b/ilex/tests/ui/eof/mixed_brackets.stderr similarity index 64% rename from ilex/tests/ui/goldens/mixed_brackets.stdout rename to ilex/tests/ui/eof/mixed_brackets.stderr index ad1b1b8..243c71d 100644 --- a/ilex/tests/ui/goldens/mixed_brackets.stdout +++ b/ilex/tests/ui/eof/mixed_brackets.stderr @@ -1,32 +1,36 @@ error: unexpected closing `)` - --> :1:8 + --> eof/mixed_brackets.txt:1:8 | 1 | [] () [) (] [(]) | ^ expected to be opened by `(` | + = note: reported at: ilex/src/rt/emit2.rs:254:22 error: expected closing `)`, but found `]` - --> :1:11 + --> eof/mixed_brackets.txt:1:11 | 1 | [] () [) (] [(]) | ^ expected `)` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:202:23 error: expected closing `)`, but found `]` - --> :1:15 + --> eof/mixed_brackets.txt:1:15 | 1 | [] () [) (] [(]) | ^ expected `)` here | - help: previously opened here | + = note: reported at: ilex/src/rt/lexer.rs:202:23 error: expected closing `)`, but found - --> :1:17 + --> eof/mixed_brackets.txt:1:17 | 1 | [] () [) (] [(]) | ^ expected `)` here | - help: previously opened here | + = note: 
reported at: ilex/src/rt/lexer.rs:311:10 error: aborting due to 4 errors diff --git a/ilex/tests/ui/eof/mixed_brackets.txt b/ilex/tests/ui/eof/mixed_brackets.txt new file mode 100644 index 0000000..0961a2b --- /dev/null +++ b/ilex/tests/ui/eof/mixed_brackets.txt @@ -0,0 +1 @@ +[] () [) (] [(]) \ No newline at end of file diff --git a/ilex/tests/ui/eof/mixed_brackets_multiline.stderr b/ilex/tests/ui/eof/mixed_brackets_multiline.stderr new file mode 100644 index 0000000..7cc82d5 --- /dev/null +++ b/ilex/tests/ui/eof/mixed_brackets_multiline.stderr @@ -0,0 +1,39 @@ +error: expected closing `)`, but found `]` + --> eof/mixed_brackets_multiline.txt:6:3 + | +5 | ( + | - help: previously opened here +6 | ] + | ^ expected `)` here + | + = note: reported at: ilex/src/rt/lexer.rs:202:23 + +error: unexpected closing `)` + --> eof/mixed_brackets_multiline.txt:9:3 + | +9 | ) + | ^ expected to be opened by `(` + | + = note: reported at: ilex/src/rt/emit2.rs:254:22 + +error: expected closing `)`, but found `]` + --> eof/mixed_brackets_multiline.txt:11:1 + | +10 | ( + | - help: previously opened here +11 | ] + | ^ expected `)` here + | + = note: reported at: ilex/src/rt/lexer.rs:202:23 + +error: expected closing `)`, but found + --> eof/mixed_brackets_multiline.txt:11:2 + | +10 | ( + | - help: previously opened here +11 | ] + | ^ expected `)` here + | + = note: reported at: ilex/src/rt/lexer.rs:311:10 + +error: aborting due to 4 errors diff --git a/ilex/tests/ui/eof/mixed_brackets_multiline.txt b/ilex/tests/ui/eof/mixed_brackets_multiline.txt new file mode 100644 index 0000000..ea6f94f --- /dev/null +++ b/ilex/tests/ui/eof/mixed_brackets_multiline.txt @@ -0,0 +1,11 @@ +[ + () +] +[ + ( + ] +) +[ + ) + ( +] \ No newline at end of file diff --git a/ilex/tests/ui/goldens/eof_quoted.stdout b/ilex/tests/ui/eof/quoted.stderr similarity index 64% rename from ilex/tests/ui/goldens/eof_quoted.stdout rename to ilex/tests/ui/eof/quoted.stderr index f6a98bb..b095d02 100644 --- 
a/ilex/tests/ui/goldens/eof_quoted.stdout +++ b/ilex/tests/ui/eof/quoted.stderr @@ -1,9 +1,10 @@ error: expected closing `'`, but found - --> :1:14 + --> eof/quoted.txt:1:14 | -1 | 'foo' '' 'bar +1 | 'foo' '' 'bar | ^ expected `'` here | - help: previously opened here | + = note: reported at: ilex/src/rt/emit2.rs:691:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/quoted.txt b/ilex/tests/ui/eof/quoted.txt new file mode 100644 index 0000000..a92eb58 --- /dev/null +++ b/ilex/tests/ui/eof/quoted.txt @@ -0,0 +1 @@ +'foo' '' 'bar diff --git a/ilex/tests/ui/goldens/eof_quoted_multiline.stdout b/ilex/tests/ui/eof/quoted_multiline.stderr similarity index 62% rename from ilex/tests/ui/goldens/eof_quoted_multiline.stdout rename to ilex/tests/ui/eof/quoted_multiline.stderr index 1f3f1cd..ff96acd 100644 --- a/ilex/tests/ui/goldens/eof_quoted_multiline.stdout +++ b/ilex/tests/ui/eof/quoted_multiline.stderr @@ -1,9 +1,10 @@ error: expected closing `'`, but found - --> :4:5 + --> eof/quoted_multiline.txt:3:5 | -4 | 'bar +3 | 'bar | ^ expected `'` here | - help: previously opened here | + = note: reported at: ilex/src/rt/emit2.rs:691:14 error: aborting due to previous error diff --git a/ilex/tests/ui/eof/quoted_multiline.txt b/ilex/tests/ui/eof/quoted_multiline.txt new file mode 100644 index 0000000..2d4dde5 --- /dev/null +++ b/ilex/tests/ui/eof/quoted_multiline.txt @@ -0,0 +1,3 @@ +'foo' +'' +'bar diff --git a/ilex/tests/ui/goldens/ambiguous_nums.stdout b/ilex/tests/ui/goldens/ambiguous_nums.stdout deleted file mode 100644 index b5432d9..0000000 --- a/ilex/tests/ui/goldens/ambiguous_nums.stdout +++ /dev/null @@ -1,17 +0,0 @@ -error: extraneous characters after `%`-suffixed number - --> :1:6 - | -1 | 1234%1234 1234/xyz - | ^^^^ - | -- help: maybe you meant to include a space here - | - -error: extraneous characters after `/`-suffixed number - --> :1:16 - | -1 | 1234%1234 1234/xyz - | ^^^ - | -- help: maybe you meant to include a space here - | - -error: 
aborting due to 2 errors diff --git a/ilex/tests/ui/goldens/digit_points.stdout b/ilex/tests/ui/goldens/digit_points.stdout deleted file mode 100644 index b738704..0000000 --- a/ilex/tests/ui/goldens/digit_points.stdout +++ /dev/null @@ -1,43 +0,0 @@ -error: expected at least 2 `/`s - --> :3:7 - | -3 | 1/2/3/4e4/5 - | ^ - | - -error: unrecognized character - --> :3:6 - | -3 | 1/2/3/4e4/5 - | ^ - | - -error: expected at least 2 `/`s - --> :4:1 - | -4 | 1/2e4/5 - | ^^^ - | - -error: expected at least 2 `/`s - --> :5:11 - | -5 | 1/2/3e4/5/6 - | ^ - | - -error: unrecognized character - --> :5:10 - | -5 | 1/2/3e4/5/6 - | ^ - | - -error: expected at least 1 `/` - --> :6:6 - | -6 | 1/2/3e4 - | ^^ - | - -error: aborting due to 6 errors diff --git a/ilex/tests/ui/goldens/digit_separators.stdout b/ilex/tests/ui/goldens/digit_separators.stdout deleted file mode 100644 index 4fa983d..0000000 --- a/ilex/tests/ui/goldens/digit_separators.stdout +++ /dev/null @@ -1,57 +0,0 @@ -error: unexpected digit separator in `no_prefix@`-prefixed number - --> :3:11 - | -3 | no_prefix@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_suffix@`-prefixed number - --> :4:33 - | -4 | no_suffix@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:15 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:16 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:27 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_point@`-prefixed number - --> :5:28 - | -5 | no_point@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_exp@`-prefixed number - --> :6:19 - | -6 | no_exp@_123_._456_e_789_._012_ - | ^ - | - -error: unexpected digit separator in `no_exp@`-prefixed number - --> :6:20 - | -6 | 
no_exp@_123_._456_e_789_._012_ - | ^ - | - -error: aborting due to 8 errors diff --git a/ilex/tests/ui/goldens/does_not_exist.stdout b/ilex/tests/ui/goldens/does_not_exist.stdout deleted file mode 100644 index 5791c75..0000000 --- a/ilex/tests/ui/goldens/does_not_exist.stdout +++ /dev/null @@ -1,3 +0,0 @@ -error: could not open input file `does_not_exist`: No such file or directory (os error 2) - -error: aborting due to previous error diff --git a/ilex/tests/ui/goldens/mixed_brackets_multiline.stdout b/ilex/tests/ui/goldens/mixed_brackets_multiline.stdout deleted file mode 100644 index 84d5d93..0000000 --- a/ilex/tests/ui/goldens/mixed_brackets_multiline.stdout +++ /dev/null @@ -1,35 +0,0 @@ -error: expected closing `)`, but found `]` - --> :7:3 - | -6 | ( - | - help: previously opened here -7 | ] - | ^ expected `)` here - | - -error: unexpected closing `)` - --> :10:3 - | -10 | ) - | ^ expected to be opened by `(` - | - -error: expected closing `)`, but found `]` - --> :12:1 - | -11 | ( - | - help: previously opened here -12 | ] - | ^ expected `)` here - | - -error: expected closing `)`, but found - --> :12:2 - | -11 | ( - | - help: previously opened here -12 | ] - | ^ expected `)` here - | - -error: aborting due to 4 errors diff --git a/ilex/tests/ui/goldens/no_xid_after_br.stdout b/ilex/tests/ui/goldens/no_xid_after_br.stdout deleted file mode 100644 index 5eb60d0..0000000 --- a/ilex/tests/ui/goldens/no_xid_after_br.stdout +++ /dev/null @@ -1,9 +0,0 @@ -error: extraneous characters after `$null[ ... 
]null` - --> :1:28 - | -1 | $[] $null[]null $null[]nullable - | ^^^^ - | -- help: maybe you meant to include a space here - | - -error: aborting due to previous error diff --git a/ilex/tests/ui/goldens/no_xid_after_id.stdout b/ilex/tests/ui/goldens/no_xid_after_id.stdout deleted file mode 100644 index 553403c..0000000 --- a/ilex/tests/ui/goldens/no_xid_after_id.stdout +++ /dev/null @@ -1,9 +0,0 @@ -error: extraneous characters after `/`-prefixed, `%q`-suffixed identifier - --> :1:22 - | -1 | /foo%q /null%q /foo%qua - | ^^ - | -- help: maybe you meant to include a space here - | - -error: aborting due to previous error diff --git a/ilex/tests/ui/goldens/not_utf8.stdout b/ilex/tests/ui/goldens/not_utf8.stdout deleted file mode 100644 index 02375b9..0000000 --- a/ilex/tests/ui/goldens/not_utf8.stdout +++ /dev/null @@ -1,3 +0,0 @@ -error: input file `tests/ui/not_utf8` was not valid UTF-8 - -error: aborting due to previous error diff --git a/ilex/tests/ui/main.rs b/ilex/tests/ui/main.rs index c57cad8..735e4c2 100644 --- a/ilex/tests/ui/main.rs +++ b/ilex/tests/ui/main.rs @@ -1,6 +1,253 @@ -mod ambiguous; -mod digital; -mod eof; -mod new_file; -mod too_small; -mod unrecognized; +use ilex::report::Options; +use ilex::rule::*; +use ilex::Context; +use ilex::Lexeme; + +#[gilded::test("tests/ui/ambiguous/*.txt")] +fn ambiguous(test: &gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule("null")] + kw: Lexeme, + #[rule("-null")] + kw2: Lexeme, + #[rule(")null")] + kw3: Lexeme, + + #[rule(Comment::nesting(Bracket::rust_style( + "/", + ("-", ""), + ("", "-"), + )))] + cm: Lexeme, + #[rule(Comment::nesting(Bracket::cxx_style( + Ident::new().min_len(1), + ("--", ""), + ("", ""), + )))] + cm2: Lexeme, + #[rule(Bracket::cxx_style( + Ident::new(), + ("$", "["), + ("]", ""), + ))] + br: Lexeme, + #[rule(Ident::new() + .prefix("/") + .suffixes(["", "%q", "/"]))] + id: Lexeme, + #[rule(Digital::new(10) + .prefixes(["", "%"]) + .suffixes(["", "%", "q", "/"]))] + nm: Lexeme, + 
#[rule(Quoted::new("'") + .prefixes(["%", "q"]) + .suffixes(["", "%", "q"]))] + st: Lexeme, + #[rule(Quoted::with(Bracket::cxx_style( + Ident::new(), + ("q", "("), + (")", ""), + )))] + st2: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), + } +} + +#[gilded::test("tests/ui/digital/*.txt")] +fn digital(test: &gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule(Digital::new(16).prefix("0x"))] + m1: Lexeme, + #[rule(Digital::new(8).prefix("0o"))] + m2: Lexeme, + + #[rule( Digital::new(10) + .point_limit(2..3) + .point('/') + .exponent("e", Digits::new(10).point_limit(1..2)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: true, + around_exp: true, + }))] + m0: Lexeme, + #[rule(Digital::new(10) + .prefix("all_ok@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: true, + around_exp: true, + }))] + n0: Lexeme, + #[rule( Digital::new(10) + .prefix("no_prefix@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: false, + suffix: true, + around_point: true, + around_exp: true, + }))] + n1: Lexeme, + #[rule(Digital::new(10) + .prefix("no_suffix@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: false, + around_point: true, + around_exp: true, + }))] + n2: Lexeme, + #[rule( Digital::new(10) + .prefix("no_point@") + .point_limit(0..3) + .exponent("e", 
Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: false, + around_exp: true, + }))] + n3: Lexeme, + #[rule(Digital::new(10) + .prefix("no_exp@") + .point_limit(0..3) + .exponent("e", Digits::new(10).point_limit(0..3)) + .separator_with("_", + SeparatorCornerCases { + prefix: true, + suffix: true, + around_point: true, + around_exp: false, + }))] + n4: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), + } +} + +#[gilded::test("tests/ui/eof/*.txt")] +fn eof(test: &gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule("/*", "*/")] + c1: Lexeme, + + #[rule("[", "]")] + b1: Lexeme, + + #[rule("(", ")")] + b2: Lexeme, + + #[rule(Quoted::new("'"))] + q1: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), + } +} + +#[gilded::test("tests/ui/too_small/*.txt")] +fn too_small(test: &gilded::Test) { + #[ilex::spec] + struct Spec { + #[rule(Ident::new().prefix("%"))] + i1: Lexeme, + #[rule(Ident::new().prefix("$").min_len(3))] + i2: Lexeme, + + #[rule(Bracket::rust_style("#", ("r#", "'"), ("'#", "")))] + r1: Lexeme, + #[rule(Bracket::rust_style("#", ("q###", "'"), ("'###", "")))] + r2: Lexeme, + + #[rule(Bracket::cxx_style(Ident::new().min_len(1), ("R'", "("), (")", "'")))] + 
c1: Lexeme, + #[rule(Bracket::cxx_style(Ident::new().min_len(3), ("Q'", "("), (")", "'")))] + c2: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), + } +} + +#[gilded::test("tests/ui/unrecognized/*.txt")] +fn unrecognized(test: &gilded::Test) { + #[ilex::spec] + struct Spec { + null: Lexeme, + + #[rule("[", "]")] + cm: Lexeme, + } + + let ctx = Context::new(); + let report = + ctx.new_report_with(Options { color: false, ..Default::default() }); + let file = ctx + .new_file_from_bytes(test.path(), test.text(), &report) + .unwrap(); + + let [tokens, stderr] = test.outputs(["tokens.yaml", "stderr"]); + match file.lex(Spec::get().spec(), &report) { + Ok(stream) => tokens(stream.summary()), + Err(fatal) => stderr(fatal.to_string()), + } +} diff --git a/ilex/tests/ui/new_file.rs b/ilex/tests/ui/new_file.rs deleted file mode 100644 index 44916ba..0000000 --- a/ilex/tests/ui/new_file.rs +++ /dev/null @@ -1,20 +0,0 @@ -use ilex::testing; -use ilex::Context; - -#[test] -fn does_not_exist() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx.open_file("does_not_exist", &report); - - testing::check_report(&report, "tests/ui/goldens/does_not_exist.stdout"); -} - -#[test] -fn not_utf8() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx.open_file("tests/ui/not_utf8", &report); - - testing::check_report(&report, "tests/ui/goldens/not_utf8.stdout"); -} diff --git a/ilex/tests/ui/not_utf8 b/ilex/tests/ui/not_utf8 deleted file mode 100644 index ce542ef..0000000 --- a/ilex/tests/ui/not_utf8 +++ /dev/null @@ -1 +0,0 @@ -˙ \ No newline at end of file diff --git 
a/ilex/tests/ui/too_small.rs b/ilex/tests/ui/too_small.rs deleted file mode 100644 index abade6f..0000000 --- a/ilex/tests/ui/too_small.rs +++ /dev/null @@ -1,61 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - #[rule(Ident::new().prefix("%"))] - i1: Lexeme, - #[rule(Ident::new().prefix("$").min_len(3))] - i2: Lexeme, - - #[rule(Bracket::rust_style("#", ("r#", "'"), ("'#", "")))] - r1: Lexeme, - #[rule(Bracket::rust_style("#", ("q###", "'"), ("'###", "")))] - r2: Lexeme, - - #[rule(Bracket::cxx_style(Ident::new().min_len(1), ("R'", "("), (")", "'")))] - c1: Lexeme, - #[rule(Bracket::cxx_style(Ident::new().min_len(3), ("Q'", "("), (")", "'")))] - c2: Lexeme, -} - -#[test] -fn ident_too_small() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "%foo $bar % $oo") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/ident_too_small.stdout"); -} - -#[test] -fn rust_string_hashes_too_small() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "r#'foo'# r'foo' q###'bar'### q##'bar'##") - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/rust_string_hashes_too_small.stdout", - ); -} - -#[test] -fn cxx_string_tag_too_small() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo'") - .lex(Spec::get().spec(), &report); - - testing::check_report( - &report, - "tests/ui/goldens/cxx_string_tag_too_small.stdout", - ); -} diff --git a/ilex/tests/ui/goldens/cxx_string_tag_too_small.stdout b/ilex/tests/ui/too_small/cxx_tag.stderr similarity index 73% rename from ilex/tests/ui/goldens/cxx_string_tag_too_small.stdout rename to ilex/tests/ui/too_small/cxx_tag.stderr index c206acd..a68c080 100644 --- a/ilex/tests/ui/goldens/cxx_string_tag_too_small.stdout +++ 
b/ilex/tests/ui/too_small/cxx_tag.stderr @@ -1,16 +1,18 @@ error: expected at least 1 character in identifier, but found none - --> :1:14 + --> too_small/cxx_tag.txt:1:14 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' | ^ expected at least 1 here | = help: this appears to be an empty identifier + = note: reported at: ilex/src/rt/emit2.rs:223:14 error: expected at least 3 characters in identifier, but found only 2 - --> :1:38 + --> too_small/cxx_tag.txt:1:38 | 1 | R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' | ^^ expected at least 3 here | + = note: reported at: ilex/src/rt/emit2.rs:223:14 error: aborting due to 2 errors diff --git a/ilex/tests/ui/too_small/cxx_tag.txt b/ilex/tests/ui/too_small/cxx_tag.txt new file mode 100644 index 0000000..03beed6 --- /dev/null +++ b/ilex/tests/ui/too_small/cxx_tag.txt @@ -0,0 +1 @@ +R'c(foo)c' R'(foo)' Q'foo(bar)foo' Q'oo(bar)oo' diff --git a/ilex/tests/ui/goldens/ident_too_small.stdout b/ilex/tests/ui/too_small/ident.stderr similarity index 69% rename from ilex/tests/ui/goldens/ident_too_small.stdout rename to ilex/tests/ui/too_small/ident.stderr index 069ce18..449d5de 100644 --- a/ilex/tests/ui/goldens/ident_too_small.stdout +++ b/ilex/tests/ui/too_small/ident.stderr @@ -1,8 +1,9 @@ error: expected at least 3 characters in identifier, but found only 2 - --> :1:13 + --> too_small/ident.txt:1:13 | 1 | %foo $bar % $oo | ^^^ expected at least 3 here | + = note: reported at: ilex/src/rt/emit2.rs:315:28 error: aborting due to previous error diff --git a/ilex/tests/ui/too_small/ident.txt b/ilex/tests/ui/too_small/ident.txt new file mode 100644 index 0000000..9734547 --- /dev/null +++ b/ilex/tests/ui/too_small/ident.txt @@ -0,0 +1 @@ +%foo $bar % $oo diff --git a/ilex/tests/ui/goldens/rust_string_hashes_too_small.stdout b/ilex/tests/ui/too_small/rust_hashes.stderr similarity index 62% rename from ilex/tests/ui/goldens/rust_string_hashes_too_small.stdout rename to ilex/tests/ui/too_small/rust_hashes.stderr index 
9573ce3..914b334 100644 --- a/ilex/tests/ui/goldens/rust_string_hashes_too_small.stdout +++ b/ilex/tests/ui/too_small/rust_hashes.stderr @@ -1,22 +1,25 @@ error: unrecognized characters - --> :1:10 + --> too_small/rust_hashes.txt:1:10 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## | ^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unexpected closing `'##` - --> :1:37 + --> too_small/rust_hashes.txt:1:37 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## | ^^^ expected to be opened by `r##'` | + = note: reported at: ilex/src/rt/emit2.rs:254:22 error: unrecognized characters - --> :1:30 + --> too_small/rust_hashes.txt:1:30 | 1 | r#'foo'# r'foo' q###'bar'### q##'bar'## | ^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: aborting due to 3 errors diff --git a/ilex/tests/ui/too_small/rust_hashes.txt b/ilex/tests/ui/too_small/rust_hashes.txt new file mode 100644 index 0000000..fd4ef1d --- /dev/null +++ b/ilex/tests/ui/too_small/rust_hashes.txt @@ -0,0 +1 @@ +r#'foo'# r'foo' q###'bar'### q##'bar'## diff --git a/ilex/tests/ui/unrecognized.rs b/ilex/tests/ui/unrecognized.rs deleted file mode 100644 index 327026b..0000000 --- a/ilex/tests/ui/unrecognized.rs +++ /dev/null @@ -1,23 +0,0 @@ -use ilex::rule::*; -use ilex::testing; -use ilex::Context; -use ilex::Lexeme; - -#[ilex::spec] -struct Spec { - null: Lexeme, - - #[rule("[", "]")] - cm: Lexeme, -} - -#[test] -fn unrecognized() { - let ctx = Context::new(); - let report = ctx.new_report(); - let _ = ctx - .new_file("", "multiple, null, [unrecognized], chunks!~ ") - .lex(Spec::get().spec(), &report); - - testing::check_report(&report, "tests/ui/goldens/unrecognized.stdout"); -} diff --git a/ilex/tests/ui/goldens/unrecognized.stdout b/ilex/tests/ui/unrecognized/unrecognized.stderr similarity index 62% rename from ilex/tests/ui/goldens/unrecognized.stdout rename to ilex/tests/ui/unrecognized/unrecognized.stderr index b9d557c..b5d8944 100644 --- a/ilex/tests/ui/goldens/unrecognized.stdout +++ 
b/ilex/tests/ui/unrecognized/unrecognized.stderr @@ -1,36 +1,41 @@ error: unrecognized characters - --> :1:1 + --> unrecognized.txt:1:1 | 1 | multiple, null, [unrecognized], chunks!~ | ^^^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized character - --> :1:15 + --> unrecognized.txt:1:15 | 1 | multiple, null, [unrecognized], chunks!~ | ^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized characters - --> :1:18 + --> unrecognized.txt:1:18 | 1 | multiple, null, [unrecognized], chunks!~ | ^^^^^^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized character - --> :1:31 + --> unrecognized.txt:1:31 | 1 | multiple, null, [unrecognized], chunks!~ | ^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: unrecognized characters - --> :1:33 + --> unrecognized.txt:1:33 | 1 | multiple, null, [unrecognized], chunks!~ | ^^^^^^^^ | + = note: reported at: ilex/src/rt/mod.rs:36:8 error: aborting due to 5 errors diff --git a/ilex/tests/ui/unrecognized/unrecognized.txt b/ilex/tests/ui/unrecognized/unrecognized.txt new file mode 100644 index 0000000..449b03a --- /dev/null +++ b/ilex/tests/ui/unrecognized/unrecognized.txt @@ -0,0 +1 @@ +multiple, null, [unrecognized], chunks!~ diff --git a/proc2decl/Cargo.toml b/proc2decl/Cargo.toml new file mode 100644 index 0000000..30ddd97 --- /dev/null +++ b/proc2decl/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "proc2decl" +version = "0.1.0" +description = "Write less frickin' proc macro code" + +edition.workspace = true +authors.workspace = true +homepage.workspace = true +repository.workspace = true +keywords.workspace = true +license.workspace = true + +[dependencies] +nu-glob = "0.101.0" +unicode-xid = "0.2.6" +walkdir = "2.5.0" diff --git a/proc2decl/README.md b/proc2decl/README.md new file mode 100644 index 0000000..5177891 --- /dev/null +++ b/proc2decl/README.md @@ -0,0 +1,28 @@ +# proc2decl + +`proc2decl` exists for one reason only: because proc macros are a toxic 
+ecosystem. + +Sometimes, you want to use an attribute to define a macro. Unfortunately, +Rust does not support declarative macros (also called macros-by-example) +for attributes, for reasons that essentially boil down to cookie-licking. + +This crate exists for one purpose only, and that is to facilitate writing +declarative macros that an attribute converts into. + +## How To Use + +1. Define the macro-by-example you wish to use as the main implementation of + your attribute or derive. + +2. Create a proc-macro crate. This is where the documentation for your + attribute will need to live. Your actual crate should depend on this + crate. + +3. Use `bridge!()` to define your bridge proc macros. These + macro calls should be documented, since their doc comments are the ones + that will appear in rustdoc for your macros. + +4. `pub use` the macros in your actual crate. + +Proc macros suck! diff --git a/proc2decl/src/lib.rs b/proc2decl/src/lib.rs new file mode 100644 index 0000000..4940df5 --- /dev/null +++ b/proc2decl/src/lib.rs @@ -0,0 +1,434 @@ +//! `proc2decl` exists for one reason only: because proc macros are a toxic +//! ecosystem. +//! +//! Sometimes, you want to use an attribute to define a macro. Unfortunately, +//! Rust does not support declarative macros (also called macros-by-example) +//! for attributes, for reasons that essentially boil down to cookie-licking. +//! +//! This crate exists for one purpose only, and that is to facilitate writing +//! declarative macros that an attribute converts into. +//! +//! # How To Use +//! +//! 1. Define the macro-by-example you wish to use as the main implementation of +//! your attribute or derive. +//! +//! 2. Create a proc-macro crate. This is where the documentation for your +//! attribute will need to live. Your actual crate should depend on this +//! crate. +//! +//! 3. Use [`bridge!()`] to define your bridge proc macros. These +//! macro calls should be documented, since their doc comments are the ones +//! 
that will appear in rustdoc for your macros. +//! +//! 4. `pub use` the macros in your actual crate. +//! +//! Proc macros suck! + +pub extern crate proc_macro; + +use std::collections::HashMap; +use std::env; +use std::ffi::OsStr; +use std::fs; +use std::path::Component; +use std::path::PathBuf; +use std::sync::atomic::AtomicU64; +use std::sync::atomic::Ordering; + +use nu_glob::Pattern; +use proc_macro::Delimiter; +use proc_macro::Group; +use proc_macro::Ident; +use proc_macro::Literal; +use proc_macro::Punct; +use proc_macro::Spacing; +use proc_macro::Span; +use proc_macro::TokenStream; +use proc_macro::TokenTree; +use walkdir::WalkDir; + +/// Defines a new attribute or derive proc macro that forwards to the given +/// function-like macro. +/// +/// # Attribute Macros +/// +/// The tokens passed to `$macro!()` will be `#[$name(...)]` containing the +/// arguments of the attribute, followed by the item passed to the macro by +/// rustc. Like all other attribute macros, it will replace the annotated +/// item with the result of evaluating the macro, in this case a call to the +/// actual macro-by-example that implements it. +/// +/// ```ignore +/// macro_rules! __impl { +/// (#[my_macro] const $name:ident: $ty:ty = $expr:expr;) => {/* ... */} +/// } +/// +/// proc2decl::bridge! { +/// // My cool macro. +/// macro #[my_macro] => my_crate::__impl; +/// } +/// ``` +/// +/// # Derive Macros +/// +/// The tokens passed to `$macro!()` will be the item passed to the macro by +/// rustc. Like all other derive macros, it will insert the result of evaluating +/// the macro immediately after the annotated item, in this case a call to the +/// actual macro-by-example that implements it. +/// +/// The `$attrs` are the names of inert helper attributes to define for +/// the derive. +/// +/// ```ignore +/// macro_rules! __impl { +/// (struct $name:ident {}) => {/* ... */} +/// } +/// +/// proc2decl::bridge! { +/// // My cool macro. 
+/// macro #[derive(MyMacro)], #[helper] => my_crate::__impl; +/// } +/// ``` +#[macro_export] +macro_rules! bridge { + ( + $(#[$attr:meta])* + macro #[$name:ident] => $crate_:ident::$macro:ident; + ) => { + $(#[$attr])* + #[proc_macro_attribute] + pub fn $name( + attr: $crate::proc_macro::TokenStream, + item: $crate::proc_macro::TokenStream, + ) -> $crate::proc_macro::TokenStream { + use $crate::proc_macro::*; + let span = Span::call_site(); + + $crate::attr_bridge( + stringify!($name), + stringify!($crate_), + stringify!($macro), + span, + attr, + item, + ) + } + }; + + ( + $(#[$attr:meta])* + macro #[derive($name:ident)] $(, #[$attrs:ident])* => $crate_:ident::$macro:ident + ) => { + $(#[$attr])* + #[proc_macro_derive($name, attributes($($attrs,)*))] + pub fn $name( + item: $crate::proc_macro::TokenStream, + ) -> $crate::proc_macro::TokenStream { + use $crate::proc_macro::*; + let span = Span::call_site(); + + $crate::derive_bridge( + stringify!($name), + stringify!($crate_), + stringify!($macro), + span, + item, + ) + } + }; +} + +/// Defines a new attribute proc macro that finds files matching a glob and +/// forwards the directory structure to the given function-like macro, in such +/// a way that a corresponding module structure can be defined using the +/// directory structure. +/// +/// The resulting attribute should be called as #[my_attr("glob", ...)], where +/// `glob` is a glob relative to the root of the crate the attribute appears in. +/// The glob will not match files across symlinks. +/// +/// The expanded-to macro will be called with the annotated item, followed by +/// token trees in the following form: +/// +/// ```ignore +/// foo { +/// bar { +/// baz("foo/bar/baz.txt", b"contents") +/// empty("foo/bar/empty.txt", b"contents") +/// } +/// bar2 { +/// boing("foo/bar2/boing.txt", b"contents") +/// } +/// } +/// ``` +/// +/// Any directories whose names contain identifiers that are not valid Rust +/// identifiers will be ignored. 
+#[macro_export] +macro_rules! fs_bridge { + ( + $(#[$attr:meta])* + macro #[$name:ident] => $crate_:ident::$macro:ident; + ) => { + $(#[$attr])* + #[proc_macro_attribute] + pub fn $name( + attr: $crate::proc_macro::TokenStream, + item: $crate::proc_macro::TokenStream, + ) -> $crate::proc_macro::TokenStream { + use $crate::proc_macro::*; + let span = Span::call_site(); + + $crate::dir_bridge( + stringify!($name), + stringify!($crate_), + stringify!($macro), + span, + attr, + item, + ) + } + }; +} + +static COUNTER: AtomicU64 = AtomicU64::new(0); + +#[doc(hidden)] +pub fn derive_bridge( + _name: &str, + crate_: &str, + macro_: &str, + span: Span, + item: TokenStream, +) -> TokenStream { + let extern_ = + format!("__extern{}_{}__", COUNTER.fetch_add(1, Ordering::Relaxed), crate_); + + stream([ + // extern crate $crate as __extern_$crate__; + Ident::new("extern", span).into(), + Ident::new("crate", span).into(), + Ident::new(crate_, span).into(), + Ident::new("as", span).into(), + Ident::new(&extern_, span).into(), + Punct::new(';', Spacing::Alone).into(), + // __extern_$crate__::$macro! 
{ attr item } + Ident::new(&extern_, span).into(), + Punct::new(':', Spacing::Joint).into(), + Punct::new(':', Spacing::Alone).into(), + Ident::new(macro_, span).into(), + Punct::new('!', Spacing::Alone).into(), + Group::new(Delimiter::Brace, item).into(), + ]) +} + +#[doc(hidden)] +pub fn attr_bridge( + name: &str, + crate_: &str, + macro_: &str, + span: Span, + args: TokenStream, + mut item: TokenStream, +) -> TokenStream { + if !args.is_empty() { + item = stream2( + [ + // #[name(args)] + Punct::new('#', Spacing::Alone).into(), + Group::new( + Delimiter::Bracket, + stream([ + Ident::new(name, span).into(), + Group::new(Delimiter::Parenthesis, args).into(), + ]), + ) + .into(), + ], + item, + ); + } else { + item = stream2( + [ + // #[name] + Punct::new('#', Spacing::Alone).into(), + Group::new(Delimiter::Bracket, stream([Ident::new(name, span).into()])) + .into(), + ], + item, + ); + } + + derive_bridge(name, crate_, macro_, span, item) +} + +#[doc(hidden)] +pub fn dir_bridge( + name: &str, + crate_: &str, + macro_: &str, + span: Span, + args: TokenStream, + item: TokenStream, +) -> TokenStream { + let Some(TokenTree::Literal(lit)) = args.clone().into_iter().next() else { + panic!("#[{crate_}::{name}] requires a glob as its first argument"); + }; + + // TODO(mcyoung): support all Rust string literals. 
+ let lit = lit.to_string(); + if !lit.starts_with('"') || !lit.starts_with('"') || lit.contains('\\') { + panic!("#[{crate_}::{name}] only supports single-quoted string literals without escapes"); + } + let glob = match Pattern::new(&lit[1..lit.len() - 1]) { + Ok(p) => p, + Err(e) => { + panic!("#[{crate_}::{name}] requires a glob as its first argument: {e}") + } + }; + + struct File { + path: String, + components: Vec, + contents: Vec, + } + + let mut names = Vec::new(); + let mut table = HashMap::new(); + let mut push_name = |name: &OsStr| -> Option { + let utf8 = name.to_str()?; + if !is_valid_ident(utf8) { + return None; + } + + Some(*table.entry(utf8.to_string()).or_insert_with_key(|k| { + let n = names.len(); + names.push(k.clone()); + n + })) + }; + + let mut files = Vec::new(); + let root = PathBuf::from(env::var_os("CARGO_MANIFEST_DIR").unwrap()); + 'walk: for entry in WalkDir::new(&root) { + let entry = match entry { + Ok(p) => p, + Err(e) => panic!("directory walk failed: {e}"), + }; + + let path = entry.path(); + if path.is_dir() { + continue 'walk; + } + + let rel = path.strip_prefix(&root).unwrap(); + if !glob.matches_path(rel) { + continue 'walk; + } + + let mut components = Vec::new(); + if let Some(parent) = rel.parent() { + for component in parent.components() { + let Component::Normal(component) = component else { + continue 'walk; + }; + let Some(name) = push_name(component) else { + continue 'walk; + }; + components.push(name); + } + } + + let Some(name) = push_name(path.file_stem().unwrap()) else { + continue 'walk; + }; + components.push(name); + + let Some(utf8) = path.as_os_str().to_str() else { + continue 'walk; + }; + + let contents = match fs::read(path) { + Ok(bytes) => bytes, + Err(e) => panic!("could not open file: {e}"), + }; + + files.push(File { + path: utf8.to_string(), + components, + contents, + }); + } + files.sort_by(|a, b| Ord::cmp(&a.components, &b.components)); + + let mut mod_stack: Vec> = 
vec![item.into_iter().collect()]; + let mut dir_stack = &[][..]; + for file in &files { + let dir = &file.components[..file.components.len() - 1]; + let [_, remove, add] = common_prefix(dir_stack, dir); + for &i in remove { + let items = mod_stack.pop().unwrap(); + mod_stack.last_mut().unwrap().extend_from_slice(&[ + Ident::new(&names[i], span).into(), + Group::new(Delimiter::Brace, items.into_iter().collect()).into(), + ]); + } + for _ in add { + mod_stack.push(Vec::new()); + } + dir_stack = dir; + + let name = &names[*file.components.last().unwrap()]; + mod_stack.last_mut().unwrap().extend_from_slice(&[ + Ident::new(name, span).into(), + Group::new( + Delimiter::Parenthesis, + stream([ + Literal::string(&file.path).into(), + Punct::new(',', Spacing::Alone).into(), + Literal::byte_string(&file.contents).into(), + ]), + ) + .into(), + ]); + } + + attr_bridge( + name, + crate_, + macro_, + span, + args, + mod_stack.swap_remove(0).into_iter().collect(), + ) +} + +fn common_prefix<'a, T: PartialEq>(a: &'a [T], b: &'a [T]) -> [&'a [T]; 3] { + for (i, (x, y)) in a.iter().zip(b).enumerate() { + if x != y { + return [&a[..i], &a[i..], &b[i..]]; + } + } + [a, &[], &[]] +} + +fn is_valid_ident(name: &str) -> bool { + use unicode_xid::UnicodeXID as _; + // See https://doc.rust-lang.org/reference/identifiers.html + name.chars().enumerate().all(|(i, c)| { + if i == 0 { + c == '_' || c.is_xid_start() + } else { + c.is_xid_continue() + } + }) +} + +fn stream(tt: [TokenTree; N]) -> TokenStream { + tt.into_iter().collect() +} + +fn stream2(tt: [TokenTree; N], ts: TokenStream) -> TokenStream { + tt.into_iter().chain(ts).collect() +}