Skip to content

Commit 73e3c86

Browse files
add lisp interpreter in nom-rust
1 parent b2ea463 commit 73e3c86

File tree

2 files changed

+361
-0
lines changed

2 files changed

+361
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
[package]
2+
name = "lisp-interpreter-in-nom"
3+
version = "0.1.0"
4+
authors = ["Yu Chen <[email protected]>"]
5+
edition = "2018"
6+
7+
[dependencies]
8+
nom = "5.0"
9+
jemallocator = "^0.1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,352 @@
1+
//! In this example we build an [S-expression](https://en.wikipedia.org/wiki/S-expression)
2+
//! parser and tiny [lisp](https://en.wikipedia.org/wiki/Lisp_(programming_language)) interpreter.
3+
//! Lisp is a simple type of language made up of Atoms and Lists, forming easily parsable trees.
4+
5+
extern crate jemallocator;
6+
extern crate nom;
7+
8+
#[global_allocator]
9+
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
10+
11+
use nom::{
12+
branch::alt,
13+
bytes::complete::tag,
14+
character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
15+
combinator::{cut, map, map_res, opt},
16+
error::{context, VerboseError},
17+
multi::many0,
18+
sequence::{delimited, preceded, terminated, tuple},
19+
IResult,
20+
};
21+
22+
/// We start by defining the types that define the shape of data that we want.
23+
/// In this case, we want something tree-like
24+
25+
/// Starting from the most basic, we define the built-in functions that our lisp has.
///
/// The enum is a plain set of unit variants, so besides `Copy` it also derives
/// `Eq` and `Hash`, which lets a `BuiltIn` be used as a key in hash maps and sets.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum BuiltIn {
    /// `+`
    Plus,
    /// `-`
    Minus,
    /// `*`
    Times,
    /// `/`
    Divide,
    /// `=`
    Equal,
    /// `not`
    Not,
}
35+
36+
/// We now wrap this type and a few other primitives into our Atom type.
37+
/// Remember from before that Atoms form one half of our language.
38+
39+
#[derive(Debug, PartialEq, Clone)]
40+
pub enum Atom {
41+
Num(i32),
42+
Keyword(String),
43+
Boolean(bool),
44+
BuiltIn(BuiltIn),
45+
}
46+
47+
/// The remaining half is Lists. We implement these as recursive Expressions.
48+
/// For a list of numbers, we have `'(1 2 3)`, which we'll parse to:
49+
/// ```
50+
/// Expr::Quote(vec![Expr::Constant(Atom::Num(1)),
51+
/// Expr::Constant(Atom::Num(2)),
52+
/// Expr::Constant(Atom::Num(3))])
53+
/// Quote takes an S-expression and prevents evaluation of it, making it a data
54+
/// structure that we can deal with programmatically. Thus any valid expression
55+
/// is also a valid data structure in Lisp itself.
56+
57+
#[derive(Debug, PartialEq, Clone)]
58+
pub enum Expr {
59+
Constant(Atom),
60+
/// (func-name arg1 arg2)
61+
Application(Box<Expr>, Vec<Expr>),
62+
/// (if predicate do-this)
63+
If(Box<Expr>, Box<Expr>),
64+
/// (if predicate do-this otherwise-do-this)
65+
IfElse(Box<Expr>, Box<Expr>, Box<Expr>),
66+
/// '(3 (if (+ 3 3) 4 5) 7)
67+
Quote(Vec<Expr>),
68+
}
69+
70+
/// Continuing the trend of starting from the simplest piece and building up,
71+
/// we start by creating a parser for the built-in operator functions.
72+
fn parse_builtin_op<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
73+
// one_of matches one of the characters we give it
74+
let (i, t) = one_of("+-*/=")(i)?;
75+
76+
// because we are matching single character tokens, we can do the matching logic
77+
// on the returned value
78+
Ok((
79+
i,
80+
match t {
81+
'+' => BuiltIn::Plus,
82+
'-' => BuiltIn::Minus,
83+
'*' => BuiltIn::Times,
84+
'/' => BuiltIn::Divide,
85+
'=' => BuiltIn::Equal,
86+
_ => unreachable!(),
87+
},
88+
))
89+
}
90+
91+
fn parse_builtin<'a>(i: &'a str) -> IResult<&'a str, BuiltIn, VerboseError<&'a str>> {
92+
// alt gives us the result of first parser that succeeds, of the series of
93+
// parsers we give it
94+
alt((
95+
parse_builtin_op,
96+
// map lets us process the parsed output, in this case we know what we parsed,
97+
// so we ignore the input and return the BuiltIn directly
98+
map(tag("not"), |_| BuiltIn::Not),
99+
))(i)
100+
}
101+
102+
/// Our boolean values are also constant, so we can do it the same way
103+
fn parse_bool<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
104+
alt((map(tag("#t"), |_| Atom::Boolean(true)), map(tag("#f"), |_| Atom::Boolean(false))))(i)
105+
}
106+
107+
/// The next easiest thing to parse are keywords.
108+
/// We introduce some error handling combinators: `context` for human readable errors
109+
/// and `cut` to prevent back-tracking.
110+
///
111+
/// Put plainly: `preceded(tag(":"), cut(alpha1))` means that once we see the `:`
112+
/// character, we have to see one or more alphabetic chararcters or the input is invalid.
113+
fn parse_keyword<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
114+
map(context("keyword", preceded(tag(":"), cut(alpha1))), |sym_str: &str| {
115+
Atom::Keyword(sym_str.to_string())
116+
})(i)
117+
}
118+
119+
/// Next up is number parsing. We're keeping it simple here by accepting any number (> 1)
120+
/// of digits but ending the program if it doesn't fit into an i32.
121+
fn parse_num<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
122+
alt((
123+
map_res(digit1, |digit_str: &str| digit_str.parse::<i32>().map(Atom::Num)),
124+
map(preceded(tag("-"), digit1), |digit_str: &str| {
125+
Atom::Num(-1 * digit_str.parse::<i32>().unwrap())
126+
}),
127+
))(i)
128+
}
129+
130+
/// Now we take all these simple parsers and connect them.
131+
/// We can now parse half of our language!
132+
fn parse_atom<'a>(i: &'a str) -> IResult<&'a str, Atom, VerboseError<&'a str>> {
133+
alt((parse_num, parse_bool, map(parse_builtin, Atom::BuiltIn), parse_keyword))(i)
134+
}
135+
136+
/// We then add the Expr layer on top
137+
fn parse_constant<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
138+
map(parse_atom, |atom| Expr::Constant(atom))(i)
139+
}
140+
141+
/// Before continuing, we need a helper function to parse lists.
142+
/// A list starts with `(` and ends with a matching `)`.
143+
/// By putting whitespace and newline parsing here, we can avoid having to worry about it
144+
/// in much of the rest of the parser.
145+
///
146+
/// Unlike the previous functions, this function doesn't take or consume input, instead it
147+
/// takes a parsing function and returns a new parsing function.
148+
fn s_exp<'a, O1, F>(inner: F) -> impl Fn(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>>
149+
where
150+
F: Fn(&'a str) -> IResult<&'a str, O1, VerboseError<&'a str>>,
151+
{
152+
delimited(
153+
char('('),
154+
preceded(multispace0, inner),
155+
context("closing paren", cut(preceded(multispace0, char(')')))),
156+
)
157+
}
158+
159+
/// We can now use our new combinator to define the rest of the `Expr`s.
160+
///
161+
/// Starting with function application, we can see how the parser mirrors our data
162+
/// definitions: our definition is `Application(Box<Expr>, Vec<Expr>)`, so we know
163+
/// that we need to parse an expression and then parse 0 or more expressions, all
164+
/// wrapped in an S-expression.
165+
///
166+
/// `tuple` is used to sequence parsers together, so we can translate this directly
167+
/// and then map over it to transform the output into an `Expr::Application`
168+
fn parse_application<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
169+
let application_inner = map(tuple((parse_expr, many0(parse_expr))), |(head, tail)| {
170+
Expr::Application(Box::new(head), tail)
171+
});
172+
// finally, we wrap it in an s-expression
173+
s_exp(application_inner)(i)
174+
}
175+
176+
/// Because `Expr::If` and `Expr::IfElse` are so similar (we easily could have
177+
/// defined `Expr::If` to have an `Option` for the else block), we parse both
178+
/// in a single function.
179+
///
180+
/// In fact, we define our parser as if `Expr::If` was defined with an Option in it,
181+
/// we have the `opt` combinator which fits very nicely here.
182+
fn parse_if<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
183+
let if_inner = context(
184+
"if expression",
185+
map(
186+
preceded(
187+
// here to avoid ambiguity with other names starting with `if`, if we added
188+
// variables to our language, we say that if must be terminated by at least
189+
// one whitespace character
190+
terminated(tag("if"), multispace1),
191+
cut(tuple((parse_expr, parse_expr, opt(parse_expr)))),
192+
),
193+
|(pred, true_branch, maybe_false_branch)| {
194+
if let Some(false_branch) = maybe_false_branch {
195+
Expr::IfElse(Box::new(pred), Box::new(true_branch), Box::new(false_branch))
196+
} else {
197+
Expr::If(Box::new(pred), Box::new(true_branch))
198+
}
199+
},
200+
),
201+
);
202+
s_exp(if_inner)(i)
203+
}
204+
205+
/// A quoted S-expression is list data structure.
206+
///
207+
/// This example doesn't have the symbol atom, but by adding variables and changing
208+
/// the definition of quote to not always be around an S-expression, we'd get them
209+
/// naturally.
210+
fn parse_quote<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
211+
// this should look very straight-forward after all we've done:
212+
// we find the `'` (quote) character, use cut to say that we're unambiguously
213+
// looking for an s-expression of 0 or more expressions, and then parse them
214+
map(context("quote", preceded(tag("'"), cut(s_exp(many0(parse_expr))))), |exprs| {
215+
Expr::Quote(exprs)
216+
})(i)
217+
}
218+
219+
/// We tie them all together again, making a top-level expression parser!
220+
221+
fn parse_expr<'a>(i: &'a str) -> IResult<&'a str, Expr, VerboseError<&'a str>> {
222+
preceded(multispace0, alt((parse_constant, parse_application, parse_if, parse_quote)))(i)
223+
}
224+
225+
/// And that's it!
226+
/// We can now parse our entire lisp language.
227+
///
228+
/// But in order to make it a little more interesting, we can hack together
229+
/// a little interpreter to take an Expr, which is really an
230+
/// [Abstract Syntax Tree](https://en.wikipedia.org/wiki/Abstract_syntax_tree) (AST),
231+
/// and give us something back
232+
233+
/// To start we define a couple of helper functions
234+
fn get_num_from_expr(e: Expr) -> Option<i32> {
235+
if let Expr::Constant(Atom::Num(n)) = e {
236+
Some(n)
237+
} else {
238+
None
239+
}
240+
}
241+
242+
fn get_bool_from_expr(e: Expr) -> Option<bool> {
243+
if let Expr::Constant(Atom::Boolean(b)) = e {
244+
Some(b)
245+
} else {
246+
None
247+
}
248+
}
249+
250+
/// This function tries to reduce the AST.
251+
/// This has to return an Expression rather than an Atom because quoted s_expressions
252+
/// can't be reduced
253+
fn eval_expression(e: Expr) -> Option<Expr> {
254+
match e {
255+
// Constants and quoted s-expressions are our base-case
256+
Expr::Constant(_) | Expr::Quote(_) => Some(e),
257+
// we then recursively `eval_expression` in the context of our special forms
258+
// and built-in operators
259+
Expr::If(pred, true_branch) => {
260+
let reduce_pred = eval_expression(*pred)?;
261+
if get_bool_from_expr(reduce_pred)? {
262+
eval_expression(*true_branch)
263+
} else {
264+
None
265+
}
266+
}
267+
Expr::IfElse(pred, true_branch, false_branch) => {
268+
let reduce_pred = eval_expression(*pred)?;
269+
if get_bool_from_expr(reduce_pred)? {
270+
eval_expression(*true_branch)
271+
} else {
272+
eval_expression(*false_branch)
273+
}
274+
}
275+
Expr::Application(head, tail) => {
276+
let reduced_head = eval_expression(*head)?;
277+
let reduced_tail = tail.into_iter().map(|expr| eval_expression(expr)).collect::<Option<Vec<Expr>>>()?;
278+
if let Expr::Constant(Atom::BuiltIn(bi)) = reduced_head {
279+
Some(Expr::Constant(match bi {
280+
BuiltIn::Plus => Atom::Num(
281+
reduced_tail
282+
.into_iter()
283+
.map(get_num_from_expr)
284+
.collect::<Option<Vec<i32>>>()?
285+
.into_iter()
286+
.sum(),
287+
),
288+
BuiltIn::Times => Atom::Num(
289+
reduced_tail
290+
.into_iter()
291+
.map(get_num_from_expr)
292+
.collect::<Option<Vec<i32>>>()?
293+
.into_iter()
294+
.product(),
295+
),
296+
BuiltIn::Equal => Atom::Boolean(reduced_tail.iter().zip(reduced_tail.iter().skip(1)).all(|(a, b)| a == b)),
297+
BuiltIn::Not => {
298+
if reduced_tail.len() != 1 {
299+
return None;
300+
} else {
301+
Atom::Boolean(!get_bool_from_expr(reduced_tail.first().cloned().unwrap())?)
302+
}
303+
}
304+
BuiltIn::Minus => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
305+
let fe = get_num_from_expr(first_elem)?;
306+
reduced_tail
307+
.into_iter()
308+
.map(get_num_from_expr)
309+
.collect::<Option<Vec<i32>>>()?
310+
.into_iter()
311+
.skip(1)
312+
.fold(fe, |a, b| a - b)
313+
} else {
314+
Default::default()
315+
}),
316+
BuiltIn::Divide => Atom::Num(if let Some(first_elem) = reduced_tail.first().cloned() {
317+
let fe = get_num_from_expr(first_elem)?;
318+
reduced_tail
319+
.into_iter()
320+
.map(get_num_from_expr)
321+
.collect::<Option<Vec<i32>>>()?
322+
.into_iter()
323+
.skip(1)
324+
.fold(fe, |a, b| a / b)
325+
} else {
326+
Default::default()
327+
}),
328+
}))
329+
} else {
330+
None
331+
}
332+
}
333+
}
334+
}
335+
336+
/// And we add one more top-level function to tie everything together, letting
337+
/// us call eval on a string directly
338+
fn eval_from_str(src: &str) -> Result<Expr, String> {
339+
parse_expr(src)
340+
.map_err(|e: nom::Err<VerboseError<&str>>| format!("{:#?}", e))
341+
.and_then(|(_, exp)| eval_expression(exp).ok_or("Eval failed".to_string()))
342+
}
343+
344+
fn main() {
345+
let expression_1 = "((if (= (+ 3 (/ 9 3))
346+
(* 2 3))
347+
*
348+
/)
349+
456 123)";
350+
println!("\"{}\"\nevaled gives us: {:?}", expression_1, eval_from_str(expression_1));
351+
}
352+

0 commit comments

Comments
 (0)