Skip to content

Commit 9ba32ca

Browse files
add a lexer in rust
1 parent 73e3c86 commit 9ba32ca

File tree

6 files changed

+362
-0
lines changed

6 files changed

+362
-0
lines changed

rust/lexer/lexer-in-rust/Cargo.toml

+6
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
[package]
2+
name = "writing_an_interpreter_in_rust"
3+
version = "0.1.0"
4+
authors = ["Chris Aumann <[email protected]>"]
5+
6+
[dependencies]

rust/lexer/lexer-in-rust/README.md

+16
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Writing an interpreter in Rust
2+
3+
This is an implementation of the Monkey interpreter from Thorsten Ball's book [Writing an interpreter in Go](https://interpreterbook.com).
4+
5+
[![Build Status](https://travis-ci.org/chr4/writing_an_interpreter_in_rust.svg?branch=master)](https://travis-ci.org/chr4/writing_an_interpreter_in_rust)
6+
7+
## Usage
8+
9+
```bash
10+
$ git clone https://github.com/chr4/writing_an_interpreter_in_rust
11+
$ cd writing_an_interpreter_in_rust
12+
$ cargo test
13+
$ cargo run
14+
```
15+
16+
For more information, see this [blog post](https://chr4.org/blog/2016/12/09/writing-an-interpreter-in-rust/).

rust/lexer/lexer-in-rust/src/lexer.rs

+245
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
use token;
2+
use token::Token;
3+
4+
use std::str::Chars;
5+
use std::iter::Peekable;
6+
7+
/// Streaming lexer over a borrowed input string. Characters are consumed
/// one at a time with single-character lookahead via `Peekable`.
pub struct Lexer<'a> {
    input: Peekable<Chars<'a>>,
}
10+
11+
impl<'a> Lexer<'a> {
12+
pub fn new(input: &str) -> Lexer {
13+
Lexer { input: input.chars().peekable() }
14+
}
15+
16+
fn read_char(&mut self) -> Option<char> {
17+
self.input.next()
18+
}
19+
20+
fn peek_char(&mut self) -> Option<&char> {
21+
self.input.peek()
22+
}
23+
24+
fn peek_char_eq(&mut self, ch: char) -> bool {
25+
match self.peek_char() {
26+
Some(&peek_ch) => peek_ch == ch,
27+
None => false,
28+
}
29+
}
30+
31+
fn skip_whitespace(&mut self) {
32+
while let Some(&c) = self.peek_char() {
33+
if !c.is_whitespace() {
34+
break;
35+
}
36+
self.read_char();
37+
}
38+
}
39+
40+
fn peek_is_letter(&mut self) -> bool {
41+
match self.peek_char() {
42+
Some(&ch) => is_letter(ch),
43+
None => false,
44+
}
45+
}
46+
47+
fn read_identifier(&mut self, first: char) -> String {
48+
let mut ident = String::new();
49+
ident.push(first);
50+
51+
while self.peek_is_letter() {
52+
ident.push(self.read_char().unwrap()); // TODO: unwrap()
53+
}
54+
55+
ident
56+
}
57+
58+
fn read_number(&mut self, first: char) -> String {
59+
let mut number = String::new();
60+
number.push(first);
61+
62+
while let Some(&c) = self.peek_char() {
63+
if !c.is_numeric() {
64+
break;
65+
}
66+
number.push(self.read_char().unwrap()); // TODO: unwrap()
67+
}
68+
69+
number
70+
}
71+
72+
pub fn next_token(&mut self) -> Token {
73+
self.skip_whitespace();
74+
75+
match self.read_char() {
76+
Some('=') => {
77+
if self.peek_char_eq('=') {
78+
self.read_char();
79+
Token::Equal
80+
} else {
81+
Token::Assign
82+
}
83+
}
84+
Some('+') => Token::Plus,
85+
Some('-') => Token::Minus,
86+
Some('!') => {
87+
if self.peek_char_eq('=') {
88+
self.read_char();
89+
Token::NotEqual
90+
} else {
91+
Token::Bang
92+
}
93+
}
94+
Some('/') => Token::Slash,
95+
Some('*') => Token::Asterisk,
96+
Some('<') => Token::LowerThan,
97+
Some('>') => Token::GreaterThan,
98+
Some(';') => Token::Semicolon,
99+
Some(',') => Token::Comma,
100+
Some('{') => Token::LeftBrace,
101+
Some('}') => Token::RightBrace,
102+
Some('(') => Token::LeftParenthesis,
103+
Some(')') => Token::RightParenthesis,
104+
105+
Some(ch @ _) => {
106+
if is_letter(ch) {
107+
let literal = self.read_identifier(ch);
108+
token::lookup_ident(&literal)
109+
} else if ch.is_numeric() {
110+
Token::Integer(self.read_number(ch))
111+
} else {
112+
Token::Illegal // TODO: Maybe we need ch here, to display a nice error message later?
113+
}
114+
}
115+
116+
// Handle EOF
117+
None => Token::EndOfFile,
118+
}
119+
}
120+
}
121+
122+
// A character may appear in an identifier when it is an underscore or any
// alphabetic character.
fn is_letter(ch: char) -> bool {
    ch == '_' || ch.is_alphabetic()
}
126+
127+
// Identifier characters: letters and underscore qualify; symbols and digits do not.
#[test]
fn is_letter_test() {
    for &ch in &['_', 'a', 'Z'] {
        assert!(is_letter(ch));
    }
    for &ch in &['*', '1'] {
        assert!(!is_letter(ch));
    }
}
136+
137+
138+
// End-to-end check: lex the book's sample program and compare the complete
// token stream, including the trailing EndOfFile, against the expected sequence.
#[test]
fn next_token_test() {

    #[cfg_attr(rustfmt, rustfmt_skip)]
    let input = "let five = 5;
let ten = 10;

let add = fn(x, y) {
x + y;
};

let result = add(five, ten);
!-/*5;
5 < 10 > 5;

if (5 < 10) {
return true;
} else {
return false;
}

10 == 10;
10 != 9;
";

    let expected = vec![
        Token::Let,
        Token::Ident("five".to_string()),
        Token::Assign,
        Token::Integer("5".to_string()),
        Token::Semicolon,
        Token::Let,
        Token::Ident("ten".to_string()),
        Token::Assign,
        Token::Integer("10".to_string()),
        Token::Semicolon,
        Token::Let,
        Token::Ident("add".to_string()),
        Token::Assign,
        Token::Function,
        Token::LeftParenthesis,
        Token::Ident("x".to_string()),
        Token::Comma,
        Token::Ident("y".to_string()),
        Token::RightParenthesis,
        Token::LeftBrace,
        Token::Ident("x".to_string()),
        Token::Plus,
        Token::Ident("y".to_string()),
        Token::Semicolon,
        Token::RightBrace,
        Token::Semicolon,
        Token::Let,
        Token::Ident("result".to_string()),
        Token::Assign,
        Token::Ident("add".to_string()),
        Token::LeftParenthesis,
        Token::Ident("five".to_string()),
        Token::Comma,
        Token::Ident("ten".to_string()),
        Token::RightParenthesis,
        Token::Semicolon,
        Token::Bang,
        Token::Minus,
        Token::Slash,
        Token::Asterisk,
        Token::Integer("5".to_string()),
        Token::Semicolon,
        Token::Integer("5".to_string()),
        Token::LowerThan,
        Token::Integer("10".to_string()),
        Token::GreaterThan,
        Token::Integer("5".to_string()),
        Token::Semicolon,
        Token::If,
        Token::LeftParenthesis,
        Token::Integer("5".to_string()),
        Token::LowerThan,
        Token::Integer("10".to_string()),
        Token::RightParenthesis,
        Token::LeftBrace,
        Token::Return,
        Token::True,
        Token::Semicolon,
        Token::RightBrace,
        Token::Else,
        Token::LeftBrace,
        Token::Return,
        Token::False,
        Token::Semicolon,
        Token::RightBrace,
        Token::Integer("10".to_string()),
        Token::Equal,
        Token::Integer("10".to_string()),
        Token::Semicolon,
        Token::Integer("10".to_string()),
        Token::NotEqual,
        Token::Integer("9".to_string()),
        Token::Semicolon,
        Token::EndOfFile,
    ];

    let mut lexer = Lexer::new(input);
    for expected_token in expected {
        assert_eq!(lexer.next_token(), expected_token);
    }
}

rust/lexer/lexer-in-rust/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
pub mod token;
2+
pub mod lexer;

rust/lexer/lexer-in-rust/src/main.rs

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
pub mod lexer;
2+
pub mod token;
3+
4+
use std::io::{self, BufRead, Write};
5+
use token::Token;
6+
use lexer::Lexer;
7+
8+
// Start a custom repl
9+
fn main() {
10+
let stdin = io::stdin();
11+
12+
loop {
13+
// Stdout needs to be flushed, due to missing newline
14+
print!(">> ");
15+
io::stdout().flush().expect("Error flushing stdout");
16+
17+
let mut line = String::new();
18+
stdin.lock().read_line(&mut line).expect("Error reading from stdin");
19+
let mut lexer = Lexer::new(&mut line);
20+
21+
loop {
22+
let tok = lexer.next_token();
23+
println!("{:?}", tok);
24+
if tok == Token::EndOfFile {
25+
break;
26+
}
27+
}
28+
}
29+
}

rust/lexer/lexer-in-rust/src/token.rs

+64
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
/// All token kinds the lexer can produce.
///
/// `Eq`, `Hash` and `Clone` are derived in addition to the original
/// `Debug`/`PartialEq` (every payload is a `String`, so all derives are
/// sound); this lets tokens be duplicated cheaply and used as map keys.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub enum Token {
    Illegal,
    EndOfFile,

    // Literals are stored as strings
    Ident(String),
    Integer(String),

    // Operators
    Assign,
    Plus,
    Minus,
    Bang,
    Asterisk,
    Slash,
    LowerThan,
    GreaterThan,
    Equal,
    NotEqual,

    // Delimiters
    Comma,
    Semicolon,
    LeftParenthesis,
    RightParenthesis,
    LeftBrace,
    RightBrace,

    // Keywords
    Function,
    Let,
    True,
    False,
    If,
    Else,
    Return,
}
39+
40+
impl Default for Token {
41+
// Choose an Illegal identifier as default
42+
// this should be overriden before being used
43+
fn default() -> Token {
44+
Token::Illegal
45+
}
46+
}
47+
48+
pub fn lookup_ident(ident: &str) -> Token {
49+
match ident {
50+
"fn" => Token::Function,
51+
"let" => Token::Let,
52+
"true" => Token::True,
53+
"false" => Token::False,
54+
"if" => Token::If,
55+
"else" => Token::Else,
56+
"return" => Token::Return,
57+
_ => Token::Ident(ident.to_string()),
58+
}
59+
}
60+
61+
// Keyword lookup must resolve "fn" to the Function token.
#[test]
fn lookup_ident_test() {
    let tok = lookup_ident("fn");
    assert_eq!(tok, Token::Function);
}

0 commit comments

Comments
 (0)