Skip to content

Commit 25b076b

Browse files
Merge pull request #7569 from joshuawarner32/zig-tokenizer
Implement initial roc tokenizer in zig
2 parents 68e524e + ce8f706 commit 25b076b

File tree

8 files changed

+1499
-74
lines changed

8 files changed

+1499
-74
lines changed

build.zig

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,16 @@ pub fn build(b: *std.Build) void {
99
const target = b.standardTargetOptions(.{});
1010
const optimize = b.standardOptimizeOption(.{});
1111

12+
// Zig unicode library - https://codeberg.org/atman/zg
13+
const zg = b.dependency("zg", .{});
14+
1215
const exe = b.addExecutable(.{
1316
.name = "roc",
1417
.root_source_file = b.path("src/main.zig"),
1518
.target = target,
1619
.optimize = optimize,
1720
});
21+
exe.root_module.addImport("GenCatData", zg.module("GenCatData"));
1822

1923
b.installArtifact(exe);
2024

@@ -34,6 +38,7 @@ pub fn build(b: *std.Build) void {
3438
.target = target,
3539
.optimize = optimize,
3640
});
41+
all_tests.root_module.addImport("GenCatData", zg.module("GenCatData"));
3742

3843
// Install the test binary so we can run separately
3944
// ```sh

build.zig.zon

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
.url = "git+https://github.com/kristoff-it/zig-afl-kit#88c6b71377767c1b8d26979b0adfa12a58d988dd",
88
.hash = "1220796f7d2d9a2d4d7f8339ee0b14aa4bf133a15ae9ba39c941cc68e08d5c5ce9a2",
99
},
10+
.zg = .{
11+
.url = "https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz",
12+
.hash = "122055beff332830a391e9895c044d33b15ea21063779557024b46169fb1984c6e40",
13+
},
1014
},
1115
.paths = .{
1216
"build.zig",

crates/compiler/test_syntax/tests/snapshots/pass/tuple_function_annotation.expr.formatted.roc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1P : (
1+
11 : (
22
I,
33
s,
44
Mw
Lines changed: 71 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,79 @@
1-
@0-21 Defs(
2-
Defs {
3-
tags: [
4-
EitherIndex(2147483648),
5-
],
6-
regions: [
7-
@0-17,
8-
],
9-
space_before: [
10-
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
11-
],
12-
space_after: [
13-
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
14-
],
15-
spaces: [],
16-
type_defs: [],
17-
value_defs: [
18-
Annotation(
19-
@0-2 SpaceAfter(
20-
NumLiteral(
21-
"1P",
1+
@0-21 SpaceAfter(
2+
Defs(
3+
Defs {
4+
tags: [
5+
EitherIndex(2147483648),
6+
],
7+
regions: [
8+
@0-17,
9+
],
10+
space_before: [
11+
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
12+
],
13+
space_after: [
14+
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
15+
],
16+
spaces: [],
17+
type_defs: [],
18+
value_defs: [
19+
Annotation(
20+
@0-2 SpaceAfter(
21+
NumLiteral(
22+
"11",
23+
),
24+
[
25+
Newline,
26+
],
2227
),
23-
[
24-
Newline,
25-
],
26-
),
27-
@4-17 Tuple {
28-
elems: [
29-
@5-15 Function(
30-
[
31-
@5-6 Apply(
32-
"",
33-
"I",
34-
[],
35-
),
36-
@7-8 SpaceAfter(
37-
BoundVariable(
38-
"s",
28+
@4-17 Tuple {
29+
elems: [
30+
@5-15 Function(
31+
[
32+
@5-6 Apply(
33+
"",
34+
"I",
35+
[],
3936
),
40-
[
41-
Newline,
42-
],
43-
),
44-
@10-12 Apply(
45-
"",
46-
"Mw",
47-
[],
37+
@7-8 SpaceAfter(
38+
BoundVariable(
39+
"s",
40+
),
41+
[
42+
Newline,
43+
],
44+
),
45+
@10-12 Apply(
46+
"",
47+
"Mw",
48+
[],
49+
),
50+
],
51+
Pure,
52+
@14-15 BoundVariable(
53+
"r",
4854
),
49-
],
50-
Pure,
51-
@14-15 BoundVariable(
52-
"r",
55+
),
56+
],
57+
ext: Some(
58+
@16-17 BoundVariable(
59+
"l",
5360
),
5461
),
55-
],
56-
ext: Some(
57-
@16-17 BoundVariable(
58-
"l",
59-
),
60-
),
61-
},
62-
),
63-
],
64-
},
65-
@18-21 SpaceBefore(
66-
Var {
67-
module_name: "",
68-
ident: "asl",
62+
},
63+
),
64+
],
6965
},
70-
[
71-
Newline,
72-
],
66+
@18-21 SpaceBefore(
67+
Var {
68+
module_name: "",
69+
ident: "asl",
70+
},
71+
[
72+
Newline,
73+
],
74+
),
7375
),
76+
[
77+
Newline,
78+
],
7479
)
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1P
1+
11
22
:(I,s
33
,Mw->r)l
4-
asl
4+
asl

src/check/parse.zig

Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
const std = @import("std");
2+
const tokenize = @import("tokenize.zig");
3+
4+
/// A span described by a pair of `usize` positions.
///
/// NOTE(review): presumably byte offsets into the source with `end` exclusive —
/// nothing in this file establishes the unit or the inclusivity; confirm.
pub const Region = struct {
    /// Position where the span begins.
    start: usize,
    /// Position where the span ends.
    end: usize,
};
8+
9+
/// A single parse-tree node: a kind (`tag`), a payload (`data`) and the
/// `region` it is associated with.
pub const Node = struct {
    /// Which kind of node this is.
    tag: Tag,
    /// Payload for the node.
    /// NOTE(review): `Data` is an untagged union, so presumably `tag` is what
    /// selects the active member — confirm once nodes are actually built.
    data: Data,
    /// Region associated with this node.
    region: Region,

    /// The kinds of node currently defined.
    pub const Tag = enum {
        Unary,
        Binary,
        // TODO
    };

    /// Payload for a `Unary` node (no fields yet).
    pub const UnaryOpData = struct {
        // TODO
    };

    /// Payload for a `Binary` node (no fields yet).
    pub const BinaryOpData = struct {
        // TODO
    };

    /// Untagged union of the per-kind payloads.
    pub const Data = union {
        Unary: UnaryOpData,
        Binary: BinaryOpData,
        // Add more node data as needed
    };
};
34+
35+
/// A diagnostic record: a kind (`tag`) plus the `region` it refers to.
///
/// NOTE(review): nothing in the visible part of this file constructs this
/// type; `Parser.diagnostics` stores `tokenize.Diagnostic` instead — confirm
/// which of the two is meant to be used by the parser.
pub const Diagnostic = struct {
    /// Which kind of diagnostic this is.
    tag: Tag,
    /// Region the diagnostic refers to.
    region: Region,

    /// Diagnostic kinds; none are defined yet.
    pub const Tag = enum {
        // TODO
    };
};
43+
44+
/// Indentation-aware parser over a `tokenize.TokenizedBuffer`.
///
/// This is scaffolding: the parse functions walk the token stream and trace
/// their progress with `std.debug.print`, but do not add nodes or diagnostics
/// yet. Unexpected input currently panics (see the `todo:` panics below).
pub const Parser = struct {
    /// Index of the next token to be consumed.
    pos: usize,
    /// The token stream being parsed.
    tokens: tokenize.TokenizedBuffer,
    /// Parsed nodes (nothing is appended yet).
    nodes: std.MultiArrayList(Node),
    /// Collected diagnostics (nothing is appended yet).
    /// NOTE(review): element type is `tokenize.Diagnostic`, not this file's
    /// `Diagnostic` — confirm that is intended.
    diagnostics: std.ArrayList(tokenize.Diagnostic),
    /// Allocator used for `nodes` and `diagnostics`.
    allocator: std.mem.Allocator,

    /// Creates a parser positioned at the first token of `tokens`.
    /// Call `deinit` to release the parser's own lists.
    pub fn init(tokens: tokenize.TokenizedBuffer, allocator: std.mem.Allocator) Parser {
        return Parser{
            .pos = 0,
            .tokens = tokens,
            .nodes = std.MultiArrayList(Node){},
            .diagnostics = std.ArrayList(tokenize.Diagnostic).init(allocator),
            .allocator = allocator,
        };
    }

    /// Releases the memory held by `nodes` and `diagnostics`.
    /// `tokens` is left untouched; it was handed in by the caller of `init`.
    pub fn deinit(self: *Parser) void {
        self.nodes.deinit(self.allocator);
        self.diagnostics.deinit();
    }

    /// Moves past the current token. Does nothing once the end of the token
    /// list has been reached.
    pub fn advance(self: *Parser) void {
        if (self.pos >= self.tokens.tokens.len) {
            return;
        }
        std.debug.print("advance {s}\n", .{@tagName(self.tokens.tokens.items(.tag)[self.pos])});
        self.pos += 1;
    }

    /// Returns the tag of the current token without consuming it, or
    /// `.EndOfFile` when the position is past the last token.
    pub fn peek(self: *Parser) tokenize.Token.Tag {
        if (self.pos >= self.tokens.tokens.len) {
            return .EndOfFile;
        }
        return self.tokens.tokens.items(.tag)[self.pos];
    }

    /// If the next token is a newline, consumes it and returns the indent
    /// level of the following line; otherwise returns null and consumes
    /// nothing.
    pub fn consumeNewline(self: *Parser) ?u16 {
        const indent = self.peekNewline() orelse return null;
        self.advance();
        return indent;
    }

    /// Returns the indent level of the next line if the next token is a
    /// newline, otherwise null. Never consumes anything.
    pub fn peekNewline(self: *Parser) ?u16 {
        if (self.peek() != .Newline) {
            return null;
        }
        // For newline tokens the `offset` column is read as the indent level.
        const indent = self.tokens.tokens.items(.offset)[self.pos];
        return @intCast(indent);
    }

    /// Parses statements at indent 0 until `.EndOfFile` is reached.
    ///
    /// The error union in the signature is kept for callers that `try` this
    /// function; no error is returned at present.
    pub fn parseFile(self: *Parser) !void {
        while (self.peek() != .EndOfFile) {
            if (self.consumeNewline()) |indent| {
                std.debug.print("parseFile indent {d}\n", .{indent});
                std.debug.assert(indent == 0); // TODO: report an error
            }
            // A trailing newline may be the last thing before the end of input.
            if (self.peek() == .EndOfFile) {
                break;
            }
            self.parseStmt(0);
        }
    }

    /// Parses one statement whose enclosing block is indented by
    /// `base_indent`. Only statements starting with a lower-case identifier
    /// are handled; anything else panics.
    pub fn parseStmt(self: *Parser, base_indent: u16) void {
        switch (self.peek()) {
            .LowerIdent => {
                self.advance();
                if (self.peek() == .OpEquals) {
                    self.finishParseAssign(base_indent);
                    std.debug.print("parseStmt assign\n", .{});
                } else {
                    // A bare identifier: nothing beyond the identifier is consumed.
                    std.debug.print("parseStmt expr\n", .{});
                }
            },
            else => {
                std.debug.panic("todo: emit error, unexpected token {s}", .{@tagName(self.peek())});
            },
        }
    }

    /// Parses a single-token expression (`.LowerIdent` or `.Int`); anything
    /// else panics. The trace line prints the tag of the token that follows
    /// the consumed one, because `peek` is evaluated after `advance`.
    pub fn parseExpr(self: *Parser) void {
        switch (self.peek()) {
            .LowerIdent => {
                self.advance();
                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
                // TODO: add node
            },
            .Int => {
                self.advance();
                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
                // TODO: add node
            },
            else => {
                std.debug.panic("todo: emit error", .{});
            },
        }
    }

    /// Parses the right-hand side of an assignment. The current token must be
    /// `.OpEquals`.
    ///
    /// If a newline follows the `=`, the right-hand side is a block of
    /// statements indented deeper than `base_indent`; otherwise it is a single
    /// expression on the same line.
    pub fn finishParseAssign(self: *Parser, base_indent: u16) void {
        std.debug.assert(self.peek() == .OpEquals);
        self.advance();
        if (self.consumeNewline()) |indent| {
            std.debug.print("startParseAssign indent {d}\n", .{indent});
            if (indent <= base_indent) {
                std.debug.panic("todo: emit error", .{});
            }

            self.parseStmt(indent);

            // Keep parsing statements while following lines stay indented
            // deeper than the enclosing block.
            while (self.peekNewline()) |next_indent| {
                if (next_indent <= base_indent) {
                    break;
                }
                self.advance();
                // An indented trailing newline can be followed directly by
                // the end of input; stop here instead of handing EndOfFile to
                // parseStmt (parseFile applies the same guard).
                if (self.peek() == .EndOfFile) {
                    break;
                }
                self.parseStmt(indent);
            }
        } else {
            self.parseExpr();
        }

        std.debug.print("finishParseAssign\n", .{});
    }
};
173+
// Drives `parseFile` over a hand-built token stream containing a nested,
// indented assignment and checks that the whole stream is consumed.
test "Parser advance and peek" {
    const allocator = std.heap.page_allocator;
    // NOTE(review): `tokens` is never released here; its cleanup API is not
    // visible from this file — add the matching teardown if one exists.
    var tokens = try tokenize.TokenizedBuffer.init(allocator);
    // x =
    //     y = 1
    //     y
    try tokens.pushToken(.LowerIdent, 0, 1);
    try tokens.pushToken(.OpEquals, 0, 0);
    try tokens.pushNewline(4);
    try tokens.pushToken(.LowerIdent, 0, 0);
    try tokens.pushToken(.OpEquals, 0, 0);
    try tokens.pushToken(.Int, 0, 0);
    try tokens.pushNewline(4);
    try tokens.pushToken(.LowerIdent, 0, 0);
    try tokens.pushNewline(0);
    try tokens.pushToken(.EndOfFile, 0, 0);

    var parser = Parser.init(tokens, allocator);
    // Release the lists the parser allocated for itself.
    defer parser.nodes.deinit(allocator);
    defer parser.diagnostics.deinit();

    try parser.parseFile();

    // The parser must have walked the entire stream...
    try std.testing.expect(parser.peek() == .EndOfFile);
    // ...without recording any diagnostics for this well-formed input.
    try std.testing.expectEqual(@as(usize, 0), parser.diagnostics.items.len);
    // TODO: assert on parser.nodes once the parse functions start adding nodes.
}

0 commit comments

Comments
 (0)