Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement initial roc tokenizer in zig #7569

Merged
merged 1 commit on Feb 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,16 @@ pub fn build(b: *std.Build) void {
const target = b.standardTargetOptions(.{});
const optimize = b.standardOptimizeOption(.{});

// Zig unicode library - https://codeberg.org/atman/zg
const zg = b.dependency("zg", .{});

const exe = b.addExecutable(.{
.name = "roc",
.root_source_file = b.path("src/main.zig"),
.target = target,
.optimize = optimize,
});
exe.root_module.addImport("GenCatData", zg.module("GenCatData"));

b.installArtifact(exe);

Expand All @@ -34,6 +38,7 @@ pub fn build(b: *std.Build) void {
.target = target,
.optimize = optimize,
});
all_tests.root_module.addImport("GenCatData", zg.module("GenCatData"));

// Install the test binary so we can run separately
// ```sh
Expand Down
4 changes: 4 additions & 0 deletions build.zig.zon
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
.url = "git+https://github.com/kristoff-it/zig-afl-kit#88c6b71377767c1b8d26979b0adfa12a58d988dd",
.hash = "1220796f7d2d9a2d4d7f8339ee0b14aa4bf133a15ae9ba39c941cc68e08d5c5ce9a2",
},
.zg = .{
.url = "https://codeberg.org/dude_the_builder/zg/archive/v0.13.2.tar.gz",
.hash = "122055beff332830a391e9895c044d33b15ea21063779557024b46169fb1984c6e40",
},
},
.paths = .{
"build.zig",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1P : (
11 : (
I,
s,
Mw
Expand Down
Original file line number Diff line number Diff line change
@@ -1,74 +1,79 @@
@0-21 Defs(
Defs {
tags: [
EitherIndex(2147483648),
],
regions: [
@0-17,
],
space_before: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
space_after: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
spaces: [],
type_defs: [],
value_defs: [
Annotation(
@0-2 SpaceAfter(
NumLiteral(
"1P",
@0-21 SpaceAfter(
Defs(
Defs {
tags: [
EitherIndex(2147483648),
],
regions: [
@0-17,
],
space_before: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
space_after: [
Slice<roc_parse::ast::CommentOrNewline> { start: 0, length: 0 },
],
spaces: [],
type_defs: [],
value_defs: [
Annotation(
@0-2 SpaceAfter(
NumLiteral(
"11",
),
[
Newline,
],
),
[
Newline,
],
),
@4-17 Tuple {
elems: [
@5-15 Function(
[
@5-6 Apply(
"",
"I",
[],
),
@7-8 SpaceAfter(
BoundVariable(
"s",
@4-17 Tuple {
elems: [
@5-15 Function(
[
@5-6 Apply(
"",
"I",
[],
),
[
Newline,
],
),
@10-12 Apply(
"",
"Mw",
[],
@7-8 SpaceAfter(
BoundVariable(
"s",
),
[
Newline,
],
),
@10-12 Apply(
"",
"Mw",
[],
),
],
Pure,
@14-15 BoundVariable(
"r",
),
],
Pure,
@14-15 BoundVariable(
"r",
),
],
ext: Some(
@16-17 BoundVariable(
"l",
),
),
],
ext: Some(
@16-17 BoundVariable(
"l",
),
),
},
),
],
},
@18-21 SpaceBefore(
Var {
module_name: "",
ident: "asl",
},
),
],
},
[
Newline,
],
@18-21 SpaceBefore(
Var {
module_name: "",
ident: "asl",
},
[
Newline,
],
),
),
[
Newline,
],
)
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
1P
11
:(I,s
,Mw->r)l
asl
asl
195 changes: 195 additions & 0 deletions src/check/parse.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
const std = @import("std");
const tokenize = @import("tokenize.zig");

/// A span of source text, as byte offsets into the source buffer.
/// NOTE(review): whether `end` is exclusive (half-open) or inclusive is not
/// established anywhere in this file — confirm against tokenize.zig.
pub const Region = struct {
    start: usize,
    end: usize,
};

/// A single parse-tree node: a kind tag, its payload, and the source span
/// it was parsed from.
pub const Node = struct {
    tag: Tag,
    data: Data,
    region: Region,

    /// Discriminant describing which `Data` variant is active.
    pub const Tag = enum {
        Unary,
        Binary,
        // TODO
    };

    /// Payload storage for a node.
    /// NOTE(review): this is an *untagged* union paired with the separate
    /// `tag` field above; reading the wrong variant is checked illegal
    /// behavior in safe builds — confirm every access consults `tag` first
    /// (a `union(Tag)` would let the compiler enforce this).
    pub const Data = union {
        Unary: UnaryOpData,
        Binary: BinaryOpData,
        // Add more node data as needed
    };

    /// Payload for unary-operator nodes.
    pub const UnaryOpData = struct {
        // TODO
    };

    /// Payload for binary-operator nodes.
    pub const BinaryOpData = struct {
        // TODO
    };
};

/// A parse-time problem, tied to the source region that caused it.
/// NOTE(review): the Parser below currently collects `tokenize.Diagnostic`,
/// not this type — confirm which of the two parse errors should use.
pub const Diagnostic = struct {
    tag: Tag,
    region: Region,

    pub const Tag = enum {
        // TODO
    };
};

/// Recursive-descent parser over a token stream produced by tokenize.zig.
/// Tracks a cursor (`pos`) into the token buffer and accumulates parse-tree
/// nodes and diagnostics. Indentation-sensitive: newline tokens carry the
/// indent level of the following line in their `offset` field (see
/// `consumeNewline`).
pub const Parser = struct {
    pos: usize,
    tokens: tokenize.TokenizedBuffer,
    nodes: std.MultiArrayList(Node),
    // NOTE(review): this stores tokenize.Diagnostic, while this file declares
    // its own Diagnostic type above — confirm which one is intended here.
    diagnostics: std.ArrayList(tokenize.Diagnostic),
    allocator: std.mem.Allocator,

    /// Create a parser positioned at the first token.
    /// The parser borrows `tokens` (caller retains ownership); `allocator`
    /// backs the parser-owned `nodes` and `diagnostics` storage.
    pub fn init(tokens: tokenize.TokenizedBuffer, allocator: std.mem.Allocator) Parser {
        return Parser{
            .pos = 0,
            .tokens = tokens,
            .nodes = std.MultiArrayList(Node){},
            .diagnostics = std.ArrayList(tokenize.Diagnostic).init(allocator),
            .allocator = allocator,
        };
    }

    /// Release parser-owned storage (`nodes` and `diagnostics`).
    /// Does NOT free `tokens`, which the caller owns.
    pub fn deinit(self: *Parser) void {
        self.nodes.deinit(self.allocator);
        self.diagnostics.deinit();
    }

    /// Move past the current token. No-op once the buffer is exhausted.
    pub fn advance(self: *Parser) void {
        if (self.pos >= self.tokens.tokens.len) {
            return;
        }
        std.debug.print("advance {s}\n", .{@tagName(self.tokens.tokens.items(.tag)[self.pos])});
        self.pos += 1;
    }

    /// Tag of the current token, or `.EndOfFile` once input is exhausted,
    /// so callers never need a separate bounds check.
    pub fn peek(self: *Parser) tokenize.Token.Tag {
        if (self.pos >= self.tokens.tokens.len) {
            return .EndOfFile;
        }
        return self.tokens.tokens.items(.tag)[self.pos];
    }

    /// If the next token is a newline, consume it.
    /// Returns the indent level of the next line if it is a newline, otherwise null.
    pub fn consumeNewline(self: *Parser) ?u16 {
        // Delegates to peekNewline so the newline/indent decoding lives in
        // exactly one place.
        const indent = self.peekNewline() orelse return null;
        self.advance();
        return indent;
    }

    /// Like `consumeNewline`, but leaves the cursor on the newline token.
    /// Returns the indent level of the next line if the next token is a
    /// newline, otherwise null.
    pub fn peekNewline(self: *Parser) ?u16 {
        if (self.peek() != .Newline) {
            return null;
        }
        // For newline tokens, `offset` encodes the following line's indent.
        const indent = self.tokens.tokens.items(.offset)[self.pos];
        return @intCast(indent);
    }

    /// Parse an entire file: newline-separated statements at indent 0,
    /// terminated by EndOfFile.
    pub fn parseFile(self: *Parser) !void {
        while (self.peek() != .EndOfFile) {
            if (self.consumeNewline()) |indent| {
                std.debug.print("parseFile indent {d}\n", .{indent});
                std.debug.assert(indent == 0); // TODO: report an error
            }
            // A trailing newline may have left us at EOF.
            if (self.peek() == .EndOfFile) {
                break;
            }
            self.parseStmt(0);
        }
    }

    /// Parse one statement whose line starts at `base_indent`.
    /// Currently handles `ident = ...` assignments and bare identifier
    /// expressions; anything else panics (TODO: diagnostics).
    pub fn parseStmt(self: *Parser, base_indent: u16) void {
        switch (self.peek()) {
            .LowerIdent => {
                self.advance();
                if (self.peek() == .OpEquals) {
                    self.finishParseAssign(base_indent);
                    std.debug.print("parseStmt assign\n", .{});
                } else {
                    std.debug.print("parseStmt expr\n", .{});
                }
            },
            else => {
                std.debug.panic("todo: emit error, unexpected token {s}", .{@tagName(self.peek())});
            },
        }
    }

    /// Parse a single expression: an identifier or an integer literal.
    /// Anything else panics (TODO: diagnostics).
    pub fn parseExpr(self: *Parser) void {
        switch (self.peek()) {
            .LowerIdent => {
                self.advance();
                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
                // TODO: add node
            },
            .Int => {
                self.advance();
                std.debug.print("parseExpr {s}\n", .{@tagName(self.peek())});
                // TODO: add node
            },
            else => {
                std.debug.panic("todo: emit error", .{});
            },
        }
    }

    /// Finish an assignment after `ident` has been consumed and `=` is next.
    /// The right-hand side is either an expression on the same line, or an
    /// indented block of statements on the following lines (each indented
    /// strictly deeper than `base_indent`).
    pub fn finishParseAssign(self: *Parser, base_indent: u16) void {
        std.debug.assert(self.peek() == .OpEquals);
        self.advance();
        if (self.consumeNewline()) |indent| {
            // NOTE(review): message says "startParseAssign" — looks like a
            // stale function name; confirm before relying on this output.
            std.debug.print("startParseAssign indent {d}\n", .{indent});
            if (indent <= base_indent) {
                std.debug.panic("todo: emit error", .{});
            }

            self.parseStmt(indent);

            // Keep parsing statements while subsequent lines remain indented
            // deeper than base_indent; stop at a dedent or non-newline.
            while (true) {
                if (self.peekNewline()) |i| {
                    if (i <= base_indent) {
                        break;
                    }
                    self.advance();
                } else {
                    break;
                }
                self.parseStmt(indent);
            }
        } else {
            self.parseExpr();
        }

        std.debug.print("finishParseAssign\n", .{});
    }
};
// Smoke test: drive the parser over a hand-built token stream and check it
// completes without panicking. No tree-shape assertions yet (see TODO below).
test "Parser advance and peek" {
    // NOTE(review): page_allocator is never freed here and nothing calls
    // deinit; std.testing.allocator would catch leaks — confirm
    // TokenizedBuffer's cleanup API before switching.
    const allocator = std.heap.page_allocator;
    var tokens = try tokenize.TokenizedBuffer.init(allocator);
    // x =
    //   y = 1
    //   y
    try tokens.pushToken(.LowerIdent, 0, 1);
    try tokens.pushToken(.OpEquals, 0, 0);
    // pushNewline(4): the argument is presumably the next line's indent
    // level — TODO confirm against tokenize.zig.
    try tokens.pushNewline(4);
    try tokens.pushToken(.LowerIdent, 0, 0);
    try tokens.pushToken(.OpEquals, 0, 0);
    try tokens.pushToken(.Int, 0, 0);
    try tokens.pushNewline(4);
    try tokens.pushToken(.LowerIdent, 0, 0);
    try tokens.pushNewline(0);
    try tokens.pushToken(.EndOfFile, 0, 0);

    var parser = Parser.init(tokens, allocator);

    try parser.parseFile();

    // TODO: assert on the produced nodes once parseStmt/parseExpr build them.
    // std.debug.assert(parser.nodes)
}
Loading