diff --git a/src/check/parse/tokenize/build.zig b/src/check/parse/tokenize/build.zig
index 6b96d5b409..2e60797952 100644
--- a/src/check/parse/tokenize/build.zig
+++ b/src/check/parse/tokenize/build.zig
@@ -14,21 +14,9 @@ pub fn build(b: *std.Build) void {
     // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not
     // set a preferred release mode, allowing the user to decide how to optimize.
     const optimize = b.standardOptimizeOption(.{});
-    const zg = b.dependency("zg", .{});
-
-    const lib = b.addStaticLibrary(.{
-        .name = "tokenize",
-        // In this case the main source file is merely a path, however, in more
-        // complicated build scripts, this could be a generated file.
-        .root_source_file = b.path("src/root.zig"),
-        .target = target,
-        .optimize = optimize,
-    });
-    // This declares intent for the library to be installed into the standard
-    // location when the user invokes the "install" step (the default step when
-    // running `zig build`).
-    b.installArtifact(lib);
+    // Zig unicode library - https://codeberg.org/atman/zg
+    const zg = b.dependency("zg", .{});
 
     const exe = b.addExecutable(.{
         .name = "tokenize",
@@ -37,6 +25,8 @@ pub fn build(b: *std.Build) void {
         .optimize = optimize,
     });
 
+    // Unicode data is required for the tokenizer to work correctly.
+    // This is "general category" data - e.g. is this a lowercase letter, a digit, etc.
     exe.root_module.addImport("GenCatData", zg.module("GenCatData"));
 
     // This declares intent for the executable to be installed into the
@@ -67,16 +57,6 @@ pub fn build(b: *std.Build) void {
     const run_step = b.step("run", "Run the app");
     run_step.dependOn(&run_cmd.step);
 
-    // Creates a step for unit testing. This only builds the test executable
-    // but does not run it.
-    const lib_unit_tests = b.addTest(.{
-        .root_source_file = b.path("src/root.zig"),
-        .target = target,
-        .optimize = optimize,
-    });
-
-    const run_lib_unit_tests = b.addRunArtifact(lib_unit_tests);
-
     const exe_unit_tests = b.addTest(.{
         .root_source_file = b.path("src/main.zig"),
         .target = target,
@@ -89,6 +69,5 @@ pub fn build(b: *std.Build) void {
     // the `zig build --help` menu, providing a way for the user to request
     // running the unit tests.
     const test_step = b.step("test", "Run unit tests");
-    test_step.dependOn(&run_lib_unit_tests.step);
     test_step.dependOn(&run_exe_unit_tests.step);
 }
diff --git a/src/check/parse/tokenize/src/main.zig b/src/check/parse/tokenize/src/main.zig
index 46da698538..c8122c5373 100644
--- a/src/check/parse/tokenize/src/main.zig
+++ b/src/check/parse/tokenize/src/main.zig
@@ -1,179 +1,192 @@
 const std = @import("std");
-const GenCatData = @import("GenCatData");
-
-/// The token kinds
-pub const T = enum(u8) {
-    EndOfFile,
-
-    // primitives
-    Float,
-    String,
-    SingleQuote,
-
-    // a part of a string interpolation; generally you'll see something like:
-    // StringBegin, OpenCurly, <expr>, CloseCurly, StringPart, OpenCurly, <expr>, CloseCurly
-    StringBegin,
-    StringPart,
-
-    // These are not technically valid, but we can have the formatter fix them up.
-    SingleQuoteBegin,
-    SingleQuotePart,
-
-    UpperIdent,
-    LowerIdent,
-    Underscore,
-    DotLowerIdent,
-    DotNumber,
-
-    OpenRound,
-    CloseRound,
-    OpenSquare,
-    CloseSquare,
-    OpenCurly,
-    CloseCurly,
-
-    OpPlus,
-    OpStar,
-    OpPizza,
-    OpAssign,
-    OpBinaryMinus, // trailing whitespace
-    OpUnaryMinus, // no trailing whitespace
-    OpNotEquals,
-    OpBang,
-    OpAnd,
-    OpAmpersand,
-    OpQuestion,
-    OpOr,
-    OpBar,
-    OpDoubleSlash,
-    OpSlash,
-    OpPercent,
-    OpCaret,
-    OpGreaterThanOrEq,
-    OpGreaterThan,
-    OpLessThanOrEq,
-    OpBackArrow,
-    OpLessThan,
-    OpEquals,
-    OpColonEqual,
-
-    Comma,
-    Dot,
-    DoubleDot,
-    TripleDot,
-    OpColon,
-    OpArrow,
-    OpBackslash,
-
-    // Keywords
-    KwIf,
-    KwThen,
-    KwElse,
-    KwWhen,
-    KwIs,
-    KwAs,
-    KwDbg,
-    KwCrash,
-    KwHas,
-    KwWhere,
-    KwImplements,
-    KwExposes,
-    KwImports,
-    KwImport,
-    KwWith,
-    KwGenerates,
-    KwPackage,
-    KwPackages,
-    KwRequires,
-    KwProvides,
-    KwTo,
-    KwInterface,
-    KwApp,
-    KwPlatform,
-    KwHosted,
-    KwDebug,
-    KwExpect,
-
-    NamedUnderscore,
-    OpaqueName,
-    Int,
-    DotUpperIdent,
-    NoSpaceDotNumber,
-    NoSpaceDotLowerIdent,
-    NoSpaceDotUpperIdent,
-};
-/// A helper function equivalent to the Rust impl T { is_keyword }.
-pub fn isKeyword(tok: T) bool {
-    return switch (tok) {
-        .KwIf,
-        .KwThen,
-        .KwElse,
-        .KwWhen,
-        .KwIs,
-        .KwAs,
-        .KwDbg,
-        .KwExpect,
-        .KwCrash,
-        .KwHas,
-        .KwWhere,
-        .KwImplements,
-        .KwExposes,
-        .KwImport,
-        .KwImports,
-        .KwWith,
-        .KwGenerates,
-        .KwPackage,
-        .KwPackages,
-        .KwRequires,
-        .KwProvides,
-        .KwTo,
-        .KwInterface,
-        .KwApp,
-        .KwPlatform,
-        .KwHosted,
-        .KwExpectFx,
-        => true,
-        else => false,
-    };
-}
+// Unicode data tables - allows us to identify upper/lowercase letters for non-ASCII characters.
+const GenCatData = @import("GenCatData");
 
 pub const Token = struct {
-    kind: T,
+    tag: Tag,
     offset: usize,
     length: usize,
 
-    pub const keywords = std.StaticStringMap(T).initComptime(.{
-        .{ "if", .KwIf },
-        .{ "then", .KwThen },
-        .{ "else", .KwElse },
-        .{ "when", .KwWhen },
-        .{ "is", .KwIs },
+    pub const Tag = enum(u8) {
+        EndOfFile,
+
+        // primitives
+        Float,
+        String,
+        SingleQuote,
+
+        // a part of a string interpolation; generally you'll see something like:
+        // StringBegin, OpenCurly, <expr>, CloseCurly, StringPart, OpenCurly, <expr>, CloseCurly
+        StringBegin,
+        StringPart,
+
+        // These are not technically valid, but we can have the formatter fix them up.
+        SingleQuoteBegin,
+        SingleQuotePart,
+
+        UpperIdent,
+        LowerIdent,
+        Underscore,
+        DotLowerIdent,
+        DotInt,
+        DotUpperIdent,
+        NoSpaceDotInt,
+        NoSpaceDotLowerIdent,
+        NoSpaceDotUpperIdent,
+
+        NamedUnderscore,
+        OpaqueName,
+        Int,
+
+        OpenRound,
+        CloseRound,
+        OpenSquare,
+        CloseSquare,
+        OpenCurly,
+        CloseCurly,
+
+        OpPlus,
+        OpStar,
+        OpPizza,
+        OpAssign,
+        OpBinaryMinus, // trailing whitespace
+        OpUnaryMinus, // no trailing whitespace
+        OpNotEquals,
+        OpBang,
+        OpAnd,
+        OpAmpersand,
+        OpQuestion,
+        OpOr,
+        OpBar,
+        OpDoubleSlash,
+        OpSlash,
+        OpPercent,
+        OpCaret,
+        OpGreaterThanOrEq,
+        OpGreaterThan,
+        OpLessThanOrEq,
+        OpBackArrow,
+        OpLessThan,
+        OpEquals,
+        OpColonEqual,
+
+        Comma,
+        Dot,
+        DoubleDot,
+        TripleDot,
+        OpColon,
+        OpArrow,
+        OpBackslash,
+
+        // Keywords
+        KwApp,
+        KwAs,
+        KwCrash,
+        KwDbg,
+        KwDebug,
+        KwElse,
+        KwExpect,
+        KwExposes,
+        KwGenerates,
+        KwHas,
+        KwHosted,
+        KwIf,
+        KwImplements,
+        KwImport,
+        KwImports,
+        KwInterface,
+        KwIs,
+        KwModule,
+        KwPackage,
+        KwPackages,
+        KwPlatform,
+        KwProvides,
+        KwRequires,
+        KwThen,
+        KwTo,
+        KwWhen,
+        KwWhere,
+        KwWith,
+    };
+
+    pub const keywords = std.StaticStringMap(Tag).initComptime(.{
+        .{ "app", .KwApp },
         .{ "as", .KwAs },
+        .{ "crash", .KwCrash },
         .{ "dbg", .KwDbg },
+        .{ "else", .KwElse },
         .{ "expect", .KwExpect },
-        .{ "crash", .KwCrash },
+        .{ "exposes", .KwExposes },
+        .{ "generates", .KwGenerates },
         .{ "has", .KwHas },
-        .{ "where", .KwWhere },
+        .{ "hosted", .KwHosted },
+        .{ "if", .KwIf },
         .{ "implements", .KwImplements },
-        .{ "exposes", .KwExposes },
         .{ "import", .KwImport },
         .{ "imports", .KwImports },
-        .{ "with", .KwWith },
-        .{ "generates", .KwGenerates },
+        .{ "interface", .KwInterface },
+        .{ "is", .KwIs },
+        .{ "module", .KwModule },
         .{ "package", .KwPackage },
         .{ "packages", .KwPackages },
-        .{ "requires", .KwRequires },
+        .{ "platform", .KwPlatform },
         .{ "provides", .KwProvides },
+        .{ "requires", .KwRequires },
+        .{ "then", .KwThen },
         .{ "to", .KwTo },
-        .{ "interface", .KwInterface },
-        .{ "app", .KwApp },
-        .{ "platform", .KwPlatform },
-        .{ "hosted", .KwHosted },
+        .{ "when", .KwWhen },
+        .{ "where", .KwWhere },
+        .{ "with", .KwWith },
     });
 
-    pub fn getKeyword(bytes: []const u8) ?T {
-        return keywords.get(bytes);
+    pub const valid_number_suffixes = std.StaticStringMap(void).initComptime(.{
+        .{ "dec", .{} },
+        .{ "i128", .{} },
+        .{ "i16", .{} },
+        .{ "i32", .{} },
+        .{ "i64", .{} },
+        .{ "i8", .{} },
+        .{ "nat", .{} },
+        .{ "u128", .{} },
+        .{ "u16", .{} },
+        .{ "u32", .{} },
+        .{ "u64", .{} },
+        .{ "u8", .{} },
+    });
+
+    pub fn isKeyword(tok: Tag) bool {
+        return switch (tok) {
+            .KwApp,
+            .KwAs,
+            .KwCrash,
+            .KwDbg,
+            .KwDebug,
+            .KwElse,
+            .KwExpect,
+            .KwExposes,
+            .KwGenerates,
+            .KwHas,
+            .KwHosted,
+            .KwIf,
+            .KwImplements,
+            .KwImport,
+            .KwImports,
+            .KwInterface,
+            .KwIs,
+            .KwModule,
+            .KwPackage,
+            .KwPackages,
+            .KwPlatform,
+            .KwProvides,
+            .KwRequires,
+            .KwThen,
+            .KwTo,
+            .KwWhen,
+            .KwWhere,
+            .KwWith,
+            => true,
+            else => false,
+        };
     }
 };
@@ -226,12 +239,17 @@ pub const TokenizedBuffer = struct {
         };
     }
 
-    /// Pushes a token with the given kind, token offset, and length.
-    pub fn pushToken(self: *TokenizedBuffer, kind: T, tok_offset: usize, tok_length: usize) !void {
+    pub fn deinit(self: *TokenizedBuffer) void {
+        self.tokens.deinit(self.allocator);
+        self.lines.deinit();
+    }
+
+    /// Pushes a token with the given tag, token offset, and length.
+    pub fn pushToken(self: *TokenizedBuffer, tag: Token.Tag, tok_offset: u32, tok_length: u32) !void {
         try self.tokens.append(self.allocator, .{
-            .kind = kind,
-            .offset = @intCast(tok_offset),
-            .length = @intCast(tok_length),
+            .tag = tag,
+            .offset = tok_offset,
+            .length = tok_length,
         });
     }
 
@@ -241,47 +259,44 @@ pub const TokenizedBuffer = struct {
     }
 };
 
-/// A comment is represented by its begin and end offsets.
 pub const Comment = struct {
     begin: usize,
     end: usize,
 };
 
-/// MessageKind enumerates different diagnostic messages.
-pub const MessageKind = enum {
-    MisplacedCarriageReturn,
-    AsciiControl,
-    LeadingZero,
-    UnknownToken,
-    OpaqueNameWithoutName,
-    UppercaseBase,
-    InvalidUnicodeEscapeSequence,
-    InvalidEscapeSequence,
-    UnclosedString,
-    UnclosedSingleQuote,
-    BadNumberSuffix,
-    OverClosedBrace,
-    MismatchedBrace,
-};
-
-const UnicodeKind = enum {
-    LetterUpper,
-    LetterNotUpper,
-    Digit,
-    Other,
-    Invalid,
-};
+const Unicode = struct {
+    tag: Tag,
+    length: u32,
 
-const UnicodeInfo = struct {
-    kind: UnicodeKind,
-    length: usize,
+    const Tag = enum {
+        LetterUpper,
+        LetterNotUpper,
+        Digit,
+        Other,
+        Invalid,
+    };
 };
 
-/// A message with its kind and offset.
-pub const Message = struct {
-    kind: MessageKind,
+pub const Diagnostic = struct {
+    tag: Tag,
     begin: u32,
     end: u32,
+
+    pub const Tag = enum {
+        MisplacedCarriageReturn,
+        AsciiControl,
+        LeadingZero,
+        UnknownToken,
+        OpaqueNameWithoutName,
+        UppercaseBase,
+        InvalidUnicodeEscapeSequence,
+        InvalidEscapeSequence,
+        UnclosedString,
+        UnclosedSingleQuote,
+        BadNumberSuffix,
+        OverClosedBrace,
+        MismatchedBrace,
+    };
 };
 
 /// The cursor is our current position in the input text, and it collects messages.
@@ -292,13 +307,13 @@ pub const Message = struct {
 /// allocate a larger slice and tokenize again.
 pub const Cursor = struct {
     buf: []const u8,
-    pos: usize,
-    messages: []Message,
+    pos: u32,
+    messages: []Diagnostic,
     message_count: usize,
     gc: *GenCatData,
 
     /// Initialize a Cursor with the given input buffer and a pre-allocated messages slice.
-    pub fn init(buf: []const u8, messages: []Message, gc: *GenCatData) Cursor {
+    pub fn init(buf: []const u8, messages: []Diagnostic, gc: *GenCatData) Cursor {
         return Cursor{
             .buf = buf,
             .pos = 0,
@@ -308,26 +323,19 @@ pub const Cursor = struct {
         };
     }
 
-    fn pushMessageHere(self: *Cursor, kind: MessageKind) void {
-        if (self.message_count < self.messages.len) {
-            self.messages[self.message_count] = Message{
-                .kind = kind,
-                .begin = @intCast(self.pos),
-                .end = @intCast(self.pos),
-            };
-            self.message_count += 1;
-        }
+    fn pushMessageHere(self: *Cursor, tag: Diagnostic.Tag) void {
+        self.pushMessage(tag, self.pos, self.pos);
     }
 
-    fn pushMessage(self: *Cursor, kind: MessageKind, begin: u32, end: u32) void {
+    fn pushMessage(self: *Cursor, tag: Diagnostic.Tag, begin: u32, end: u32) void {
         if (self.message_count < self.messages.len) {
-            self.messages[self.message_count] = Message{
-                .kind = kind,
+            self.messages[self.message_count] = .{
+                .tag = tag,
                 .begin = begin,
                 .end = end,
             };
-            self.message_count += 1;
         }
+        self.message_count += 1;
     }
 
     /// Returns the current byte, or null if at the end.
@@ -355,15 +363,17 @@ pub const Cursor = struct {
     }
 
     /// Requires that the next byte is `ch`, otherwise pushes a message.
-    pub fn require(self: *Cursor, ch: u8, kind: MessageKind) void {
+    pub fn require(self: *Cursor, ch: u8, tag: Diagnostic.Tag) void {
         if (self.peek() == ch) {
             self.pos += 1;
         } else {
-            self.pushMessageHere(kind);
+            self.pushMessageHere(tag);
         }
     }
 
     /// Chomps “trivia” (whitespace, comments, etc.) and returns an optional indent.
+    /// If the chomped trivia includes a newline, returns the indent of the next (real) line.
+    /// Otherwise, returns null.
     pub fn chompTrivia(self: *Cursor) ?Indent {
         var sawNewline = false;
         var indent = Indent.init();
@@ -411,28 +421,32 @@ pub const Cursor = struct {
         }
     }
 
-    pub fn peekUnicode(self: *Cursor) UnicodeInfo {
-        const len3 = std.unicode.utf8ByteSequenceLength(self.buf[self.pos]) catch {
-            return .{ .kind = .Invalid, .length = 1 };
+    /// Decodes a Unicode character starting at `self.pos` and returns its category.
+    /// Note this assumes the caller has already peek'd the first byte.
+    pub fn decodeUnicode(self: *Cursor, first_byte: u8) Unicode {
+        std.debug.assert(first_byte == self.buf[self.pos]);
+        const len3 = std.unicode.utf8ByteSequenceLength(first_byte) catch {
+            return .{ .tag = .Invalid, .length = 1 };
         };
-        const len: usize = @intCast(len3);
+        const len: u32 = @intCast(len3);
         const utf8_char = std.unicode.utf8Decode(self.buf[self.pos .. self.pos + len]) catch {
-            return .{ .kind = .Invalid, .length = 1 };
+            return .{ .tag = .Invalid, .length = len };
         };
         switch (self.gc.gc(utf8_char)) {
-            .Lu, .Lt => return .{ .kind = .LetterUpper, .length = len },
-            .Ll, .Lm, .Lo => return .{ .kind = .LetterNotUpper, .length = len },
-            .Nd, .Nl, .No => return .{ .kind = .Digit, .length = len },
-            else => return .{ .kind = .Other, .length = len },
+            .Lu, .Lt => return .{ .tag = .LetterUpper, .length = len },
+            .Ll, .Lm, .Lo => return .{ .tag = .LetterNotUpper, .length = len },
+            .Nd, .Nl, .No => return .{ .tag = .Digit, .length = len },
+            else => return .{ .tag = .Other, .length = len },
         }
     }
 
-    pub fn chompNumber(self: *Cursor, b: u8) T {
+    pub fn chompNumber(self: *Cursor, initialDigit: u8) Token.Tag {
         // Consume the initial digit.
+        std.debug.assert(initialDigit == self.buf[self.pos]);
         self.pos += 1;
-        var tok: T = undefined;
-        if (b == '0') {
+        var tok: Token.Tag = undefined;
+        if (initialDigit == '0') {
             while (true) {
                 const c = self.peek() orelse 0;
                 switch (c) {
@@ -441,7 +455,7 @@ pub const Cursor = struct {
                         self.pos += 1;
                         self.chompIntegerBase16();
                         self.chompNumberSuffix();
-                        tok = T.Int;
+                        tok = .Int;
                         break;
                     },
                     'o', 'O' => {
@@ -449,7 +463,7 @@ pub const Cursor = struct {
                         self.pos += 1;
                         self.chompIntegerBase8();
                         self.chompNumberSuffix();
-                        tok = T.Int;
+                        tok = .Int;
                         break;
                     },
                     'b', 'B' => {
@@ -457,14 +471,14 @@ pub const Cursor = struct {
                         self.pos += 1;
                         self.chompIntegerBase2();
                         self.chompNumberSuffix();
-                        tok = T.Int;
+                        tok = .Int;
                         break;
                     },
                     '0'...'9' => {
                         self.pushMessageHere(.LeadingZero);
                         _ = self.chompNumberBase10();
                         self.chompNumberSuffix();
-                        tok = T.Int;
+                        tok = .Int;
                         break;
                     },
                     '_' => {
@@ -474,12 +488,12 @@ pub const Cursor = struct {
                     '.' => {
                         self.pos += 1;
                         _ = self.chompIntegerBase10();
-                        tok = T.Float;
+                        tok = .Float;
                         _ = self.chompExponent();
                         break;
                     },
                     else => {
-                        tok = T.Int;
+                        tok = .Int;
                         break;
                     },
                 }
@@ -487,7 +501,7 @@ pub const Cursor = struct {
         } else {
             _ = self.chompNumberBase10();
             self.chompNumberSuffix();
-            tok = T.Int;
+            tok = .Int;
         }
         return tok;
     }
@@ -510,7 +524,7 @@ pub const Cursor = struct {
             return;
        }
         const start = self.pos;
-        var pos: usize = self.pos + 1;
+        var pos = self.pos + 1;
         while (pos < self.buf.len) : (pos += 1) {
             const c = self.buf[pos];
             if (std.ascii.isAlphabetic(c) or std.ascii.isDigit(c)) {
@@ -520,34 +534,22 @@ pub const Cursor = struct {
             }
         }
         const suffix = self.buf[start..pos];
-        if (!(std.mem.eql(u8, suffix, "dec") or
-            std.mem.eql(u8, suffix, "i128") or
-            std.mem.eql(u8, suffix, "i16") or
-            std.mem.eql(u8, suffix, "i32") or
-            std.mem.eql(u8, suffix, "i64") or
-            std.mem.eql(u8, suffix, "i8") or
-            std.mem.eql(u8, suffix, "nat") or
-            std.mem.eql(u8, suffix, "u128") or
-            std.mem.eql(u8, suffix, "u16") or
-            std.mem.eql(u8, suffix, "u32") or
-            std.mem.eql(u8, suffix, "u64") or
-            std.mem.eql(u8, suffix, "u8")))
-        {
+        if (Token.valid_number_suffixes.get(suffix) == null) {
             self.pushMessageHere(.BadNumberSuffix);
         }
         self.pos = pos;
     }
 
-    pub fn chompNumberBase10(self: *Cursor) T {
+    pub fn chompNumberBase10(self: *Cursor) Token.Tag {
         self.chompIntegerBase10();
-        var token_type: T = T.Int;
+        var token_type: Token.Tag = .Int;
         if (self.peek() orelse 0 == '.') {
             self.pos += 1;
             self.chompIntegerBase10();
-            token_type = T.Float;
+            token_type = .Float;
         }
         if (self.chompExponent()) {
-            token_type = T.Float;
+            token_type = .Float;
         }
         return token_type;
     }
@@ -600,38 +602,55 @@ pub const Cursor = struct {
         }
     }
 
-    pub fn chompIdentLower(self: *Cursor) T {
+    /// Chomps an identifier starting with a lowercase letter.
+    /// Also checks if the resulting identifier is a keyword.
+    /// Returns the token type - LowerIdent or Kw*
+    pub fn chompIdentLower(self: *Cursor) Token.Tag {
         const start = self.pos;
         var kwCheck: bool = true;
         while (self.peek()) |c| {
             if (c >= 'a' and c <= 'z') {
                 self.pos += 1;
-            } else if ((c >= 'A' and c <= 'Z') or (c >= '0' and c <= '9')) {
+            } else if ((c >= 'A' and c <= 'Z') or (c >= '0' and c <= '9') or c == '_') {
                 self.pos += 1;
                 kwCheck = false;
             } else {
-                break;
+                const info = self.decodeUnicode(c);
+                if (info.tag != .Other and info.tag != .Invalid) {
+                    self.pos += info.length;
+                    kwCheck = false;
+                } else {
+                    break;
+                }
             }
         }
-        if (kwCheck and (self.pos - start) <= 10) {
+        if (kwCheck) {
             const ident = self.buf[start..self.pos];
-            const kw = Token.getKeyword(ident);
-            return kw orelse T.LowerIdent;
+            const kw = Token.keywords.get(ident);
+            return kw orelse .LowerIdent;
         } else {
-            return T.LowerIdent;
+            return .LowerIdent;
        }
     }
 
-    pub fn chompIdentUpper(self: *Cursor) T {
+    /// Chomps a general identifier - either upper or lower case.
+    /// Doesn't check if the identifier is a keyword, since we assume the caller already
+    /// determined that was impossible (e.g. because the first character was uppercase),
+    /// or otherwise not relevant.
+    pub fn chompIdentGeneral(self: *Cursor) void {
         while (self.pos < self.buf.len) {
             const c = self.buf[self.pos];
-            if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or (c >= '0' and c <= '9')) {
+            if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or (c >= '0' and c <= '9') or c == '_') {
                 self.pos += 1;
             } else {
-                break;
+                const info = self.decodeUnicode(c);
+                if (info.tag != .Other and info.tag != .Invalid) {
+                    self.pos += info.length;
+                } else {
+                    break;
+                }
             }
         }
-        return T.UpperIdent;
     }
 
     pub fn chompInteger(self: *Cursor) void {
@@ -644,29 +663,13 @@ pub const Cursor = struct {
             }
         }
     }
-
-    pub fn chompIdentGeneral(self: *Cursor) void {
-        while (self.pos < self.buf.len) {
-            const c = self.buf[self.pos];
-            if ((c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or (c >= '0' and c <= '9') or c == '_') {
-                self.pos += 1;
-            } else {
-                const info = self.peekUnicode();
-                if (info.kind != UnicodeKind.Other and info.kind != UnicodeKind.Invalid) {
-                    self.pos += info.length;
-                } else {
-                    break;
-                }
-            }
-        }
-    }
 };
 
 /// The output of the tokenizer.
 pub const TokenOutput = struct {
     tokens: TokenizedBuffer,
     /// The messages slice is the same one provided by the caller.
-    messages: []Message,
+    messages: []Diagnostic,
     message_count: usize,
 };
 
@@ -688,7 +691,7 @@ pub const Tokenizer = struct {
 
     /// Creates a new Tokenizer.
     /// Note that the caller must also provide a pre-allocated messages buffer.
-    pub fn init(text: []const u8, messages: []Message, gc: *GenCatData, allocator: std.mem.Allocator) !Tokenizer {
+    pub fn init(text: []const u8, messages: []Diagnostic, gc: *GenCatData, allocator: std.mem.Allocator) !Tokenizer {
         const cursor = Cursor.init(text, messages, gc);
         var output = try TokenizedBuffer.init(allocator);
         // Push an initial line with indent 0.
@@ -703,19 +706,23 @@ pub const Tokenizer = struct {
         };
     }
 
-    pub fn destroy(self: *Tokenizer) void {
-        // self.output.kinds.deinit();
-        // self.output.offsets.deinit();
-        // self.output.lengths.deinit();
-        // self.output.lines.deinit();
+    pub fn deinit(self: *Tokenizer) void {
+        self.output.deinit();
         self.stack.deinit();
-        self.gc.deinit();
     }
 
-    // A simplified equivalent to the Rust macros:
-    fn pushToken(self: *Tokenizer, kind: T, start: usize) !void {
+    pub fn finish_and_deinit(self: Tokenizer) TokenOutput {
+        self.stack.deinit();
+        return .{
+            .tokens = self.output,
+            .messages = self.cursor.messages,
+            .message_count = self.cursor.message_count,
+        };
+    }
+
+    fn pushToken(self: *Tokenizer, tag: Token.Tag, start: u32) !void {
         const len = self.cursor.pos - start;
-        try self.output.pushToken(kind, start, len);
+        try self.output.pushToken(tag, start, len);
     }
 
     fn consumeBraceCloseAndContinueStringInterp(self: *Tokenizer, brace: BraceKind) !void {
@@ -760,7 +767,7 @@ pub const Tokenizer = struct {
         }
     }
 
-    /// The main tokenize loop.
+    /// The main tokenize loop. This loops over the whole input buffer, tokenizing as it goes.
     pub fn tokenize(self: *Tokenizer) !void {
         var sawWhitespace: bool = true;
         while (self.cursor.pos < self.cursor.buf.len) {
@@ -787,40 +794,40 @@ pub const Tokenizer = struct {
                     if (n == '.') {
                         if (self.cursor.peekAt(2) == '.') {
                             self.cursor.pos += 3;
-                            try self.output.pushToken(T.TripleDot, start, 3);
+                            try self.output.pushToken(.TripleDot, start, 3);
                         } else {
                             self.cursor.pos += 2;
-                            try self.output.pushToken(T.DoubleDot, start, 2);
+                            try self.output.pushToken(.DoubleDot, start, 2);
                         }
                     } else if (n >= '0' and n <= '9') {
                         self.cursor.pos += 1;
                         self.cursor.chompInteger();
                         const len = self.cursor.pos - start;
-                        try self.output.pushToken(if (sp) T.DotNumber else T.NoSpaceDotNumber, start, len);
+                        try self.output.pushToken(if (sp) .DotInt else .NoSpaceDotInt, start, len);
                     } else if (n >= 'a' and n <= 'z') {
                         self.cursor.pos += 1;
                         self.cursor.chompIdentGeneral();
                         const len = self.cursor.pos - start;
-                        try self.output.pushToken(if (sp) T.DotLowerIdent else T.NoSpaceDotLowerIdent, start, len);
+                        try self.output.pushToken(if (sp) .DotLowerIdent else .NoSpaceDotLowerIdent, start, len);
                     } else if (n >= 'A' and n <= 'Z') {
                         self.cursor.pos += 1;
                         self.cursor.chompIdentGeneral();
                         const len = self.cursor.pos - start;
-                        try self.output.pushToken(if (sp) T.DotUpperIdent else T.NoSpaceDotUpperIdent, start, len);
+                        try self.output.pushToken(if (sp) .DotUpperIdent else .NoSpaceDotUpperIdent, start, len);
                     } else if (n >= 0b11000000 and n <= 0xff) {
-                        const info = self.cursor.peekUnicode();
-                        switch (info.kind) {
+                        const info = self.cursor.decodeUnicode(n);
+                        switch (info.tag) {
                             .LetterUpper => {
                                 self.cursor.pos += info.length;
                                 self.cursor.chompIdentGeneral();
                                 const len = self.cursor.pos - start;
-                                try self.output.pushToken(if (sp) T.DotUpperIdent else T.NoSpaceDotUpperIdent, start, len);
+                                try self.output.pushToken(if (sp) .DotUpperIdent else .NoSpaceDotUpperIdent, start, len);
                             },
                             .LetterNotUpper => {
                                 self.cursor.pos += info.length;
                                 self.cursor.chompIdentGeneral();
                                 const len = self.cursor.pos - start;
-                                try self.output.pushToken(if (sp) T.DotLowerIdent else T.NoSpaceDotLowerIdent, start, len);
+                                try self.output.pushToken(if (sp) .DotLowerIdent else .NoSpaceDotLowerIdent, start, len);
                             },
                             else => {
                                 self.cursor.pos += info.length;
@@ -829,13 +836,13 @@ pub const Tokenizer = struct {
                        }
                    } else if (n == '{') {
                        self.cursor.pos += 1;
-                        try self.output.pushToken(T.Dot, start, 2);
+                        try self.output.pushToken(.Dot, start, 2);
                    } else {
                        return error.UnhandledToken;
                    }
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.Dot, start, 1);
+                    try self.output.pushToken(.Dot, start, 1);
                }
            },
 
@@ -845,25 +852,25 @@ pub const Tokenizer = struct {
                if (next) |n| {
                    if (n == '>') {
                        self.cursor.pos += 2;
-                        try self.output.pushToken(T.OpArrow, start, 2);
+                        try self.output.pushToken(.OpArrow, start, 2);
                    } else if (n == ' ' or n == '\t' or n == '\n' or n == '\r' or n == '#') {
                        self.cursor.pos += 1;
-                        try self.output.pushToken(T.OpBinaryMinus, start, 1);
+                        try self.output.pushToken(.OpBinaryMinus, start, 1);
                    } else if (n >= '0' and n <= '9' and sp) {
                        self.cursor.pos += 1;
                        while (self.cursor.pos < self.cursor.buf.len and std.ascii.isDigit(self.cursor.buf[self.cursor.pos])) {
                            self.cursor.pos += 1;
                        }
                        const len = self.cursor.pos - start;
-                        try self.output.pushToken(T.Int, start, len);
+                        try self.output.pushToken(.Int, start, len);
                    } else {
                        self.cursor.pos += 1;
-                        const tokenType = if (sp) T.OpUnaryMinus else T.OpBinaryMinus;
+                        const tokenType: Token.Tag = if (sp) .OpUnaryMinus else .OpBinaryMinus;
                        try self.output.pushToken(tokenType, start, 1);
                    }
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(if (sp) T.OpUnaryMinus else T.OpBinaryMinus, start, 1);
+                    try self.output.pushToken(if (sp) .OpUnaryMinus else .OpBinaryMinus, start, 1);
                }
            },
 
@@ -871,10 +878,10 @@ pub const Tokenizer = struct {
            '!' => {
                if (self.cursor.peekAt(1) == '=') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpNotEquals, start, 2);
+                    try self.output.pushToken(.OpNotEquals, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpBang, start, 1);
+                    try self.output.pushToken(.OpBang, start, 1);
                }
            },
 
@@ -882,88 +889,88 @@ pub const Tokenizer = struct {
            '&' => {
                if (self.cursor.peekAt(1) == '&') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpAnd, start, 2);
+                    try self.output.pushToken(.OpAnd, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpAmpersand, start, 1);
+                    try self.output.pushToken(.OpAmpersand, start, 1);
                }
            },
 
            // Comma (,)
            ',' => {
                self.cursor.pos += 1;
-                try self.output.pushToken(T.Comma, start, 1);
+                try self.output.pushToken(.Comma, start, 1);
            },
 
            // Question mark (?)
            '?' => {
                self.cursor.pos += 1;
-                try self.output.pushToken(T.OpQuestion, start, 1);
+                try self.output.pushToken(.OpQuestion, start, 1);
            },
 
            // Pipe (|)
            '|' => {
                if (self.cursor.peekAt(1) == '|') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpOr, start, 2);
+                    try self.output.pushToken(.OpOr, start, 2);
                } else if (self.cursor.peekAt(1) == '>') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpPizza, start, 2);
+                    try self.output.pushToken(.OpPizza, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpBar, start, 1);
+                    try self.output.pushToken(.OpBar, start, 1);
                }
            },
 
            // Plus (+)
            '+' => {
                self.cursor.pos += 1;
-                try self.output.pushToken(T.OpPlus, start, 1);
+                try self.output.pushToken(.OpPlus, start, 1);
            },
 
            // Star (*)
            '*' => {
                self.cursor.pos += 1;
-                try self.output.pushToken(T.OpStar, start, 1);
+                try self.output.pushToken(.OpStar, start, 1);
            },
 
            // Slash (/)
            '/' => {
                if (self.cursor.peekAt(1) == '/') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpDoubleSlash, start, 2);
+                    try self.output.pushToken(.OpDoubleSlash, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpSlash, start, 1);
+                    try self.output.pushToken(.OpSlash, start, 1);
                }
            },
 
            // Backslash (\)
            '\\' => {
                self.cursor.pos += 1;
-                try self.output.pushToken(T.OpBackslash, start, 1);
+                try self.output.pushToken(.OpBackslash, start, 1);
            },
 
            // Percent (%)
            '%' => {
                self.cursor.pos += 1;
-                try self.output.pushToken(T.OpPercent, start, 1);
+                try self.output.pushToken(.OpPercent, start, 1);
            },
 
            // Caret (^)
            '^' => {
                self.cursor.pos += 1;
-                try self.output.pushToken(T.OpCaret, start, 1);
+                try self.output.pushToken(.OpCaret, start, 1);
            },
 
            // Greater-than (>)
            '>' => {
                if (self.cursor.peekAt(1) == '=') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpGreaterThanOrEq, start, 2);
+                    try self.output.pushToken(.OpGreaterThanOrEq, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpGreaterThan, start, 1);
+                    try self.output.pushToken(.OpGreaterThan, start, 1);
                }
            },
 
@@ -971,13 +978,13 @@ pub const Tokenizer = struct {
            '<' => {
                if (self.cursor.peekAt(1) == '=') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpLessThanOrEq, start, 2);
+                    try self.output.pushToken(.OpLessThanOrEq, start, 2);
                } else if (self.cursor.peekAt(1) == '-') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpBackArrow, start, 2);
+                    try self.output.pushToken(.OpBackArrow, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpLessThan, start, 1);
+                    try self.output.pushToken(.OpLessThan, start, 1);
                }
            },
 
@@ -985,10 +992,10 @@ pub const Tokenizer = struct {
            '=' => {
                if (self.cursor.peekAt(1) == '=') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpEquals, start, 2);
+                    try self.output.pushToken(.OpEquals, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpAssign, start, 1);
+                    try self.output.pushToken(.OpAssign, start, 1);
                }
            },
 
@@ -996,53 +1003,42 @@ pub const Tokenizer = struct {
            ':' => {
                if (self.cursor.peekAt(1) == '=') {
                    self.cursor.pos += 2;
-                    try self.output.pushToken(T.OpColonEqual, start, 2);
+                    try self.output.pushToken(.OpColonEqual, start, 2);
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpColon, start, 1);
+                    try self.output.pushToken(.OpColon, start, 1);
                }
            },
 
-            // Open parenthesis (()
            '(' => {
                self.cursor.pos += 1;
-                try self.stack.append(BraceKind.Round);
-                try self.output.pushToken(T.OpenRound, start, 1);
-            },
-
-            // Close parenthesis ())
-            ')' => {
-                try self.output.pushToken(T.CloseRound, start, 1);
-                try self.consumeBraceCloseAndContinueStringInterp(.Round);
+                try self.stack.append(.Round);
+                try self.output.pushToken(.OpenRound, start, 1);
            },
-
-            // Open square bracket ([)
            '[' => {
                self.cursor.pos += 1;
-                try self.stack.append(BraceKind.Square);
-                try self.output.pushToken(T.OpenSquare, start, 1);
+                try self.stack.append(.Square);
+                try self.output.pushToken(.OpenSquare, start, 1);
            },
-
-            // Close square bracket (])
-            ']' => {
-                try self.output.pushToken(T.CloseSquare, start, 1);
-                try self.consumeBraceCloseAndContinueStringInterp(.Square);
-            },
-
-            // Open curly brace ({)
            '{' => {
                self.cursor.pos += 1;
-                try self.stack.append(BraceKind.Curly);
-                try self.output.pushToken(T.OpenCurly, start, 1);
+                try self.stack.append(.Curly);
+                try self.output.pushToken(.OpenCurly, start, 1);
            },
 
-            // Close curly brace (})
+            ')' => {
+                try self.output.pushToken(.CloseRound, start, 1);
+                try self.consumeBraceCloseAndContinueStringInterp(.Round);
+            },
+            ']' => {
+                try self.output.pushToken(.CloseSquare, start, 1);
+                try self.consumeBraceCloseAndContinueStringInterp(.Square);
+            },
            '}' => {
-                try self.output.pushToken(T.CloseCurly, start, 1);
+                try self.output.pushToken(.CloseCurly, start, 1);
                try self.consumeBraceCloseAndContinueStringInterp(.Curly);
            },
 
-            // Underscore (_)
            '_' => {
                const next = self.cursor.peekAt(1);
                if (next) |n| {
@@ -1050,18 +1046,17 @@ pub const Tokenizer = struct {
                        self.cursor.pos += 2;
                        self.cursor.chompIdentGeneral();
                        const len = self.cursor.pos - start;
-                        try self.output.pushToken(T.NamedUnderscore, start, len);
+                        try self.output.pushToken(.NamedUnderscore, start, len);
                    } else {
                        self.cursor.pos += 1;
-                        try self.output.pushToken(T.Underscore, start, 1);
+                        try self.output.pushToken(.Underscore, start, 1);
                    }
                } else {
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.Underscore, start, 1);
+                    try self.output.pushToken(.Underscore, start, 1);
                }
            },
 
-            // At-sign (@)
            '@' => {
                const next = self.cursor.peekAt(1);
                if (next) |n| {
@@ -1069,16 +1064,16 @@ pub const Tokenizer = struct {
                        self.cursor.pos += 2;
                        self.cursor.chompIdentGeneral();
                        const len = self.cursor.pos - start;
-                        try self.output.pushToken(T.OpaqueName, start, len);
+                        try self.output.pushToken(.OpaqueName, start, len);
                    } else {
                        self.cursor.pushMessageHere(.OpaqueNameWithoutName);
                        self.cursor.pos += 1;
-                        try self.output.pushToken(T.OpaqueName, start, 1);
+                        try self.output.pushToken(.OpaqueName, start, 1);
                    }
                } else {
                    self.cursor.pushMessageHere(.OpaqueNameWithoutName);
                    self.cursor.pos += 1;
-                    try self.output.pushToken(T.OpaqueName, start, 1);
+                    try self.output.pushToken(.OpaqueName, start, 1);
                }
            },
 
@@ -1086,21 +1081,23 @@ pub const Tokenizer = struct {
            '0'...'9' => {
-                _ = self.cursor.chompNumber(b);
+                const tag = self.cursor.chompNumber(b);
                const len = self.cursor.pos - start;
-                try self.output.pushToken(T.Int, start, len);
+                try self.output.pushToken(tag, start, len);
            },
 
            // Lowercase identifiers
            'a'...'z' => {
-                _ = self.cursor.chompIdentLower();
+                const tag = self.cursor.chompIdentLower();
                const len = self.cursor.pos - start;
-                try self.output.pushToken(T.LowerIdent, start, len);
+                try self.output.pushToken(tag, start, len);
            },
 
            // Uppercase identifiers
            'A'...'Z' => {
-                _ = self.cursor.chompIdentUpper();
+                self.cursor.pos += 1;
+                _ = self.cursor.chompIdentGeneral();
                const len = self.cursor.pos - start;
-                try self.output.pushToken(T.UpperIdent, start, len);
+                try self.output.pushToken(.UpperIdent, start, len);
            },
 
            // String-like literal starting with a single or double quote
@@ -1113,19 +1110,19 @@ pub const Tokenizer = struct {
 
            // first byte of a UTF-8 sequence
            0b11000000...0xff => {
-                const info = self.cursor.peekUnicode();
-                switch (info.kind) {
+                const info = self.cursor.decodeUnicode(b);
+                switch (info.tag) {
                    .LetterUpper => {
                        self.cursor.pos += info.length;
                        self.cursor.chompIdentGeneral();
                        const len = self.cursor.pos - start;
-                        try self.output.pushToken(T.UpperIdent, start, len);
+                        try self.output.pushToken(.UpperIdent, start, len);
                    },
                    .LetterNotUpper => {
                        self.cursor.pos += info.length;
                        self.cursor.chompIdentGeneral();
                        const len = self.cursor.pos - start;
-                        try self.output.pushToken(T.LowerIdent, start, len);
+                        try self.output.pushToken(.LowerIdent, start, len);
                    },
                    else => {
                        self.cursor.pos += info.length;
@@ -1146,10 +1143,10 @@ pub const Tokenizer = struct {
            _ = self.output.lines.pop();
        }
 
-        try self.pushToken(T.EndOfFile, 0);
+        try self.pushToken(.EndOfFile, 0);
    }
 
-    pub fn tokenizeStringLikeLiteral(self: *Tokenizer, term: u8) !T {
+    pub fn tokenizeStringLikeLiteral(self: *Tokenizer, term: u8) !Token.Tag {
        const start = self.cursor.pos;
        // Skip the initial quote.
        self.cursor.pos += 1;
@@ -1161,7 +1158,7 @@ pub const Tokenizer = struct {
        return try self.tokenizeStringLikeLiteralBody(false, term, start, multiline);
    }
 
-    pub fn tokenizeStringLikeLiteralBody(self: *Tokenizer, already_started: bool, term: u8, start: usize, multiline: bool) !T {
+    pub fn tokenizeStringLikeLiteralBody(self: *Tokenizer, already_started: bool, term: u8, start: u32, multiline: bool) !Token.Tag {
        var escape: bool = false;
        while (self.cursor.pos < self.cursor.buf.len) {
            const c = self.cursor.buf[self.cursor.pos];
@@ -1174,7 +1171,7 @@ pub const Tokenizer = struct {
                    'u' => {
                        escape = false;
                        self.cursor.pos += 1;
-                        self.cursor.require('(', MessageKind.InvalidUnicodeEscapeSequence);
+                        self.cursor.require('(', .InvalidUnicodeEscapeSequence);
                        while (true) {
                            if (self.cursor.peek() == ')') {
                                self.cursor.pos += 1;
@@ -1224,53 +1221,45 @@ pub const Tokenizer = struct {
                    try self.stack.append(brace);
                    if (term == '"') {
                        if (already_started) {
-                            return T.StringPart;
+                            return .StringPart;
                        } else {
-                            return T.StringBegin;
+                            return .StringBegin;
                        }
                    } else {
                        std.debug.assert(term == '\'');
                        if (already_started) {
-                            return T.SingleQuotePart;
+                            return .SingleQuotePart;
                        } else {
-                            return T.SingleQuoteBegin;
+                            return .SingleQuoteBegin;
                        }
                    }
                } else if (c == '\n') {
                    if (!multiline) {
                        self.cursor.pushMessage(.UnclosedString, @intCast(start), @intCast(self.cursor.pos));
-                        return T.String;
+                        return .String;
                    } else {
                        self.cursor.pos += 1;
                    }
                } else {
                    if (!multiline and c == term) {
                        self.cursor.pos += 1;
-                        return T.String;
+                        return .String;
                    } else if (multiline and c == term and self.cursor.peekAt(1) == term and self.cursor.peekAt(2) == term) {
                        self.cursor.pos += 3;
-                        return T.String;
+                        return .String;
                    }
                    self.cursor.pos += 1;
                }
            }
        }
-        const kind: MessageKind = if (term == '"') .UnclosedString else .UnclosedSingleQuote;
-        self.cursor.pushMessage(kind, @intCast(start), @intCast(self.cursor.pos));
+        const diag: Diagnostic.Tag = if (term == '"') .UnclosedString else .UnclosedSingleQuote;
+        self.cursor.pushMessage(diag, start, self.cursor.pos);
        if (already_started) {
-            return if (term == '"') T.StringPart else T.SingleQuotePart;
+            return if (term == '"') .StringPart else .SingleQuotePart;
        } else {
-            return if (term == '"') T.String else T.SingleQuote;
+            return if (term == '"') .String else .SingleQuote;
        }
    }
-
-    pub fn finish(self: Tokenizer) TokenOutput {
-        return .{
-            .tokens = self.output,
-            .messages = self.cursor.messages,
-            .message_count = self.cursor.message_count,
-        };
-    }
 };
 
 fn tokenizeAndCheckMessages(gc: *GenCatData, base_dir: std.fs.Dir, file_path: []const u8) !void {
@@ -1289,11 +1278,12 @@ fn tokenizeAndCheckMessages(gc: *GenCatData, base_dir: std.fs.Dir, file_path: []
    }
 
    const alloc = std.heap.page_allocator;
-    var messages: [128]Message = undefined;
+    var messages: [128]Diagnostic = undefined;
    const messages_slice = messages[0..];
    var tokenizer = try Tokenizer.init(buffer, messages_slice, gc, alloc);
    try tokenizer.tokenize();
-    const result = tokenizer.finish();
+    var result = tokenizer.finish_and_deinit();
+    defer result.tokens.deinit();
 
    if (result.message_count != 0) {
        try stdout.print("Messages:\n", .{});
@@ -1304,7 +1294,7 @@ fn tokenizeAndCheckMessages(gc: *GenCatData, base_dir: std.fs.Dir, file_path: []
                break;
            }
            i += 1;
-            try stdout.print("  {s} from {d} to {d}\n", .{ @tagName(msg.kind), msg.begin, msg.end });
+            try stdout.print("  {s} from {d} to {d}\n", .{ @tagName(msg.tag), msg.begin, msg.end });
        }
 
        // Print the whole file with error caret markers inserted.
@@ -1354,7 +1344,7 @@ fn tokenizeAndCheckMessages(gc: *GenCatData, base_dir: std.fs.Dir, file_path: []
            @memset(caret_line[highlight_start..caret_line_len], '^');
            // Write the caret line.
            try stdout.writeAll(caret_line);
-            try stdout.print(" {s}", .{@tagName(result.messages[msg_index].kind)});
+            try stdout.print(" {s}", .{@tagName(result.messages[msg_index].tag)});
            try stdout.writeAll("\n");
 
            std.heap.page_allocator.free(caret_line);
diff --git a/src/check/parse/tokenize/src/root.zig b/src/check/parse/tokenize/src/root.zig
deleted file mode 100644
index ecfeade1a3..0000000000
--- a/src/check/parse/tokenize/src/root.zig
+++ /dev/null
@@ -1,10 +0,0 @@
-const std = @import("std");
-const testing = std.testing;
-
-export fn add(a: i32, b: i32) i32 {
-    return a + b;
-}
-
-test "basic add functionality" {
-    try testing.expect(add(3, 7) == 10);
-}
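
Usage note (illustrative, not part of the patch): the Cursor doc comment describes a two-phase contract — tokenize into a caller-provided diagnostics slice, and if more diagnostics were produced than fit, allocate a larger slice and tokenize again. Since pushMessage now increments message_count even past the end of the slice, overflow is directly observable. A minimal Zig sketch of a driver, assuming this file is imported as "main.zig" and a GenCatData instance is already set up:

const std = @import("std");
const GenCatData = @import("GenCatData");
const tokenizer_mod = @import("main.zig"); // assumed import path for the module in this diff

fn reportDiagnostics(alloc: std.mem.Allocator, gc: *GenCatData, src: []const u8) !void {
    // Fixed-size diagnostics buffer: pushMessage stores at most messages.len
    // entries but keeps counting, so truncation is detectable afterwards.
    var messages: [64]tokenizer_mod.Diagnostic = undefined;
    var tokenizer = try tokenizer_mod.Tokenizer.init(src, messages[0..], gc, alloc);
    try tokenizer.tokenize();
    var result = tokenizer.finish_and_deinit();
    defer result.tokens.deinit();

    const stored = @min(result.message_count, result.messages.len);
    for (result.messages[0..stored]) |msg| {
        std.debug.print("{s} at {d}..{d}\n", .{ @tagName(msg.tag), msg.begin, msg.end });
    }
    if (result.message_count > result.messages.len) {
        // Here the caller could allocate a larger slice and tokenize again.
        std.debug.print("(dropped {d} diagnostics)\n", .{result.message_count - result.messages.len});
    }
}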
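
The switch from a chain of std.mem.eql checks to Token.valid_number_suffixes, and from the hand-rolled getKeyword to Token.keywords.get, both rely on std.StaticStringMap building its lookup table at compile time. A self-contained sketch of the same pattern (the Kw enum here is a stand-in, not the patch's Token.Tag):

const std = @import("std");

const Kw = enum { KwIf, KwElse };

// Built entirely at comptime; lookups are O(length-bucketed comparisons), no allocation.
const keywords = std.StaticStringMap(Kw).initComptime(.{
    .{ "if", .KwIf },
    .{ "else", .KwElse },
});

test "comptime string map lookup" {
    try std.testing.expectEqual(@as(?Kw, .KwIf), keywords.get("if"));
    // Misses return null - the same signal chompNumberSuffix uses to emit BadNumberSuffix.
    try std.testing.expect(keywords.get("iff") == null);
}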
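
decodeUnicode composes two std.unicode helpers before consulting the GenCatData tables: utf8ByteSequenceLength on the first byte to learn the sequence length, then utf8Decode over that many bytes to get the codepoint. A small standalone check of those two calls (the lambda example is illustrative):

const std = @import("std");

test "utf8 decoding steps used by Cursor.decodeUnicode" {
    const buf = "λx"; // U+03BB takes 2 bytes in UTF-8
    const len = try std.unicode.utf8ByteSequenceLength(buf[0]);
    try std.testing.expectEqual(@as(u3, 2), len);
    const cp = try std.unicode.utf8Decode(buf[0..len]);
    try std.testing.expectEqual(@as(u21, 0x03BB), cp);
}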