diff --git a/src/base.zig b/src/base.zig new file mode 100644 index 0000000000..8a518d2b88 --- /dev/null +++ b/src/base.zig @@ -0,0 +1,52 @@ +const region = @import("base/region.zig"); +const symbol = @import("base/symbol.zig"); +const module = @import("base/module.zig"); +const package = @import("base/package.zig"); +const primitive = @import("base/primitive.zig"); +const env = @import("base/env.zig"); + +pub const Region = region.Region; +pub const Position = region.Position; +pub const LineAndColumn = region.LineAndColumn; + +pub const Ident = symbol.Ident; +pub const IdentAttributes = symbol.IdentAttributes; +pub const IdentProblems = symbol.IdentProblems; +pub const IdentId = symbol.IdentId; +pub const IdentStore = symbol.IdentStore; +pub const Symbol = symbol.Symbol; +pub const SymbolStore = symbol.SymbolStore; + +pub const Module = module.Module; +pub const ModuleId = module.ModuleId; +pub const ModuleStore = module.ModuleStore; + +pub const Package = package.Package; +pub const PackageId = package.PackageId; +pub const PackageStore = package.PackageStore; + +pub const Primitive = primitive.Primitive; +pub const Literal = primitive.Literal; + +pub const ModuleEnv = env.ModuleEnv; +pub const GlobalEnv = env.GlobalEnv; + +pub const Recursive = enum { + NotRecursive, + Recursive, + TailRecursive, +}; + +// TODO: can this be smaller than u32? +/// Source of crash, and its runtime representation to roc_panic. +pub const CrashOrigin = enum(u32) { + /// The crash is due to Roc, either via a builtin or type error. + Roc = 0, + /// The crash is user-defined. + User = 1, +}; + +pub const LowLevel = .{}; + +// TODO: move to relevant stages +pub const TypeVar = struct { id: u32 }; diff --git a/src/base/env.zig b/src/base/env.zig new file mode 100644 index 0000000000..159d1ec664 --- /dev/null +++ b/src/base/env.zig @@ -0,0 +1,74 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const cols = @import("../collections.zig"); +const problem = @import("../problem.zig"); + +pub const ModuleEnv = struct { + symbols: base.SymbolStore, + modules: base.ModuleStore, + strings: cols.LargeStringInterner, + tag_names: cols.TagNameInterner, + tag_ids_for_slicing: cols.SafeList(cols.TagNameId), + field_names: cols.FieldNameInterner, + field_ids_for_slicing: cols.SafeList(cols.FieldNameId), + problems: cols.SafeList(problem.Problem), + // TODO: where are these used, and how do we manage them? 
+ // pub tuple_elem_indices: Vec, + // pub record_fields: Vec>, + + pub fn init(allocator: std.mem.Allocator) ModuleEnv { + return ModuleEnv{ + .symbols = base.SymbolStore.init(allocator), + .modules = base.ModuleStore.init(allocator), + .strings = cols.LargeStringInterner.init(allocator), + .tag_names = cols.TagNameInterner.init(allocator), + .tag_ids_for_slicing = cols.SafeList(cols.TagNameId).init(allocator), + .field_names = cols.FieldNameInterner.init(allocator), + .field_ids_for_slicing = cols.SafeList(cols.FieldNameId).init(allocator), + .problems = cols.SafeList(problem.Problem).init(allocator), + }; + } + + pub fn deinit(self: *ModuleEnv) void { + self.symbols.deinit(); + self.modules.deinit(); + self.strings.deinit(); + self.tag_names.deinit(); + self.tag_ids_for_slicing.deinit(); + self.field_names.deinit(); + self.field_ids_for_slicing.deinit(); + self.problems.deinit(); + } + + pub fn addTagNameSlice( + self: *ModuleEnv, + name_ids: []cols.TagNameId, + ) cols.SafeList(cols.TagNameId).Slice { + return self.tag_ids_for_slicing.appendSlice(name_ids); + } + + pub fn addFieldNameSlice( + self: *ModuleEnv, + name_ids: []cols.FieldNameId, + ) cols.SafeList(cols.FieldNameId).Slice { + return self.field_ids_for_slicing.appendSlice(name_ids); + } +}; + +pub const GlobalEnv = struct { + // TODO: do we need this if each module manages this? + modules: base.ModuleStore, + packages: base.PackageStore, + + pub fn init(allocator: std.mem.Allocator) GlobalEnv { + return GlobalEnv{ + .modules = base.ModuleStore.init(allocator), + .packages = base.PackageStore.init(allocator), + }; + } + + pub fn deinit(self: *GlobalEnv) void { + self.modules.deinit(); + self.packages.deinit(); + } +}; diff --git a/src/base/module.zig b/src/base/module.zig new file mode 100644 index 0000000000..479a65102f --- /dev/null +++ b/src/base/module.zig @@ -0,0 +1,75 @@ +const std = @import("std"); +const cols = @import("../collections.zig"); + +pub const ModuleId = struct { id: u32 }; + +pub const Module = struct { + name: []u8, + package_shorthand: ?[]u8, + is_builtin: bool, +}; + +pub const ModuleStore = struct { + modules: cols.SafeMultiList(Module), + + pub fn init(allocator: std.mem.Allocator) ModuleStore { + const modules = cols.SafeMultiList(Module).init(allocator); + modules.append(Module{ + .id = 0, + .name = &.{}, + .base_name = &.{}, + .package_shorthand = null, + .is_builtin = false, + }); + + // TODO: insert builtins automatically? 
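+        // One possible shape for that (a sketch only; `builtin_module_names` is
+        // an assumed list of names such as "Bool" or "Str", and nothing defines
+        // or registers it yet):
+        //
+        //     for (builtin_module_names) |builtin_name| {
+        //         modules.append(Module{
+        //             .name = builtin_name,
+        //             .package_shorthand = null,
+        //             .is_builtin = true,
+        //         });
+        //     }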
+ + return ModuleStore{ .modules = modules }; + } + + pub fn deinit(self: *ModuleStore) void { + self.modules.deinit(); + } + + pub fn lookup(self: *ModuleStore, name: []const u8, package_shorthand: ?[]const u8) ?ModuleId { + const items = self.modules.items; + + for (0..self.modules.len()) |index| { + const other_name = items.items(.name_segments)[index]; + if (name == other_name) { + const other_package_shorthand = items.items(.package_shorthand)[index]; + if (other_package_shorthand == package_shorthand) { + return ModuleId{ .id = @as(u32, index) }; + } + } + } + + return null; + } + + pub fn getOrInsert( + self: *ModuleStore, + name: []const u8, + package_shorthand: ?[]const u8, + ) ModuleId { + if (self.lookup(name, package_shorthand)) |id| { + return id; + } else { + const new_id = self.modules.insert(Module{ + .name = name, + .package_shorthand = package_shorthand, + .is_builtin = false, + }); + + return ModuleId{ .id = new_id.id }; + } + } + + pub fn getName(self: *ModuleStore, id: ModuleId) []u8 { + return self.modules.items.items(.name)[@as(usize, id.id)]; + } + + pub fn getPackageShorthand(self: *ModuleStore, id: ModuleId) ?[]u8 { + return self.modules.items.items(.package_shorthand)[@as(usize, id.id)]; + } +}; diff --git a/src/base/package.zig b/src/base/package.zig new file mode 100644 index 0000000000..dbf3f1c757 --- /dev/null +++ b/src/base/package.zig @@ -0,0 +1,29 @@ +const std = @import("std"); + +pub const PackageId = struct { id: u32 }; + +pub const Package = struct { + id: PackageId, + /// The BLAKE3 hash of the tarball's contents. Also the .tar filename on disk. + content_hash: []u8, + /// On disk, this will be the subfolder inside the cache dir where the package lives + cache_subdir: []u8, + /// Other code will default this to main.roc, but this module isn't concerned with that default. 
+ root_module_filename: ?[]u8, +}; + +pub const PackageStore = struct { + allocator: std.mem.Allocator, + packages: std.MultiArrayList(Package), + + pub fn init(allocator: std.mem.Allocator) PackageStore { + return PackageStore{ + .allocator = allocator, + .packages = std.MultiArrayList(Package), + }; + } + + pub fn deinit(self: *PackageStore) void { + self.packages.deinit(); + } +}; diff --git a/src/base/primitive.zig b/src/base/primitive.zig new file mode 100644 index 0000000000..9c20feb14d --- /dev/null +++ b/src/base/primitive.zig @@ -0,0 +1,70 @@ +const cols = @import("../collections.zig"); + +// TODO: figure out how to combine enums and union(enum)s at comptime +// to avoid them being multilevel + +pub const Primitive = union(enum) { + Int: Int, + Float: Float, + Bool, + Str, + Crash, + + pub const Int = enum { + U8, + I8, + U16, + I16, + U32, + I32, + U64, + I64, + U128, + I128, + }; + + pub const Float = enum { + F32, + F64, + Dec, + }; + + pub const Num = union(enum) { + Int: Int, + Float: Float, + }; +}; + +pub const Literal = union(enum) { + Int: Int, + Float: Float, + Bool: bool, + Str: cols.LargeStringId, + Crash: cols.LargeStringId, + + pub const Int = union(enum) { + I8: i8, + U8: u8, + I16: i16, + U16: u16, + I32: i32, + U32: u32, + I64: i64, + U64: u64, + I128: i128, + U128: u128, + }; + + pub const Float = union(enum) { + F32: f32, + F64: f64, + // We represent Dec as a large int divided by 10^18, which is the maximum + // number of decimal places that allows lossless conversion of U64 to Dec + Dec: u128, + }; + + pub const Num = union(enum) { + Int: Int, + Float: Float, + }; +}; diff --git a/src/base/region.zig b/src/base/region.zig new file mode 100644 index 0000000000..0ecdcb54bb --- /dev/null +++ b/src/base/region.zig @@ -0,0 +1,34 @@ +const std = @import("std"); + +pub const Region = struct { + start: Position, + end: Position, + + pub fn format(self: *const Region, comptime fmt: []const u8, _: std.fmt.FormatOptions, writer: anytype) !void { + if (fmt.len != 0) { + std.fmt.invalidFmtError(fmt, self); + } + + if ((self.start == Position.zero()) and (self.end == Position.zero())) { + // In tests, it's super common to set all Located values to 0. + // Also in tests, we don't want to bother printing the locations + // because it makes failed assertions much harder to read. 
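+            // For example (illustrative values only): a region spanning byte
+            // offsets 3 to 17 prints as "@3-17", while a zeroed test region
+            // prints as "…".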
+ return writer.print("…", .{}); + } else { + return writer.print("@{}-{}", .{ self.start.offset, self.end.offset }); + } + } +}; + +pub const Position = struct { + offset: u32, + + pub fn zero() Position { + return Position{ .offset = 0 }; + } +}; + +pub const LineAndColumn = packed struct(u32) { + line: u20, + column: u12, +}; diff --git a/src/base/symbol.zig b/src/base/symbol.zig new file mode 100644 index 0000000000..ba5a7ddb70 --- /dev/null +++ b/src/base/symbol.zig @@ -0,0 +1,137 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const cols = @import("../collections.zig"); +const problem = @import("../problem.zig"); +const module = @import("module.zig"); + +pub const IdentId = packed struct(u32) { + attributes: IdentAttributes, + id: u29, +}; + +pub const Ident = struct { + raw_text: []u8, + attributes: IdentAttributes, + problems: IdentProblems, + + pub fn for_text(text: []u8) Ident { + return Ident{ + .raw_text = text, + .attributes = IdentAttributes{}, + .problems = IdentProblems{}, + }; + // TODO: parse idents and their attributes/problems + } +}; + +// this information could be built up during parsing +pub const IdentAttributes = packed struct(u3) { + effectful: bool, + ignored: bool, + reassignable: bool, +}; + +// for example we detect two underscores in a row during parsing... we can make a problem and report +// it to the user later as a warning, but still allow the program to run +pub const IdentProblems = packed struct { + // TODO: add more problem cases + subsequent_underscores: bool, + + pub fn has_problems(self: *IdentProblems) bool { + return self.subsequent_underscores; + } +}; + +pub const IdentStore = struct { + interner: cols.SmallStringInterner, + regions: std.AutoHashMap(IdentId, base.Region), + + pub fn init(allocator: std.mem.Allocator) IdentStore { + return IdentStore{ + .interner = cols.SmallStringInterner.init(allocator), + .regions = std.AutoHashMap(u32, base.Region).init(allocator), + }; + } + + pub fn deinit(self: *IdentStore) void { + self.interner.deinit(); + self.regions.deinit(); + } + + pub fn insert( + self: *IdentStore, + ident: Ident, + region: base.Region, + problems: *std.ArrayList(problem.Problem), + ) IdentId { + if (ident.problems.has_problems()) { + problems.push(.IdentIssue{ .problems = ident.problems, .region = region }); + } + + const id = self.interner.insert(ident.raw_text); + self.regions.put(id.id, region); + + return IdentId{ .attributes = ident.attibutes, .id = @as(u29, id.id) }; + } + + pub fn getText(self: *IdentStore, ident_id: IdentId) []u8 { + return self.interner.get(cols.SmallStringId{ .id = @as(u32, ident_id.id) }); + } + + pub fn getRegion(self: *IdentStore, ident_id: IdentId) base.Region { + return self.regions.get(ident_id).?; + } +}; + +pub const Symbol = struct { + ident_id: IdentId, + module_id: module.ModuleId, +}; + +pub const SymbolStore = struct { + // One per moduleId, so we can use ModuleId as index + ident_stores: std.ArrayList(IdentStore), + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator) SymbolStore { + return SymbolStore{ + .ident_stores = std.ArrayList(IdentStore).init(allocator), + .allocator = allocator, + }; + } + + pub fn deinit(self: *SymbolStore) void { + for (self.ident_stores.items) |ident_store| { + ident_store.deinit(); + } + + self.ident_stores.deinit(); + } + + pub fn insert( + self: *SymbolStore, + ident: Ident, + region: base.Region, + module_id: base.ModuleId, + problems: *std.ArrayList(problem.Problem), + ) Symbol { + while (@as(u32, 
self.ident_stores.items.len) <= module_id.id) { + self.ident_stores.append(IdentStore.init(self.allocator)) catch cols.exit_on_oom; + } + + const ident_store = self.ident_stores.items[@as(usize, module_id.id)]; + const ident_id = ident_store.insert(ident, region, problems); + + return Symbol{ .ident_id = ident_id, .module_id = module_id }; + } + + pub fn getText(self: *SymbolStore, symbol: Symbol) []u8 { + const ident_store = self.ident_stores.items[@as(usize, symbol.module_id.id)]; + return ident_store.getText(symbol.ident_id); + } + + pub fn getRegion(self: *SymbolStore, symbol: Symbol) base.Region { + const ident_store = self.ident_stores.items[@as(usize, symbol.module_id.id)]; + return ident_store.getRegion(symbol.ident_id); + } +}; diff --git a/src/build/lift_functions.zig b/src/build/lift_functions.zig new file mode 100644 index 0000000000..aad67f1e41 --- /dev/null +++ b/src/build/lift_functions.zig @@ -0,0 +1,21 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const type_spec = @import("specialize_types.zig"); + +const lift_functions = @This(); +pub const IR = @import("lift_functions/ir.zig"); + +/// Lift all closures to the top-level and leave behind closure captures +/// +/// after this step, the program has no more implicit closures +/// +/// Implementation notes from Ayaz https://github.com/roc-lang/rfcs/blob/b4731508b60bf0e69d41083f09a5738123dfcefe/0102-compiling-lambda-sets.md#function_lift +pub fn liftFunctions( + ir: type_spec.IR, + other_modules: std.HashMap(base.ModuleId, lift_functions.IR), +) lift_functions.IR { + _ = ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/build/lift_functions/ir.zig b/src/build/lift_functions/ir.zig new file mode 100644 index 0000000000..0420fb8ed9 --- /dev/null +++ b/src/build/lift_functions/ir.zig @@ -0,0 +1,183 @@ +const std = @import("std"); +const base = @import("../../base.zig"); +const cols = @import("../../collections.zig"); +const problem = @import("../../problem.zig"); + +pub const IR = struct { + env: *base.ModuleEnv, + exprs: cols.SafeList(Expr), + expr_regions: cols.SafeList(base.Region), + patterns: cols.SafeList(Pattern), + types: cols.SafeList(Type), + + pub fn init(env: *base.ModuleEnv, allocator: std.mem.Allocator) IR { + return IR{ + .env = env, + .exprs = cols.SafeList(Expr).init(allocator), + .expr_regions = cols.SafeList(base.Region).init(allocator), + .patterns = cols.SafeList(Pattern).init(allocator), + .types = cols.SafeList(Type).init(allocator), + }; + } + + pub fn deinit(self: *IR) void { + self.exprs.deinit(); + self.expr_regions.deinit(); + self.patterns.deinit(); + self.types.deinit(); + } +}; + +pub const TypeId = cols.SafeList(Type).Id; +pub const TypeSlice = cols.SafeList(Type).Slice; +pub const TypeNonEmptySlice = cols.SafeList(Type).NonEmptySlice; + +pub const Type = union(enum) { + Primitive: base.Primitive, + Box: TypeId, + List: TypeId, + Struct: TypeNonEmptySlice, + TagUnion: TypeNonEmptySlice, + FunctionPack: struct { + /// zero fields means no captures + opt_fields: TypeSlice, + }, +}; + +const ExprId = cols.SafeList(Expr).Id; +const ExprSlice = cols.SafeList(Expr).Slice; +const ExprNonEmptySlice = cols.SafeList(Expr).NonEmptySlice; + +const Expr = union(enum) { + Let: Def, + Str: cols.LargeStringId, + Number: base.Number, + List: struct { + elem_type: TypeId, + elems: ExprSlice, + }, + LocalLookup: struct { + ident: base.IdentId, + type: TypeId, + }, + ModuleLookup: struct { + ident: base.IdentId, + module: base.ModuleId, + type: TypeId, + }, + + /// 
This is *only* for calling functions, not for tag application. + /// The Tag variant contains any applied values inside it. + Call: struct { + fn_type: TypeId, + fn_expr: ExprId, + args: TypedExprSlice, + }, + + FunctionPack: struct { + fn_ident: base.IdentId, + captures: TypedPatternSlice, + }, + + Unit, + + Tag: struct { + discriminant: u16, + tag_union_type: TypeId, + args: TypedExprSlice, + }, + + When: struct { + /// The value being matched on + value: ExprId, + /// The type of the value being matched on + value_type: TypeId, + /// The return type of all branches and thus the whole when expression + branch_type: TypeId, + /// The branches of the when expression + branches: cols.NonEmptySlice(WhenBranch), + }, + + CompilerBug: problem.LiftFunctionsProblem, +}; + +const Def = struct { + pattern: PatternId, + /// Named variables in the pattern, e.g. `a` in `Ok a ->` + pattern_vars: TypedIdentSlice, + expr: ExprId, + expr_type: TypeId, +}; + +const WhenBranch = struct { + /// The pattern(s) to match the value against + patterns: PatternNonEmptySlice, + /// A boolean expression that must be true for this branch to be taken + guard: ?ExprId, + /// The expression to produce if the pattern matches + value: ExprId, +}; + +const WhenBranches = struct { + branches: cols.SafeList(WhenBranch), +}; + +pub const PatternId = cols.SafeList(Pattern).Id; +pub const PatternSlice = cols.SafeList(Pattern).Slice; +pub const PatternNonEmptySlice = cols.SafeList(Pattern).NonEmptySlice; + +pub const Pattern = union(enum) { + Identifier: base.IdentId, + As: struct { + inner_pattern: PatternId, + ident: base.IdentId, + }, + StrLiteral: cols.LargeStringId, + NumberLiteral: base.NumberLiteral, + AppliedTag: struct { + tag_union_type: TypeId, + tag_name: base.IdentId, + args: PatternSlice, + }, + StructDestructure: struct { + struct_type: TypeId, + destructs: RecordDestructSlice, + opt_spread: ?TypedPattern, + }, + List: struct { + elem_type: TypeId, + patterns: PatternSlice, + + /// Where a rest pattern splits patterns before and after it, if it does at all. + /// If present, patterns at index >= the rest index appear after the rest pattern. + /// For example: + /// [ .., A, B ] -> patterns = [A, B], rest = 0 + /// [ A, .., B ] -> patterns = [A, B], rest = 1 + /// [ A, B, .. ] -> patterns = [A, B], rest = 2 + /// Optionally, the rest pattern can be named - e.g. 
`[ A, B, ..others ]` + opt_rest: ?.{ u16, ?base.IdentId }, + }, + Underscore, + CompilerBug: problem.LiftFunctionsProblem, +}; + +const RecordDestructSlice = cols.SafeMultiList(RecordDestruct).Slice; + +pub const RecordDestruct = struct { + ident: base.IdentId, + field: base.FieldNameId, + type: DestructType, +}; + +pub const DestructType = union(enum) { + Required, + Guard: TypedPattern, +}; + +const TypedExpr = struct { type: TypeId, expr: ExprId }; +const TypedIdent = struct { ident: base.IdentId, type: TypeId }; +const TypedPattern = struct { pattern: PatternId, type: TypeId }; + +const TypedExprSlice = cols.SafeMultiList(TypedExpr).Slice; +const TypedIdentSlice = cols.SafeMultiList(TypedIdent).Slice; +const TypedPatternSlice = cols.SafeMultiList(TypedPattern).Slice; diff --git a/src/build/lower_statements.zig b/src/build/lower_statements.zig new file mode 100644 index 0000000000..6d4880f7b4 --- /dev/null +++ b/src/build/lower_statements.zig @@ -0,0 +1,19 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const func_spec = @import("specialize_functions.zig"); + +const lower_statements = @This(); +pub const IR = @import("lower_statements/ir.zig"); + +/// Convert expressions into statements for consumption by codegen. +/// +/// Implementation notes from Ayaz https://github.com/roc-lang/rfcs/blob/ayaz/compile-with-lambda-sets/0102-compiling-lambda-sets.md#lower_ir +pub fn lowerStatements( + ir: func_spec.IR, + other_modules: std.HashMap(base.ModuleId, lower_statements.IR), +) lower_statements.IR { + _ = ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/build/lower_statements/ir.zig b/src/build/lower_statements/ir.zig new file mode 100644 index 0000000000..6170b0dc6c --- /dev/null +++ b/src/build/lower_statements/ir.zig @@ -0,0 +1,255 @@ +const std = @import("std"); +const base = @import("../../base.zig"); +const cols = @import("../../collections.zig"); +const problem = @import("../../problem.zig"); + +pub const IR = struct { + env: *base.ModuleEnv, + procs: std.AutoHashMap(base.IdentId, Procedure), + exprs: cols.SafeList(Expr), + layouts: cols.SafeList(Layout), + stmts: cols.SafeList(Stmt), + + pub fn init(env: *base.ModuleEnv, allocator: std.mem.Allocator) IR { + return IR{ + .env = env, + .procs = std.AutoHashMap(base.IdentId, Procedure).init(allocator), + .exprs = cols.SafeList(Expr).init(allocator), + .layouts = cols.SafeList(Layout).init(allocator), + .stmts = cols.SafeList(Stmt).init(allocator), + }; + } + + pub fn deinit(self: *IR) void { + self.procs.deinit(); + self.exprs.deinit(); + self.layouts.deinit(); + self.stmts.deinit(); + } +}; + +pub const Procedure = struct { + arguments: cols.SafeMultiList(IdentWithLayout).Slice, + body: StmtId, + return_layout: LayoutId, +}; + +// TODO: is this necessary? +pub const TagIdIntType = u16; + +pub const LayoutId = cols.SafeList(Layout).Id; +pub const LayoutSlice = cols.SafeList(Layout).Slice; +pub const LayoutNonEmptySlice = cols.SafeList(Layout).NonEmptySlice; + +pub const Layout = union(enum) { + Primitive: base.Primitive, + Box: LayoutId, + List: LayoutId, + Struct: LayoutNonEmptySlice, + TagUnion: LayoutNonEmptySlice, + // probably necessary for returning empty structs, but would be good to remove this if that's not the case + Unit, +}; + +// pub const IdentWithLayout = struct { +// ident: base.IdentId, +// layout: LayoutId, +// }; + +pub const SymbolWithLayout = struct { + symbol: base.Symbol, + layout: LayoutId, +}; + +// TODO: should these use `NonEmptySlice`s? 
+// +// Copied (and adapted) from: +// https://github.com/roc-lang/roc/blob/689c58f35e0a39ca59feba549f7fcf375562a7a6/crates/compiler/mono/src/layout.rs#L733 +pub const UnionLayout = union(enum) { + // TODO +}; + +pub const ExprId = cols.SafeList(Expr).Id; +pub const ExprSlice = cols.SafeList(Expr).Slice; +pub const ExprNonEmptySlice = cols.SafeList(Expr).NonEmptySlice; + +// TODO: which of `Expr` or `Stmt` should hold the CompilerBug: LowerIrProblem? + +pub const Expr = union(enum) { + Literal: base.Literal, + + // Functions + Call: Call, + + Tag: struct { + // TODO: should this be an index instead? + tag_layout: UnionLayout, + tag_id: TagIdIntType, + arguments: cols.SafeList(base.IdentId).Slice, + }, + Struct: cols.SafeList(base.IdentId).NonEmptySlice, + NullPointer, + StructAtIndex: struct { + index: u64, + field_layouts: []LayoutId, + structure: base.IdentId, + }, + + GetTagId: struct { + structure: usize, //Symbol, + union_layout: usize, //UnionLayout, + }, + + UnionAtIndex: struct { + structure: usize, //Symbol, + tag_id: usize, //TagIdIntType, + union_layout: usize, //UnionLayout, + index: u64, + }, + + GetElementPointer: struct { + structure: usize, //Symbol + union_layout: usize, //UnionLayout, + indices: []u64, + }, + + Array: struct { + elem_layout: LayoutId, + elems: cols.SafeList(ListLiteralElem).Slice, + }, + + EmptyArray, + + /// Returns a pointer to the given function. + FunctionPointer: struct { + symbol: usize, //Symbol, + }, + + Alloca: struct { + element_layout: LayoutId, + initializer: ?usize, //?Symbol, + }, + + Reset: struct { + symbol: usize, //Symbol, + }, + + // Just like Reset, but does not recursively decrement the children. + // Used in reuse analysis to replace a decref with a resetRef to avoid decrementing when the dec ref didn't. + ResetRef: struct { + symbol: usize, //Symbol, + }, +}; + +pub const ListLiteralElem = union(enum) { + StringLiteralId: []const u8, + Number: base.NumberLiteral, + Symbol: usize, //Symbol, +}; + +pub const CallType = union(enum) { + ByName: struct { + symbol: usize, //Symbol, + ret_layout: LayoutId, + arg_layouts: []LayoutId, + }, + ByPointer: struct { + pointer: usize, //Symbol, + ret_layout: LayoutId, + arg_layouts: []LayoutId, + }, + // Foreign: struct { + // foreign_symbol: usize, //ForeignSymbolId, + // ret_layout: LayoutId, + // }, + // LowLevel: struct { + // op: usize, //LowLevel, + // }, + // TODO: presumably these should be removed in an earlier stage + // HigherOrder(&'a HigherOrderLowLevel<'a>), +}; + +pub const Call = struct { + // TODO: consider putting `call_type` in a `Vec` in `IR` + call_type: CallType, + arguments: cols.SafeList(base.IdentId).Slice, +}; + +pub const StmtId = cols.SafeList(Stmt).Id; +pub const StmtSlice = cols.SafeList(Stmt).Slice; +pub const StmtNonEmptySlice = cols.SafeList(Stmt).NonEmptySlice; + +pub const Stmt = union(enum) { + Let: struct { + ident: base.IdentId, + expr: ExprId, + layout: ExprId, + continuation: StmtId, + }, + Switch: struct { + /// This *must* stand for an integer, because Switch potentially compiles to a jump table. + cond_ident: base.IdentId, + // TODO: can we make this layout a number type? + cond_layout: LayoutId, + /// The u64 in the tuple will be compared directly to the condition Expr. + /// If they are equal, this branch will be taken. + branches: Branch, + /// If no other branches pass, this default branch will be taken. + default_branch: struct { + info: Branch.Kind, + stmt: StmtId, + }, + /// Each branch must return a value of this type. 
+ ret_layout: LayoutId, + }, + Ret: base.IdentId, + /// a join point `join f = in remainder` + Join: struct { + id: JoinPointId, + parameters: cols.SafeList(Param).Slice, + /// body of the join point + /// what happens after _jumping to_ the join point + body: StmtId, + /// what happens after _defining_ the join point + remainder: StmtId, + }, + Jump: struct { + join_point: JoinPointId, + idents: cols.SafeList(base.IdentId).Slice, + }, + Crash: struct { + ident: base.IdentId, + tag: base.CrashOrigin, + }, +}; + +pub const Branch = struct { + discriminant: u64, + kind: Kind, + stmt: StmtId, + + /// in the block below, symbol `scrutinee` is assumed be be of shape `tag_id` + pub const Kind = union(enum) { + None, + Constructor: struct { + scrutinee: base.Symbol, + layout: LayoutId, + tag_id: TagIdIntType, + }, + List: struct { + scrutinee: base.Symbol, + len: u64, + }, + Unique: struct { + scrutinee: base.Symbol, + unique: bool, + }, + }; +}; + +pub const JoinPointId = base.IdentId; + +pub const Param = struct { + ident: base.IdentId, + layout: LayoutId, +}; diff --git a/src/build/reference_count.zig b/src/build/reference_count.zig new file mode 100644 index 0000000000..b67b87d5a9 --- /dev/null +++ b/src/build/reference_count.zig @@ -0,0 +1,17 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const lower = @import("lower_statements.zig"); + +const reference_count = @This(); +pub const IR = @import("reference_count/ir.zig"); + +/// Check ownership of function arguments and add refcounting instructions where necessary. +pub fn referenceCount( + ir: lower.IR, + other_modules: std.HashMap(base.ModuleId, reference_count.IR), +) reference_count.IR { + _ = ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/build/reference_count/ir.zig b/src/build/reference_count/ir.zig new file mode 100644 index 0000000000..533247a243 --- /dev/null +++ b/src/build/reference_count/ir.zig @@ -0,0 +1,280 @@ +const std = @import("std"); +const base = @import("../../base.zig"); +const cols = @import("../../collections.zig"); +const problem = @import("../../problem.zig"); + +pub const IR = struct { + env: *base.ModuleEnv, + procs: std.AutoHashMap(base.IdentId, Procedure), + exprs: cols.SafeList(Expr), + layouts: cols.SafeList(Layout), + stmts: cols.SafeList(Stmt), + + pub fn init(env: *base.ModuleEnv, allocator: std.mem.Allocator) IR { + return IR{ + .env = env, + .procs = std.AutoHashMap(base.IdentId, Procedure).init(allocator), + .exprs = cols.SafeList(Expr).init(allocator), + .layouts = cols.SafeList(Layout).init(allocator), + .stmts = cols.SafeList(Stmt).init(allocator), + }; + } + + pub fn deinit(self: *IR) void { + self.procs.deinit(); + self.exprs.deinit(); + self.layouts.deinit(); + self.stmts.deinit(); + } +}; + +pub const Procedure = struct { + arguments: cols.SafeMultiList(IdentWithLayout).Slice, + body: StmtId, + return_layout: LayoutId, +}; + +// TODO: is this necessary? 
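+// (For reference: the earlier build IRs store tag discriminants directly as
+// `u16`, e.g. `Expr.Tag.discriminant`, so this alias matches that width.)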
+pub const TagIdIntType = u16; + +pub const LayoutId = cols.SafeList(Layout).Id; +pub const LayoutSlice = cols.SafeList(Layout).Slice; +pub const LayoutNonEmptySlice = cols.SafeList(Layout).NonEmptySlice; + +pub const Layout = union(enum) { + Primitive: base.Primitive, + Box: LayoutId, + List: LayoutId, + Struct: LayoutNonEmptySlice, + TagUnion: LayoutNonEmptySlice, + // probably necessary for returning empty structs, but would be good to remove this if that's not the case + Unit, +}; + +// pub const IdentWithLayout = struct { +// ident: base.IdentId, +// layout: LayoutId, +// }; + +pub const SymbolWithLayout = struct { + symbol: base.Symbol, + layout: LayoutId, +}; + +// TODO: should these use `NonEmptySlice`s? +// +// Copied (and adapted) from: +// https://github.com/roc-lang/roc/blob/689c58f35e0a39ca59feba549f7fcf375562a7a6/crates/compiler/mono/src/layout.rs#L733 +pub const UnionLayout = union(enum) { + // TODO +}; + +pub const ExprId = cols.SafeList(Expr).Id; +pub const ExprSlice = cols.SafeList(Expr).Slice; +pub const ExprNonEmptySlice = cols.SafeList(Expr).NonEmptySlice; + +// TODO: which of `Expr` or `Stmt` should hold the CompilerBug: LowerIrProblem? + +pub const Expr = union(enum) { + Literal: base.Literal, + + // Functions + Call: Call, + + Tag: struct { + // TODO: should this be an index instead? + tag_layout: UnionLayout, + tag_id: TagIdIntType, + arguments: cols.SafeList(base.IdentId).Slice, + }, + Struct: cols.SafeList(base.IdentId).NonEmptySlice, + NullPointer, + StructAtIndex: struct { + index: u64, + field_layouts: []LayoutId, + structure: base.IdentId, + }, + + GetTagId: struct { + structure: usize, //Symbol, + union_layout: usize, //UnionLayout, + }, + + UnionAtIndex: struct { + structure: usize, //Symbol, + tag_id: usize, //TagIdIntType, + union_layout: usize, //UnionLayout, + index: u64, + }, + + GetElementPointer: struct { + structure: usize, //Symbol + union_layout: usize, //UnionLayout, + indices: []u64, + }, + + Array: struct { + elem_layout: LayoutId, + elems: cols.SafeList(ListLiteralElem).Slice, + }, + + EmptyArray, + + /// Returns a pointer to the given function. + FunctionPointer: struct { + symbol: usize, //Symbol, + }, + + Alloca: struct { + element_layout: LayoutId, + initializer: ?usize, //?Symbol, + }, + + Reset: struct { + symbol: usize, //Symbol, + }, + + // Just like Reset, but does not recursively decrement the children. + // Used in reuse analysis to replace a decref with a resetRef to avoid decrementing when the dec ref didn't. 
+ ResetRef: struct { + symbol: usize, //Symbol, + }, +}; + +pub const ListLiteralElem = union(enum) { + StringLiteralId: []const u8, + Number: base.NumberLiteral, + Symbol: usize, //Symbol, +}; + +pub const CallType = union(enum) { + ByName: struct { + symbol: usize, //Symbol, + ret_layout: LayoutId, + arg_layouts: []LayoutId, + }, + ByPointer: struct { + pointer: usize, //Symbol, + ret_layout: LayoutId, + arg_layouts: []LayoutId, + }, + // Foreign: struct { + // foreign_symbol: usize, //ForeignSymbolId, + // ret_layout: LayoutId, + // }, + // LowLevel: struct { + // op: usize, //LowLevel, + // }, + // TODO: presumably these should be removed in an earlier stage + // HigherOrder(&'a HigherOrderLowLevel<'a>), +}; + +pub const Call = struct { + // TODO: consider putting `call_type` in a `Vec` in `IR` + call_type: CallType, + arguments: cols.SafeList(base.IdentId).Slice, +}; + +pub const StmtId = cols.SafeList(Stmt).Id; +pub const StmtSlice = cols.SafeList(Stmt).Slice; +pub const StmtNonEmptySlice = cols.SafeList(Stmt).NonEmptySlice; + +pub const Stmt = union(enum) { + Let: struct { + ident: base.IdentId, + expr: ExprId, + layout: ExprId, + continuation: StmtId, + }, + Switch: struct { + /// This *must* stand for an integer, because Switch potentially compiles to a jump table. + cond_ident: base.IdentId, + // TODO: can we make this layout a number type? + cond_layout: LayoutId, + /// The u64 in the tuple will be compared directly to the condition Expr. + /// If they are equal, this branch will be taken. + branches: Branch, + /// If no other branches pass, this default branch will be taken. + default_branch: struct { + info: Branch.Kind, + stmt: StmtId, + }, + /// Each branch must return a value of this type. + ret_layout: LayoutId, + }, + Ret: base.IdentId, + RefCount: struct { + symbol: base.Symbol, + change: ModifyRefCount, + }, + /// a join point `join f = in remainder` + Join: struct { + id: JoinPointId, + parameters: cols.SafeList(Param).Slice, + /// body of the join point + /// what happens after _jumping to_ the join point + body: StmtId, + /// what happens after _defining_ the join point + remainder: StmtId, + }, + Jump: struct { + join_point: JoinPointId, + idents: cols.SafeList(base.IdentId).Slice, + }, + Crash: struct { + ident: base.IdentId, + tag: base.CrashOrigin, + }, +}; + +pub const ModifyRefCount = union(enum) { + /// Increment a reference count + Inc: usize, // (Symbol, u64), + + /// Decrement a reference count + Dec: usize, //(Symbol), + + /// A DecRef is a non-recursive reference count decrement + /// e.g. If we Dec a list of lists, then if the reference count of the outer list is one, + /// a Dec will recursively decrement all elements, then free the memory of the outer list. + /// A DecRef would just free the outer list. + /// That is dangerous because you may not free the elements, but in our Zig builtins, + /// sometimes we know we already dealt with the elements (e.g. by copying them all over + /// to a new list) and so we can just do a DecRef, which is much cheaper in such a case. + DecRef: usize, //(Symbol), + + /// Unconditionally deallocate the memory. For tag union that do pointer tagging (store the tag + /// id in the pointer) the backend has to clear the tag id! 
+ Free: usize, //(Symbol), +}; + +pub const Branch = struct { + discriminant: u64, + kind: Kind, + stmt: StmtId, + + /// in the block below, symbol `scrutinee` is assumed be be of shape `tag_id` + pub const Kind = union(enum) { + None, + Constructor: struct { + scrutinee: base.Symbol, + layout: LayoutId, + tag_id: TagIdIntType, + }, + List: struct { + scrutinee: base.Symbol, + len: u64, + }, + Unique: struct { + scrutinee: base.Symbol, + unique: bool, + }, + }; +}; + +pub const JoinPointId = base.IdentId; + +pub const Param = struct { + ident: base.IdentId, + layout: LayoutId, +}; diff --git a/src/build/solve_functions.zig b/src/build/solve_functions.zig new file mode 100644 index 0000000000..9844bfc3f1 --- /dev/null +++ b/src/build/solve_functions.zig @@ -0,0 +1,22 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const func_lift = @import("lift_functions.zig"); + +const solve_functions = @This(); +pub const IR = @import("solve_functions/ir.zig"); + +/// Annotate the generic-ness of each function at the top-level +/// +/// infer all sets of functions passed to higher-order function (HOF) calls +/// after this step, every call to a HOF is assigned a variable with the set of functions passed (function set) or a generalized variable +/// +/// Implementation notes from Ayaz https://github.com/roc-lang/rfcs/blob/b4731508b60bf0e69d41083f09a5738123dfcefe/0102-compiling-lambda-sets.md#function_solve +pub fn solveFunctions( + ir: func_lift.IR, + other_modules: std.HashMap(base.ModuleId, solve_functions.IR), +) solve_functions.IR { + _ = ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/build/solve_functions/ir.zig b/src/build/solve_functions/ir.zig new file mode 100644 index 0000000000..0080755ebb --- /dev/null +++ b/src/build/solve_functions/ir.zig @@ -0,0 +1,191 @@ +const std = @import("std"); +const base = @import("../../base.zig"); +const cols = @import("../../collections.zig"); +const problem = @import("../../problem.zig"); + +pub const IR = struct { + env: *base.ModuleEnv, + exprs: cols.SafeList(Expr), + expr_regions: cols.SafeList(base.Region), + patterns: cols.SafeList(Pattern), + types: cols.SafeList(Type), + + pub fn init(env: *base.ModuleEnv, allocator: std.mem.Allocator) IR { + return IR{ + .env = env, + .exprs = cols.SafeList(Expr).init(allocator), + .expr_regions = cols.SafeList(base.Region).init(allocator), + .patterns = cols.SafeList(Pattern).init(allocator), + .types = cols.SafeList(Type).init(allocator), + }; + } + + pub fn deinit(self: *IR) void { + self.exprs.deinit(); + self.expr_regions.deinit(); + self.patterns.deinit(); + self.types.deinit(); + } +}; + +pub const TypeId = cols.SafeList(Type).Id; +pub const TypeSlice = cols.SafeList(Type).Slice; +pub const TypeNonEmptySlice = cols.SafeList(Type).NonEmptySlice; + +pub const Type = union(enum) { + Primitive: base.Primitive, + Box: TypeId, + List: TypeId, + Struct: TypeNonEmptySlice, + TagUnion: TypeNonEmptySlice, + // TODO: can this go somewhere outside of the main function union? 
+ FunctionPack: struct { + /// zero fields means no captures + opt_fields: TypeSlice, + }, +}; + +pub const ExprId = cols.SafeLift(Expr).Id; +pub const ExprSlice = cols.SafeLift(Expr).Slice; +pub const ExprNonEmptySlice = cols.SafeLift(Expr).NonEmptySlice; + +pub const Expr = union(enum) { + Let: Def, + Str: cols.LargeStringId, + Number: base.NumberLiteral, + List: struct { + elem_type: TypeId, + elems: ExprSlice, + }, + Lookup: TypedIdent, + + /// This is *only* for calling functions, not for tag application. + /// The Tag variant contains any applied values inside it. + Call: struct { + fn_type: TypeId, + fn_expr: ExprId, + args: cols.MultiArrayList(TypedExpr).Slice, + }, + + FunctionPack: struct { + fn_ident: base.IdentId, + captures: std.MultiArrayList(TypedExpr).Slice, + }, + + Unit, + + Struct: ExprNonEmptySlice, + + /// Look up exactly one field on a record, tuple, or tag payload. + /// At this point we've already unified those concepts and have + /// converted (for example) record field names to indices, and have + /// also dropped all fields that have no runtime representation (e.g. empty records). + /// + /// In a later compilation phase, these indices will be re-sorted + /// by alignment and converted to byte offsets, but we in this + /// phase we aren't concerned with alignment or sizes, just indices. + StructAccess: struct { + record_expr: ExprId, + record_type: TypeId, + field_type: TypeId, + field_id: cols.FieldNameId, + }, + + Tag: struct { + discriminant: u16, + tag_union_type: TypeId, + args: std.MultiArrayList(TypedExpr).Slice, + }, + + When: struct { + /// The value being matched on + value: ExprId, + /// The type of the value being matched on + value_type: TypeId, + /// The return type of all branches and thus the whole when expression + branch_type: TypeId, + /// The branches of the when expression + branches: cols.SafeList(WhenBranch).NonEmptySlice, + }, + + CompilerBug: problem.SolveFunctionsProblem, +}; + +pub const Def = struct { + pattern: PatternId, + /// Named variables in the pattern, e.g. `a` in `Ok a ->` + pattern_vars: std.MultiArrayList(TypedIdent).Slice, + expr: ExprId, + expr_type: TypeId, +}; + +pub const WhenBranch = struct { + /// The pattern(s) to match the value against + patterns: PatternNonEmptySlice, + /// A boolean expression that must be true for this branch to be taken + guard: ?ExprId, + /// The expression to produce if the pattern matches + value: ExprId, +}; + +pub const WhenBranches = struct { + branches: usize, //Vec>, +}; + +pub const PatternId = cols.SafeList(Pattern).Id; +pub const PatternSlice = cols.SafeList(Pattern).Slice; +pub const PatternNonEmptySlice = cols.SafeList(Pattern).NonEmptySlice; + +pub const Pattern = union(enum) { + Identifier: base.IdentId, + As: struct { + pattern: PatternId, + ident: base.IdentId, + }, + StrLiteral: cols.LargeStringId, + NumberLiteral: base.NumberLiteral, + AppliedTag: struct { + tag_union_type: TypeId, + tag_name: base.IdentId, + args: []PatternId, + }, + StructDestructure: struct { + struct_type: TypeId, + destructs: std.MultiArrayList(StructDestruct).Slice, + opt_spread: ?TypedPattern, + }, + List: struct { + elem_type: TypeId, + patterns: PatternSlice, + + /// Where a rest pattern splits patterns before and after it, if it does at all. + /// If present, patterns at index >= the rest index appear after the rest pattern. + /// For example: + /// [ .., A, B ] -> patterns = [A, B], rest = 0 + /// [ A, .., B ] -> patterns = [A, B], rest = 1 + /// [ A, B, .. 
] -> patterns = [A, B], rest = 2 + /// Optionally, the rest pattern can be named - e.g. `[ A, B, ..others ]` + opt_rest: ?.{ u16, ?base.IdentId }, + }, + Underscore, + CompilerBug: problem.SpecializeTypesProblem, +}; + +pub const StructDestruct = struct { + ident: base.IdentId, + field: cols.FieldNameId, + destruct_type: DestructType, +}; + +pub const DestructType = union(enum) { + Required, + Guard: TypedPattern, +}; + +pub const TypedExpr = struct { expr: ExprId, type: TypeId }; +pub const TypedIdent = struct { ident: base.IdentId, type: TypeId }; +pub const TypedPattern = struct { pattern: PatternId, type: TypeId }; + +const TypedExprSlice = cols.SafeMultiList(TypedExpr).Slice; +const TypedIdentSlice = cols.SafeMultiList(TypedIdent).Slice; +const TypedPatternSlice = cols.SafeMultiList(TypedPattern).Slice; diff --git a/src/build/specialize_functions.zig b/src/build/specialize_functions.zig new file mode 100644 index 0000000000..05afe851b5 --- /dev/null +++ b/src/build/specialize_functions.zig @@ -0,0 +1,19 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const func_solve = @import("solve_functions.zig"); + +const specialize_functions = @This(); +pub const IR = @import("specialize_functions/ir.zig"); + +/// Make each generic function concrete by representing each set of potential captures using a tag union as an argument. +/// +/// Implementation notes from Ayaz https://github.com/roc-lang/rfcs/blob/b4731508b60bf0e69d41083f09a5738123dfcefe/0102-compiling-lambda-sets.md#function_specialize +pub fn specializeFunctions( + ir: func_solve.IR, + other_modules: std.HashMap(base.ModuleId, specialize_functions.IR), +) specialize_functions.IR { + _ = ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/build/specialize_functions/ir.zig b/src/build/specialize_functions/ir.zig new file mode 100644 index 0000000000..d4f1eb93aa --- /dev/null +++ b/src/build/specialize_functions/ir.zig @@ -0,0 +1,177 @@ +const std = @import("std"); +const base = @import("../../base.zig"); +const cols = @import("../../collections.zig"); +const problem = @import("../../problem.zig"); + +pub const IR = struct { + env: *base.ModuleEnv, + exprs: cols.SafeList(Expr), + expr_regions: cols.SafeList(base.Region), + patterns: cols.SafeList(Pattern), + types: cols.SafeList(Type), + + pub fn init(env: *base.ModuleEnv, allocator: std.mem.Allocator) IR { + return IR{ + .env = env, + .exprs = cols.SafeList(Expr).init(allocator), + .expr_regions = cols.SafeList(base.Region).init(allocator), + .patterns = cols.SafeList(Pattern).init(allocator), + .types = cols.SafeList(Type).init(allocator), + }; + } + + pub fn deinit(self: *IR) void { + self.exprs.deinit(); + self.expr_regions.deinit(); + self.patterns.deinit(); + self.types.deinit(); + } +}; + +pub const TypeId = cols.SafeList(Type).Id; +pub const TypeSlice = cols.SafeList(Type).Slice; +pub const TypeNonEmptySlice = cols.SafeList(Type).NonEmptySlice; + +pub const Type = union(enum) { + Primitive: base.Primitive, + Box: TypeId, + List: TypeId, + Struct: TypeNonEmptySlice, + TagUnion: TypeNonEmptySlice, +}; + +pub const ExprId = cols.SafeList(Expr).Id; +pub const ExprSlice = cols.SafeList(Expr).Slice; +pub const ExprNonEmptySlice = cols.SafeList(Expr).NonEmptySlice; + +pub const Expr = union(enum) { + Let: Def, + Str: cols.LargeStringId, + Number: base.NumberLiteral, + List: struct { + elem_type: TypeId, + elems: ExprSlice, + }, + LocalLookup: struct { + ident: base.IdentId, + type: TypeId, + }, + ModuleLookup: struct { + ident: base.IdentId, 
+ module: base.ModuleId, + type: TypeId, + }, + FunctionCall: struct { + fn_type: TypeId, + fn_ident: base.IdentId, + args: std.MultiArrayList(TypedExpr).Slice, + }, + + Unit, + + Struct: ExprNonEmptySlice, + StructAccess: struct { + record_expr: ExprId, + record_type: TypeId, + field_type: TypeId, + field_id: base.FieldNameId, + }, + + Tag: struct { + discriminant: u16, + tag_union_type: TypeId, + args: std.MultiArrayList(TypedExpr).Slice, + }, + + When: struct { + /// The value being matched on + value: ExprId, + /// The type of the value being matched on + value_type: TypeId, + /// The return type of all branches and thus the whole when expression + branch_type: TypeId, + /// The branches of the when expression + branches: cols.SafeList(WhenBranch).NonEmptySlice, + }, + + CompilerBug: problem.SpecializeFunctionsProblem, +}; + +pub const Def = struct { + pattern: PatternId, + /// Named variables in the pattern, e.g. `a` in `Ok a ->` + pattern_vars: std.MultiArrayList(TypedIdent).Slice, + expr: ExprId, + expr_type: TypeId, +}; + +pub const WhenBranch = struct { + /// The pattern(s) to match the value against + patterns: PatternNonEmptySlice, + /// A boolean expression that must be true for this branch to be taken + guard: ?ExprId, + /// The expression to produce if the pattern matches + value: ExprId, +}; + +pub const WhenBranches = struct { + // branches: Vec>, +}; + +pub const PatternId = cols.SafeList(Pattern).Id; +pub const PatternSlice = cols.SafeList(Pattern).Slice; +pub const PatternNonEmptySlice = cols.SafeList(Pattern).NonEmptySlice; + +pub const Pattern = union(enum) { + Identifier: base.IdentId, + As: struct { + pattern: PatternId, + ident: base.IdentId, + }, + StrLiteral: cols.LargeStringId, + NumberLiteral: base.NumberLiteral, + AppliedTag: struct { + tag_union_type: TypeId, + tag_name: base.IdentId, + args: PatternSlice, + }, + StructDestructure: struct { + struct_type: TypeId, + destructs: std.MultiArrayList(StructDestruct).Slice, + opt_spread: ?.TypedPattern, + }, + List: struct { + elem_type: TypeId, + patterns: PatternSlice, + + /// Where a rest pattern splits patterns before and after it, if it does at all. + /// If present, patterns at index >= the rest index appear after the rest pattern. + /// For example: + /// [ .., A, B ] -> patterns = [A, B], rest = 0 + /// [ A, .., B ] -> patterns = [A, B], rest = 1 + /// [ A, B, .. ] -> patterns = [A, B], rest = 2 + /// Optionally, the rest pattern can be named - e.g. 
`[ A, B, ..others ]` + opt_rest: ?struct { position: u16, ident: ?base.IdentId }, + }, + Underscore, + CompilerBug: problem.SpecializeFunctionsProblem, +}; + +pub const StructDestruct = struct { + ident: base.IdentId, + field: base.FieldNameId, + destruct_type: DestructType, +}; + +pub const DestructType = union(enum) { + Required, + Guard: TypedPattern, +}; + +pub const TypedExpr = struct { expr: ExprId, type: TypeId }; +pub const TypedIdent = struct { ident: base.IdentId, type: TypeId }; +pub const TypedPattern = struct { pattern: PatternId, type: TypeId }; + +pub const TypedExprSlice = cols.SafeMultiList(TypedExpr).Slice; +pub const TypedIdentSlice = cols.SafeMultiList(TypedIdent).Slice; +pub const TypedPatternSlice = cols.SafeMultiList(TypedPattern).Slice; diff --git a/src/build/specialize_types.zig b/src/build/specialize_types.zig new file mode 100644 index 0000000000..440fbc89c7 --- /dev/null +++ b/src/build/specialize_types.zig @@ -0,0 +1,22 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const resolve = @import("../check/resolve_imports.zig"); + +const specialize_types = @This(); +pub const IR = @import("specialize_types/ir.zig").IR; + +/// Create a copy of every function in the program, by walking from the entrypoint down the tree, replacing type variables with concrete types. +/// +/// replace all calls to generic functions with concrete instances +/// after this step, the program has no generic types +/// +/// Implementation notes from Ayaz https://github.com/roc-lang/rfcs/blob/ayaz/compile-with-lambda-sets/0102-compiling-lambda-sets.md#type_specialize +pub fn specializeTypes( + resolve_ir: resolve.IR, + other_modules: std.HashMap(base.ModuleId, specialize_types.IR), +) specialize_types.IR { + _ = resolve_ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/build/specialize_types/ir.zig b/src/build/specialize_types/ir.zig new file mode 100644 index 0000000000..f26521087f --- /dev/null +++ b/src/build/specialize_types/ir.zig @@ -0,0 +1,200 @@ +const std = @import("std"); +const base = @import("../../base.zig"); +const cols = @import("../../collections.zig"); +const problem = @import("../../problem.zig"); + +pub const IR = struct { + env: *base.ModuleEnv, + exprs: cols.SafeList(Expr), + expr_regions: cols.SafeList(base.Region), + patterns: cols.SafeList(Pattern), + types: cols.SafeList(Type), + + pub fn init(env: *base.ModuleEnv, allocator: std.mem.Allocator) IR { + return IR{ + .env = env, + .exprs = cols.SafeList(Expr).init(allocator), + .expr_regions = cols.SafeList(base.Region).init(allocator), + .patterns = cols.SafeList(Pattern).init(allocator), + .types = cols.SafeList(Type).init(allocator), + }; + } + + pub fn deinit(self: *IR) void { + self.exprs.deinit(); + self.expr_regions.deinit(); + self.patterns.deinit(); + self.types.deinit(); + } +}; + +pub const TypeId = cols.SafeList(Type).Id; +pub const TypeSlice = cols.SafeList(Type).Slice; +pub const TypeNonEmptySlice = cols.SafeList(Type).NonEmptySlice; + +pub const Type = union(enum) { + Primitive: base.Primitive, + Box: TypeId, + List: TypeId, + Struct: cols.SafeList(TypeId).NonEmptySlice, + TagUnion: cols.SafeList(TypeId).NonEmptySlice, + Func: struct { + ret_then_args: cols.SafeList(TypeId).NonEmptySlice, + }, +}; + +pub const ExprId = cols.SafeList(Expr).Id; +pub const ExprSlice = cols.SafeList(Expr).Slice; +pub const ExprNonEmptySlice = cols.SafeList(Expr).NonEmptySlice; + +pub const Expr = union(enum) { + Let: Def, + Str: cols.LargeStringId, + Number: base.NumberLiteral, + 
List: struct { + elem_type: TypeId, + elems: ExprSlice, + }, + LocalLookup: struct { + ident: base.IdentId, + type: TypeId, + }, + ModuleLookup: struct { + ident: base.IdentId, + module: base.ModuleId, + type: TypeId, + }, + + Call: struct { + fn_type: TypeId, + fn_expr: ExprId, + args: std.MultiArrayList(TypedExpr).Slice, + }, + + Lambda: struct { + fn_type: TypeId, + arguments: cols.SafeList(TypedPattern).Slice, + body: ExprId, + recursive: base.Recursive, + }, + + Unit, + + /// A record literal or a tuple literal. + /// These have already been sorted alphabetically. + Struct: cols.SafeList(ExprId).NonEmptySlice, + + /// Look up exactly one field on a record, tuple, or tag payload. + /// At this point we've already unified those concepts and have + /// converted (for example) record field names to indices, and have + /// also dropped all fields that have no runtime representation (e.g. empty records). + /// + /// In a later compilation phase, these indices will be re-sorted + /// by alignment and converted to byte offsets, but we in this + /// phase we aren't concerned with alignment or sizes, just indices. + StructAccess: struct { + record_expr: ExprId, + record_type: TypeId, + field_type: TypeId, + field_id: cols.FieldNameId, + }, + + /// Same as SmallTag but with u16 discriminant instead of u8 + Tag: struct { + discriminant: u16, + tag_union_type: TypeId, + args: std.MultiArrayList(TypedExpr).Slice, + }, + + When: struct { + /// The value being matched on + value: ExprId, + /// The type of the value being matched on + value_type: TypeId, + /// The return type of all branches and thus the whole when expression + branch_type: TypeId, + /// The branches of the when expression + branches: cols.SafeList(WhenBranch).NonEmptySlice, + }, + + CompilerBug: problem.SpecializeTypesProblem, +}; + +pub const Def = struct { + pattern: PatternId, + /// Named variables in the pattern, e.g. `a` in `Ok a ->` + pattern_vars: std.MultiArrayList(TypedIdent).Slice, + expr: ExprId, + expr_type: TypeId, +}; + +pub const WhenBranch = struct { + /// The pattern(s) to match the value against + patterns: cols.SafeList(Pattern).NonEmptySlice, + /// A boolean expression that must be true for this branch to be taken + guard: ?ExprId, + /// The expression to produce if the pattern matches + value: ExprId, +}; + +pub const WhenBranches = struct { + // branches: Vec>, +}; + +pub const PatternId = cols.SafeList(Pattern).Id; +pub const PatternSlice = cols.SafeList(Pattern).Slice; +pub const PatternNonEmptySlice = cols.SafeList(Pattern).NonEmptySlice; + +pub const StructDestruct = struct { + ident: base.IdentId, + field: cols.FieldNameId, + destruct_type: DestructType, +}; + +pub const Pattern = union(enum) { + Identifier: base.IdentId, + As: struct { + pattern: PatternId, + ident: base.IdentId, + }, + StrLiteral: cols.LargeStringId, + NumberLiteral: base.NumberLiteral, + AppliedTag: struct { + tag_union_type: TypeId, + tag_name: base.IdentId, + args: cols.SafeList(PatternId).Slice, + }, + StructDestructure: struct { + struct_type: TypeId, + destructs: std.MultiArrayList(StructDestruct).Slice, + opt_spread: ?TypedPattern, + }, + List: struct { + elem_type: TypeId, + patterns: cols.SafeList(PatternId).Slice, + + /// Where a rest pattern splits patterns before and after it, if it does at all. + /// If present, patterns at index >= the rest index appear after the rest pattern. + /// For example: + /// [ .., A, B ] -> patterns = [A, B], rest = 0 + /// [ A, .., B ] -> patterns = [A, B], rest = 1 + /// [ A, B, .. 
] -> patterns = [A, B], rest = 2 + /// Optionally, the rest pattern can be named - e.g. `[ A, B, ..others ]` + opt_rest: ?.{ u16, ?base.IdentId }, + }, + Underscore, + CompilerBug: problem.SpecializeTypesProblem, +}; + +pub const DestructType = union(enum) { + Required, + Guard: TypedPattern, +}; + +pub const TypedExpr = struct { pattern: PatternId, type: TypeId }; +pub const TypedIdent = struct { pattern: PatternId, type: TypeId }; +pub const TypedPattern = struct { pattern: PatternId, type: TypeId }; + +pub const TypedExprSlice = cols.SafeMultiList(TypedExpr).Slice; +pub const TypedIdentSlice = cols.SafeMultiList(TypedIdent).Slice; +pub const TypedPatternSlice = cols.SafeMultiList(TypedPattern).Slice; diff --git a/src/check/canonicalize.zig b/src/check/canonicalize.zig new file mode 100644 index 0000000000..30ffc108ee --- /dev/null +++ b/src/check/canonicalize.zig @@ -0,0 +1,16 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const parse = @import("parse.zig"); + +const can = @This(); +pub const IR = @import("ir.zig").IR; + +pub fn canonicalize( + parse_ir: parse.IR, + other_modules: std.HashMap(base.ModuleId, can.IR), +) can.IR { + _ = parse_ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/check/canonicalize/ir.zig b/src/check/canonicalize/ir.zig new file mode 100644 index 0000000000..1a3db30b26 --- /dev/null +++ b/src/check/canonicalize/ir.zig @@ -0,0 +1,355 @@ +const std = @import("std"); +const base = @import("../../base.zig"); +const cols = @import("../../collections.zig"); + +const TypeVar = base.TypeVar; + +pub const IR = struct { + env: *base.SoloModuleEnv, + module_id: base.ModuleId, + // exposed_imports: std.AutoHashMap(comptime K: type, comptime V: type) + // MutMap, + // exposed_symbols: std.AutoHashMap(base.IdentId, .{}), + // referenced_values: VecSet, + /// all aliases. `bool` indicates whether it is exposed + // aliases: MutMap, + rigid_variables: RigidVariables, + exprs: cols.SafeList(Expr), + exprs_at_regions: cols.SafeMultiList(ExprAtRegion), + typed_exprs_at_regions: cols.SafeMultiList(TypedExprAtRegion), +}; + +// TODO: don't use symbol in this module, no imports really exist yet? + +const ExprId = cols.SafeList(Expr).Id; + +pub const Expr = union(enum) { + // Literals + + // Num stores the `a` variable in `Num a`. 
Not the same as the variable + // stored in Int and Float below, which is strictly for better error messages + Num: struct { + num_var: TypeVar, + literal: base.SmallStringId, + value: IntValue, + bound: NumBound, + }, + + // Int and Float store a variable to generate better error messages + Int: struct { + num_var: TypeVar, + precision_var: TypeVar, + literal: base.SmallStringId, + value: IntValue, + bound: IntBound, + }, + Float: struct { + num_var: TypeVar, + precision_var: TypeVar, + literal: base.SmallStringId, + value: f64, + bound: FloatBound, + }, + Str: base.LargeStringId, + // Number variable, precision variable, value, bound + SingleQuote: struct { + num_var: TypeVar, + precision_var: TypeVar, + value: u32, + bound: SingleQuoteBound, + }, + List: struct { + elem_var: TypeVar, + loc_elems: ExprAtRegionSlice, + }, + + // Lookups + Var: struct { + symbol: base.Symbol, + type_var: TypeVar, + }, + + // Branching + When: WhenId, + If: struct { + cond_var: TypeVar, + branch_var: TypeVar, + branches: IfBranchSlice, + final_else: ExprAtRegionId, + }, + + // Let + LetRec: struct { + defs: DefSlice, + cont: ExprAtRegionId, + cycle_mark: IllegalCycleMark, + }, + + /// This is *only* for calling functions, not for tag application. + /// The Tag variant contains any applied values inside it. + Call: struct { + // TODO: + // Box<(Variable, Loc, Variable, Variable)>, + args: TypedExprAtRegionSlice, + called_via: CalledVia, + }, + + Closure: ClosureData, + + // Product Types + Record: struct { + record_var: Variable, + // TODO: + // fields: SendMap, + }, + + /// Empty record constant + EmptyRecord, + + Tuple: struct { + tuple_var: TypeVar, + elems: TypedExprAtRegionSlice, + }, + + /// The "crash" keyword + Crash: struct { + msg: ExprAtRegionId, + ret_var: Variable, + }, + + /// Look up exactly one field on a record, e.g. (expr).foo. + RecordAccess: struct { + record_var: TypeVar, + ext_var: TypeVar, + field_var: TypeVar, + loc_expr: ExprAtRegionId, + field: Lowercase, + }, + + // Sum Types + Tag: struct { + tag_union_var: TypeVar, + ext_var: TypeVar, + name: TagName, + arguments: TypedExprAtRegionSlice, + }, + + ZeroArgumentTag: struct { + closure_name: Symbol, + variant_var: TypeVar, + ext_var: TypeVar, + name: TagName, + }, + + /// Compiles, but will crash if reached + RuntimeError: RuntimeError, +}; + +pub const IntValue = struct { + bytes: [16]u8, + kind: Kind, + + pub const Kind = enum { I128, U128 }; +}; + +pub const ExprAtRegionId = cols.SafeMultiList(ExprAtRegion).Id; +pub const ExprAtRegionSlice = cols.SafeMultiList(ExprAtRegion).Slice; + +pub const ExprAtRegion = struct { + expr: ExprId, + region: base.Region, +}; + +pub const TypedExprAtRegionSlice = cols.SafeMultiList(TypedExprAtRegion).Slice; + +pub const TypedExprAtRegion = struct { + expr: ExprId, + type_var: TypeVar, + region: base.Region, +}; + +pub const PatternAtRegion = struct { + pattern: PatternId, + region: base.Region, +}; + +pub const Function = struct { + return_var: TypeVar, + fx_var: TypeVar, + function_var: TypeVar, + expr: ExprId, + region: base.Region, +}; + +pub const IfBranchSlice = cols.SafeMultiList(IfBranch).Slice; + +pub const IfBranch = struct { + cond: ExprAtRegion, + body: ExprAtRegion, +}; + +pub const WhenId = cols.SafeMultiList(When).Id; + +pub const When = struct { + /// The actual condition of the when expression. 
+ loc_cond: ExprAtRegionId, + cond_var: TypeVar, + /// Type of each branch (and therefore the type of the entire `when` expression) + expr_var: TypeVar, + region: base.Region, + /// The branches of the when, and the type of the condition that they expect to be matched + /// against. + branches: WhenBranchSlice, + branches_cond_var: TypeVar, + /// Whether the branches are exhaustive. + exhaustive: ExhaustiveMark, +}; + +pub const WhenBranchPatternSlice = cols.SafeMultiList(WhenBranchPattern).Slice; + +pub const WhenBranchPattern = struct { + pattern: PatternAtRegion, + /// Degenerate branch patterns are those that don't fully bind symbols that the branch body + /// needs. For example, in `A x | B y -> x`, the `B y` pattern is degenerate. + /// Degenerate patterns emit a runtime error if reached in a program. + degenerate: bool, +}; + +pub const WhenBranchSlice = cols.SafeMultiList(WhenBranch).Slice; + +pub const WhenBranch = struct { + patterns: WhenBranchPatternSlice, + value: ExprAtRegionId, + guard: ?ExprAtRegionId, + /// Whether this branch is redundant in the `when` it appears in + redundant: RedundantMark, +}; + + +/// A pattern, including possible problems (e.g. shadowing) so that +/// codegen can generate a runtime error if this pattern is reached. +pub const Pattern = union(enum) { + Identifier: Symbol, + As(Box>, Symbol), + AppliedTag { + whole_var: Variable, + ext_var: Variable, + tag_name: TagName, + arguments: Vec<(Variable, Loc)>, + }, + UnwrappedOpaque { + whole_var: Variable, + opaque: Symbol, + argument: Box<(Variable, Loc)>, + + // The following help us link this opaque reference to the type specified by its + // definition, which we then use during constraint generation. For example + // suppose we have + // + // Id n := [Id U64 n] + // strToBool : Str -> Bool + // + // f = \@Id who -> strToBool who + // + // Then `opaque` is "Id", `argument` is "who", but this is not enough for us to + // infer the type of the expression as "Id Str" - we need to link the specialized type of + // the variable "n". + // That's what `specialized_def_type` and `type_arguments` are for; they are specialized + // for the expression from the opaque definition. `type_arguments` is something like + // [(n, fresh1)], and `specialized_def_type` becomes "[Id U64 fresh1]". + specialized_def_type: Box, + type_arguments: Vec, + lambda_set_variables: Vec, + }, + RecordDestructure { + whole_var: Variable, + ext_var: Variable, + destructs: Vec>, + }, + TupleDestructure { + whole_var: Variable, + ext_var: Variable, + destructs: Vec>, + }, + List { + list_var: Variable, + elem_var: Variable, + patterns: ListPatterns, + }, + NumLiteral(Variable, Box, IntValue, NumBound), + IntLiteral(Variable, Variable, Box, IntValue, IntBound), + FloatLiteral(Variable, Variable, Box, f64, FloatBound), + StrLiteral(Box), + SingleQuote(Variable, Variable, char, SingleQuoteBound), + Underscore, + + /// An identifier that marks a specialization of an ability member. + /// For example, given an ability member definition `hash : a -> U64 where a implements Hash`, + /// there may be the specialization `hash : Bool -> U64`. In this case we generate a + /// new symbol for the specialized "hash" identifier. + AbilityMemberSpecialization { + /// The symbol for this specialization. + ident: Symbol, + /// The ability name being specialized. 
+ specializes: Symbol, + }, + + // Runtime Exceptions + Shadowed(Region, Loc, Symbol), + OpaqueNotInScope(Loc), + // Example: (5 = 1 + 2) is an unsupported pattern in an assignment; Int patterns aren't allowed in assignments! + UnsupportedPattern(Region), + // parse error patterns + MalformedPattern(MalformedPatternProblem, Region), +}; + +/// Describes a bound on the width of an integer. +pub const IntBound = union(enum) { + /// There is no bound on the width. + None, + /// Must have an exact width. + Exact: base.Primitive.Num, + /// Must have a certain sign and a minimum width. + AtLeast: struct { + sign: SignDemand, + width: base.Primitive.Num, + }, +}; + +pub const FloatBound = union(enum) { + None, + Exact: FloatWidth, +}; + +pub const NumBound = union(enum) { + None, + /// Must be an integer of a certain size, or any float. + AtLeastIntOrFloat: struct { + sign: SignDemand, + width: base.Primitive.Num, + }, +}; + +pub const SingleQuoteBound = union(enum) { + AtLeast: struct { width: base.Primitive.Num }, +}; + +pub const FloatWidth = enum { + Dec, + F32, + F64, +}; + +pub const SignDemand = enum { + /// Can be signed or unsigned. + NoDemand, + /// Must be signed. + Signed, +}; + +/// Marks whether a when branch is redundant using a variable. +pub const RedundantMark = TypeVar; + +/// Marks whether a when expression is exhaustive using a variable. +pub const ExhaustiveMark = TypeVar; + diff --git a/src/check/parse.zig b/src/check/parse.zig new file mode 100644 index 0000000000..f4235254f5 --- /dev/null +++ b/src/check/parse.zig @@ -0,0 +1,11 @@ +const std = @import("std"); +const base = @import("../base.zig"); + +const parse = @This(); +pub const IR = @import("ir.zig").IR; + +pub fn parseModule(body: []u8) parse.IR { + _ = body; + + @panic("not implemented"); +} diff --git a/src/check/parse/ir.zig b/src/check/parse/ir.zig new file mode 100644 index 0000000000..7b2c650f7b --- /dev/null +++ b/src/check/parse/ir.zig @@ -0,0 +1,745 @@ +const base = @import("../base.zig"); + +pub const IR = struct { + header: SpacesBefore(Header), + defs: Defs, +}; + +pub fn Spaces(comptime T: type) type { + return struct { + before: []CommentOrNewline, + item: T, + after: []CommentOrNewline, + + pub fn no_spaces(item: T) Spaces(T) { + return Spaces(T){ .before = .{}, .item = item, .after = .{} }; + } + }; +} + +pub fn SpacesBefore(comptime T: type) type { + return struct { + before: []CommentOrNewline, + item: T, + }; +} + +pub fn SpacesAfter(comptime T: type) type { + return struct { + after: []CommentOrNewline, + item: T, + }; +} + +// // TODO: convert to fn Spaced(comptime T: type) type that returns a union using T +// pub enum Spaced<'a, T> { +// Item(T), + +// // Spaces +// SpaceBefore(&'a Spaced<'a, T>, &'a [CommentOrNewline<'a>]), +// SpaceAfter(&'a Spaced<'a, T>, &'a [CommentOrNewline<'a>]), +// } + +pub fn SpacedUnion(comptime T: type) type { + return enum(union) { + + }; +} + +pub const Header = union(enum) { + Module: ModuleHeader, + App: AppHeader, + Package: PackageHeader, + Platform: PlatformHeader, + Hosted: HostedHeader, +}; + +pub const WhenBranch = struct { + patterns: [].{ Pattern, Region }, + value: .{ Expr, Region }, + guard: ?.{ Expr, Region }, +}; + +pub const WhenPattern = struct { + pattern: .{ Pattern, Region }, + guard: ?.{ Expr, Region }, +}; + +pub enum StrSegment<'a> { + Plaintext(&'a str), // e.g. "foo" + Unicode(Loc<&'a str>), // e.g. "00A0" in "\u(00A0)" + EscapedChar(EscapedChar), // e.g. '\n' in "Hello!\n" + Interpolated(Loc<&'a Expr<'a>>), // e.g. 
"$(expr)" +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum SingleQuoteSegment<'a> { + Plaintext(&'a str), // e.g. 'f' + Unicode(Loc<&'a str>), // e.g. '00A0' in '\u(00A0)' + EscapedChar(EscapedChar), // e.g. '\n' + // No interpolated expressions in single-quoted strings +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum EscapedChar { + Newline, // \n + Tab, // \t + DoubleQuote, // \" + SingleQuote, // \' + Backslash, // \\ + CarriageReturn, // \r + Dollar, // \$ +} + +pub enum SingleQuoteLiteral<'a> { + /// The most common case: a plain character with no escapes + PlainLine(&'a str), + Line(&'a [SingleQuoteSegment<'a>]), +} + +pub enum StrLiteral<'a> { + /// The most common case: a plain string with no escapes or interpolations + PlainLine(&'a str), + Line(&'a [StrSegment<'a>]), + Block(&'a [&'a [StrSegment<'a>]]), +} + +pub const ResultTryKind = enum { + KeywordPrefix, + OperatorSuffix, +}; + +/// A parsed expression. This uses lifetimes extensively for two reasons: +/// +/// 1. It uses Bump::alloc for all allocations, which returns a reference. +/// 2. It often stores references into the input string instead of allocating. +/// +/// This dramatically reduces allocations during parsing. Once parsing is done, +/// we move on to canonicalization, which often needs to allocate more because +/// it's doing things like turning local variables into fully qualified symbols. +/// Once canonicalization is done, the arena and the input string get dropped. +pub enum Expr<'a> { + // Number Literals + Float(&'a str), + Num(&'a str), + NonBase10Int { + string: &'a str, + base: Base, + is_negative: bool, + }, + + /// String Literals + Str(StrLiteral<'a>), // string without escapes in it + /// eg 'b' + SingleQuote(&'a str), + + /// Look up exactly one field on a record, e.g. `x.foo`. + RecordAccess(&'a Expr<'a>, &'a str), + + /// e.g. `.foo` or `.0` + AccessorFunction(Accessor<'a>), + + /// Update the value of a field in a record, e.g. `&foo` + RecordUpdater(&'a str), + + /// Look up exactly one field on a tuple, e.g. `(x, y).1`. + TupleAccess(&'a Expr<'a>, &'a str), + + /// Early return on failures - e.g. the ? in `File.read_utf8(path)?` + TrySuffix(&'a Expr<'a>), + + // Collection Literals + List(Collection<'a, &'a Loc>>), + + RecordUpdate { + update: &'a Loc>, + fields: Collection<'a, Loc>>>, + }, + + Record(Collection<'a, Loc>>>), + + Tuple(Collection<'a, &'a Loc>>), + + /// Mapper-based record builders, e.g. + /// { Result.parallel <- + /// foo: Http.get_data(Foo), + /// bar: Http.get_data(Bar), + /// } + RecordBuilder { + mapper: &'a Loc>, + fields: Collection<'a, Loc>>>, + }, + + // Lookups + Var { + module_name: &'a str, // module_name will only be filled if the original Roc code stated something like `5 + SomeModule.my_var`, module_name will be blank if it was `5 + my_var` + ident: &'a str, + }, + + Underscore(&'a str), + + // The "crash" keyword + Crash, + + // Tags + Tag(&'a str), + + // Reference to an opaque type, e.g. 
@Opaq + OpaqueRef(&'a str), + + // Pattern Matching + Closure(&'a [Loc>], &'a Loc>), + /// Multiple defs in a row + Defs(&'a Defs<'a>, &'a Loc>), + + Dbg, + DbgStmt { + first: &'a Loc>, + extra_args: &'a [&'a Loc>], + continuation: &'a Loc>, + pnc_style: bool, + }, + + /// The `try` keyword that performs early return on errors + Try, + // This form of try is a desugared Result unwrapper + LowLevelTry(&'a Loc>, ResultTryKind), + + // This form of debug is a desugared call to roc_dbg + LowLevelDbg(&'a (&'a str, &'a str), &'a Loc>, &'a Loc>), + + // Application + /// To apply by name, do Apply(Var(...), ...) + /// To apply a tag by name, do Apply(Tag(...), ...) + Apply(&'a Loc>, &'a [&'a Loc>], CalledVia), + PncApply(&'a Loc>, Collection<'a, &'a Loc>>), + BinOps(&'a [(Loc>, Loc)], &'a Loc>), + UnaryOp(&'a Loc>, Loc), + + // Conditionals + If { + if_thens: &'a [(Loc>, Loc>)], + final_else: &'a Loc>, + indented_else: bool, + }, + When( + /// The condition + &'a Loc>, + /// A | B if bool -> expression + /// | if -> + /// Vec, because there may be many patterns, and the guard + /// is Option because each branch may be preceded by + /// a guard (".. if .."). + &'a [&'a WhenBranch<'a>], + ), + + Return( + /// The return value + &'a Loc>, + /// The unused code after the return statement + Option<&'a Loc>>, + ), + + // Blank Space (e.g. comments, spaces, newlines) before or after an expression. + // We preserve this for the formatter; canonicalization ignores it. + SpaceBefore(&'a Expr<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a Expr<'a>, &'a [CommentOrNewline<'a>]), + ParensAround(&'a Expr<'a>), + + // Problems + MalformedIdent(&'a str, crate::ident::BadIdent), + // Both operators were non-associative, e.g. (True == False == False). + // We should tell the author to disambiguate by grouping them with parens. + PrecedenceConflict(&'a PrecedenceConflict<'a>), + EmptyRecordBuilder(&'a Loc>), + SingleFieldRecordBuilder(&'a Loc>), + OptionalFieldInRecordBuilder(&'a Loc<&'a str>, &'a Loc>), +} + +pub fn split_around(items: &[T], target: usize) -> (&[T], &[T]) { + let (before, rest) = items.split_at(target); + let after = &rest[1..]; + + (before, after) +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct PrecedenceConflict<'a> { + pub whole_region: Region, + pub binop1_position: Position, + pub binop2_position: Position, + pub binop1: BinOp, + pub binop2: BinOp, + pub expr: &'a Loc>, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct TypeHeader<'a> { + pub name: Loc<&'a str>, + pub vars: &'a [Loc>], +} + +impl<'a> TypeHeader<'a> { + pub fn region(&self) -> Region { + Region::across_all( + [self.name.region] + .iter() + .chain(self.vars.iter().map(|v| &v.region)), + ) + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum TypeVar<'a> { + Identifier(&'a str), + SpaceBefore(&'a TypeVar<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a TypeVar<'a>, &'a [CommentOrNewline<'a>]), + + // These are syntactically parsed as exprs first, so if there's anything else here, + // we consider it malformed but preserve it for error reporting and more resilient parsing. + Malformed(&'a Expr<'a>), +} + +/// The `implements` keyword associated with ability definitions. 
+#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Implements<'a> { + Implements, + SpaceBefore(&'a Implements<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a Implements<'a>, &'a [CommentOrNewline<'a>]), +} + +/// An ability demand is a value defining the ability; for example `hash : a -> U64 where a implements Hash` +/// for a `Hash` ability. +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct AbilityMember<'a> { + pub name: Loc>, + pub typ: Loc>, +} + +impl AbilityMember<'_> { + pub fn region(&self) -> Region { + Region::across_all([self.name.region, self.typ.region].iter()) + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum TypeDef<'a> { + /// A type alias. This is like a standalone annotation, except the pattern + /// must be a capitalized Identifier, e.g. + /// + /// Foo : Bar Baz + Alias { + header: TypeHeader<'a>, + ann: Loc>, + }, + + /// An opaque type, wrapping its inner type. E.g. Age := U64. + Opaque { + header: TypeHeader<'a>, + typ: Loc>, + derived: Option<&'a ImplementsAbilities<'a>>, + }, + + /// An ability definition. E.g. + /// Hash implements + /// hash : a -> U64 where a implements Hash + Ability { + header: TypeHeader<'a>, + loc_implements: Loc>, + members: &'a [AbilityMember<'a>], + }, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum ValueDef<'a> { + // TODO in canonicalization, validate the pattern; only certain patterns + // are allowed in annotations. + Annotation(Loc>, Loc>), + + // TODO in canonicalization, check to see if there are any newlines after the + // annotation; if not, and if it's followed by a Body, then the annotation + // applies to that expr! (TODO: verify that the pattern for both annotation and body match.) + // No need to track that relationship in any data structure. + Body(&'a Loc>, &'a Loc>), + + AnnotatedBody { + ann_pattern: &'a Loc>, + ann_type: &'a Loc>, + lines_between: &'a [CommentOrNewline<'a>], + body_pattern: &'a Loc>, + body_expr: &'a Loc>, + }, + + Dbg { + condition: &'a Loc>, + preceding_comment: Region, + }, + + Expect { + condition: &'a Loc>, + preceding_comment: Region, + }, + + /// e.g. `import InternalHttp as Http exposing [Req]`. + ModuleImport(ModuleImport<'a>), + + /// e.g. `import "path/to/my/file.txt" as myFile : Str` + IngestedFileImport(IngestedFileImport<'a>), + + Stmt(&'a Loc>), + + StmtAfterExpr, +} + +impl<'a> ValueDef<'a> { + pub fn replace_expr(&mut self, new_expr: &'a Loc>) { + match self { + ValueDef::Body(_, expr) => *expr = new_expr, + ValueDef::AnnotatedBody { body_expr, .. 
} => *body_expr = new_expr, + _ => internal_error!("replacing expr in unsupported ValueDef"), + } + } +} + +pub struct RecursiveValueDefIter<'a, 'b> { + current: &'b Defs<'a>, + index: usize, + pending: std::vec::Vec<&'b Defs<'a>>, +} + +pub struct ModuleImport<'a> { + pub before_name: &'a [CommentOrNewline<'a>], + pub name: Loc>, + pub params: Option>, + pub alias: Option>>>, + pub exposed: Option< + header::KeywordItem< + 'a, + ImportExposingKeyword, + Collection<'a, Loc>>>, + >, + >, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct ModuleImportParams<'a> { + pub before: &'a [CommentOrNewline<'a>], + pub params: Loc>>>>, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct IngestedFileImport<'a> { + pub before_path: &'a [CommentOrNewline<'a>], + pub path: Loc>, + pub name: header::KeywordItem<'a, ImportAsKeyword, Loc<&'a str>>, + pub annotation: Option>, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct IngestedFileAnnotation<'a> { + pub before_colon: &'a [CommentOrNewline<'a>], + pub annotation: Loc>, +} + +impl<'a> Malformed for IngestedFileAnnotation<'a> { + fn is_malformed(&self) -> bool { + self.annotation.value.is_malformed() + } +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct ImportAsKeyword; + +impl header::Keyword for ImportAsKeyword { + const KEYWORD: &'static str = "as"; +} + +#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub struct ImportExposingKeyword; + +impl header::Keyword for ImportExposingKeyword { + const KEYWORD: &'static str = "exposing"; +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct ImportedModuleName<'a> { + pub package: Option<&'a str>, + pub name: ModuleName<'a>, +} + +impl<'a> From> for QualifiedModuleName<'a> { + fn from(imported: ImportedModuleName<'a>) -> Self { + Self { + opt_package: imported.package, + module: imported.name.into(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct ImportAlias<'a>(&'a str); + +impl<'a> ImportAlias<'a> { + pub const fn new(name: &'a str) -> Self { + ImportAlias(name) + } + + pub const fn as_str(&'a self) -> &'a str { + self.0 + } +} + +#[derive(Debug, Clone, PartialEq, Default)] +pub struct Defs<'a> { + pub tags: std::vec::Vec, ValueDef<'a>>>, + pub regions: std::vec::Vec, + pub space_before: std::vec::Vec>>, + pub space_after: std::vec::Vec>>, + pub spaces: std::vec::Vec>, + pub type_defs: std::vec::Vec>, + pub value_defs: std::vec::Vec>, +} + +pub struct SplitDefsAround<'a> { + pub before: Defs<'a>, + pub after: Defs<'a>, +} + +/// Should always be a zero-argument `Apply`; we'll check this in canonicalization +pub type AbilityName<'a> = Loc>; + +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct ImplementsClause<'a> { + pub var: Loc>, + pub abilities: &'a [AbilityName<'a>], +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum AbilityImpls<'a> { + // `{ eq: myEq }` + AbilityImpls(Collection<'a, Loc>>>), + + // We preserve this for the formatter; canonicalization ignores it. + SpaceBefore(&'a AbilityImpls<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a AbilityImpls<'a>, &'a [CommentOrNewline<'a>]), +} + +/// `Eq` or `Eq { eq: myEq }` +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum ImplementsAbility<'a> { + ImplementsAbility { + /// Should be a zero-argument `Apply` or an error; we'll check this in canonicalization + ability: Loc>, + impls: Option>>, + }, + + // We preserve this for the formatter; canonicalization ignores it. 
+ SpaceBefore(&'a ImplementsAbility<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a ImplementsAbility<'a>, &'a [CommentOrNewline<'a>]), +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct ImplementsAbilities<'a> { + pub before_implements_kw: &'a [CommentOrNewline<'a>], + pub implements: Region, + pub after_implements_kw: &'a [CommentOrNewline<'a>], + pub item: Loc>>>, +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum FunctionArrow { + /// -> + Pure, + /// => + Effectful, +} + +#[derive(Debug, Copy, Clone, PartialEq)] +pub enum TypeAnnotation<'a> { + /// A function. The types of its arguments, the type of arrow used, then the type of its return value. + Function( + &'a [Loc>], + FunctionArrow, + &'a Loc>, + ), + + /// Applying a type to some arguments (e.g. Map.Map String Int) + Apply(&'a str, &'a str, &'a [Loc>]), + + /// A bound type variable, e.g. `a` in `(a -> a)` + BoundVariable(&'a str), + + /// Inline type alias, e.g. `as List a` in `[Cons a (List a), Nil] as List a` + As( + &'a Loc>, + &'a [CommentOrNewline<'a>], + TypeHeader<'a>, + ), + + Record { + fields: Collection<'a, Loc>>>, + /// The row type variable in an open record, e.g. the `r` in `{ name: Str }r`. + /// This is None if it's a closed record annotation like `{ name: Str }`. + ext: Option<&'a Loc>>, + }, + + Tuple { + elems: Collection<'a, Loc>>, + /// The row type variable in an open tuple, e.g. the `r` in `( Str, Str )r`. + /// This is None if it's a closed tuple annotation like `( Str, Str )`. + ext: Option<&'a Loc>>, + }, + + /// A tag union, e.g. `[ + TagUnion { + /// The row type variable in an open tag union, e.g. the `a` in `[Foo, Bar]a`. + /// This is None if it's a closed tag union like `[Foo, Bar]`. + ext: Option<&'a Loc>>, + tags: Collection<'a, Loc>>, + }, + + /// '_', indicating the compiler should infer the type + Inferred, + + /// The `*` type variable, e.g. in (List *) + Wildcard, + + /// A "where" clause demanding abilities designated by a `where`, e.g. `a -> U64 where a implements Hash` + Where(&'a Loc>, &'a [Loc>]), + + // We preserve this for the formatter; canonicalization ignores it. + SpaceBefore(&'a TypeAnnotation<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a TypeAnnotation<'a>, &'a [CommentOrNewline<'a>]), + + /// A malformed type annotation, which will code gen to a runtime error + Malformed(&'a str), +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Tag<'a> { + Apply { + name: Loc<&'a str>, + args: &'a [Loc>], + }, + + // We preserve this for the formatter; canonicalization ignores it. + SpaceBefore(&'a Tag<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a Tag<'a>, &'a [CommentOrNewline<'a>]), +} + +pub fn AssignedField(comptime T: type) type { + return union(enum) { + // A required field with a label, e.g. `{ name: "blah" }` or `{ name : Str }` + RequiredValue(Loc<&'a str>, &'a [CommentOrNewline<'a>], &'a Loc), + + // An ignored field, e.g. `{ _name: "blah" }` or `{ _ : Str }` + IgnoredValue(Loc<&'a str>, &'a [CommentOrNewline<'a>], &'a Loc), + + // A label with no value, e.g. `{ name }` (this is sugar for { name: name }) + LabelOnly(Loc<&'a str>), + + // We preserve this for the formatter; canonicalization ignores it. 
+ SpaceBefore(&'a AssignedField<'a, Val>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a AssignedField<'a, Val>, &'a [CommentOrNewline<'a>]), + }; +} + +pub enum CommentOrNewline<'a> { + Newline, + LineComment(&'a str), + DocComment(&'a str), +} + +pub const PatternAs = struct { + spaces_before: &'a [CommentOrNewline<'a>], + identifier: Loc<&'a str>, + ident_region: base.Region, +} + +pub enum Pattern<'a> { + // Identifier + Identifier { + ident: &'a str, + }, + QualifiedIdentifier { + module_name: &'a str, + ident: &'a str, + }, + + Tag(&'a str), + + OpaqueRef(&'a str), + + Apply(&'a Loc>, &'a [Loc>]), + + PncApply(&'a Loc>, Collection<'a, Loc>>), + + /// This is Located rather than Located so we can record comments + /// around the destructured names, e.g. { x ### x does stuff ###, y } + /// In practice, these patterns will always be Identifier + RecordDestructure(Collection<'a, Loc>>), + + /// A required field pattern, e.g. { x: Just 0 } -> ... + /// Can only occur inside of a RecordDestructure + RequiredField(&'a str, &'a Loc>), + + /// An optional field pattern, e.g. { x ? Just 0 } -> ... + /// Can only occur inside of a RecordDestructure + OptionalField(&'a str, &'a Loc>), + + // Literal + NumLiteral(&'a str), + NonBase10Literal { + string: &'a str, + base: Base, + is_negative: bool, + }, + FloatLiteral(&'a str), + StrLiteral(StrLiteral<'a>), + + /// Underscore pattern + /// Contains the name of underscore pattern (e.g. "a" is for "_a" in code) + /// Empty string is unnamed pattern ("" is for "_" in code) + Underscore(&'a str), + SingleQuote(&'a str), + + /// A tuple pattern, e.g. (Just x, 1) + Tuple(Collection<'a, Loc>>), + + /// A list pattern like [_, x, ..] + List(Collection<'a, Loc>>), + + /// A list-rest pattern ".." + /// Can only occur inside of a [Pattern::List] + ListRest(Option<(&'a [CommentOrNewline<'a>], PatternAs<'a>)>), + + As(&'a Loc>, PatternAs<'a>), + + // Space + SpaceBefore(&'a Pattern<'a>, &'a [CommentOrNewline<'a>]), + SpaceAfter(&'a Pattern<'a>, &'a [CommentOrNewline<'a>]), + + // Malformed + Malformed(&'a str), + MalformedIdent(&'a str, crate::ident::BadIdent), + MalformedExpr(&'a Expr<'a>), +} + +pub const Base = enum { + Octal, + Binary, + Hex, + Decimal, +}; + +pub fn Collection(comptime T: type) type { + return struct { + items: []T, + // Use a pointer to a slice (rather than just a slice), in order to avoid bloating + // Ast variants. The final_comments field is rarely accessed in the hot path, so + // this shouldn't matter much for perf. + // Use an Option, so it's possible to initialize without allocating. + final_comments: ?[]CommentOrNewline, + }; +} + diff --git a/src/check/resolve_imports.zig b/src/check/resolve_imports.zig new file mode 100644 index 0000000000..db098d7bcf --- /dev/null +++ b/src/check/resolve_imports.zig @@ -0,0 +1,16 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const can = @import("canonicalize.zig"); + +const resolve = @This(); +pub const IR = @import("resolve_imports/ir.zig").IR; + +pub fn resolveImports( + can_ir: can.IR, + other_modules: std.HashMap(base.ModuleId, resolve.IR), +) resolve.IR { + _ = can_ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/check/resolve_imports/ir.zig b/src/check/resolve_imports/ir.zig new file mode 100644 index 0000000000..f651a8b5ca --- /dev/null +++ b/src/check/resolve_imports/ir.zig @@ -0,0 +1,77 @@ +//! A simplified mocking of the future `resolve_imports` compiler stage's artifacts, +//! 
which is roughly the artifacts of today's `roc_can` compiler stage. + +const std = @import("std"); +const base = @import("../base.zig"); +const cols = @import("../collections.zig"); +const problem = @import("../problem.zig"); + +const TypeVar = base.TypeVar; +const Region = base.Region; + +// created from `Declarations` +pub const IR = struct { + env: base.ModuleEnv, + declarations: cols.SafeList(DeclarationTag), + regions: cols.SafeList(base.Region), + + // utable: UnificationTable, + // pub type_var_slices: Vec, + type_vars: []TypeVar, + idents: base.IdentStore, + // symbols: Vec, + // symbol_regions: Vec, + + host_exposed_annotations: std.AutoHashMap(usize, TypeVar), + + function_bodies: cols.SafeList(FunctionDef), + function_regions: cols.SafeList(Region), + expressions: []Expr, + expression_regions: []Region, + destructs: []DestructureDef, +}; + +// pub const TypeVar = + +pub const TypeContent = union(enum) {}; + +pub const Pattern = union(enum) {}; + +pub const Expr = union(enum) {}; + +pub const DestructureDef = union(enum) {}; + +pub const DeclarationTag = union(enum) { + Value, + Function: cols.SafeList(FunctionDef).Id, + Recursive: cols.SafeList(FunctionDef).Id, + TailRecursive: cols.SafeList(FunctionDef).Id, + Destructure: cols.SafeList(DestructureDef).Id, + MutualRecursion: struct { + length: u16, + cycle_mark: IllegalCycleMark, + }, +}; + +/// Marks whether a recursive let-cycle was determined to be illegal during solving. +pub const IllegalCycleMark = ?TypeVar; + +pub const EarlyReturn = struct { + type_var: TypeVar, + region: Region, + kind: Kind, + + const Kind = enum { + Return, + Try, + }; +}; + +pub const FunctionDef = struct { + closure_type: TypeVar, + return_type: TypeVar, + fx_type: TypeVar, + early_returns: std.ArrayList(EarlyReturn), + captured_symbols: usize, // Vec<(Symbol, TypeVar)>, + arguments: usize, //Vec<(TypeVar, Pattern, Region)>, +}; diff --git a/src/check/typecheck.zig b/src/check/typecheck.zig new file mode 100644 index 0000000000..6076c2cd7b --- /dev/null +++ b/src/check/typecheck.zig @@ -0,0 +1,13 @@ +const std = @import("std"); +const base = @import("../base.zig"); +const resolve = @import("resolve_imports.zig"); + +pub fn typecheck( + resolve_ir: resolve.IR, + other_modules: std.HashMap(base.ModuleId, resolve.IR), +) void { + _ = resolve_ir; + _ = other_modules; + + @panic("not implemented"); +} diff --git a/src/collections.zig b/src/collections.zig new file mode 100644 index 0000000000..094706a6c3 --- /dev/null +++ b/src/collections.zig @@ -0,0 +1,27 @@ +const std = @import("std"); +const interner = @import("collections/string_interner.zig"); +const name = @import("collections/name_interner.zig"); +const safe_list = @import("collections/safe_list.zig"); + +pub const SmallStringInterner = interner.SmallStringInterner; +pub const SmallStringId = interner.SmallStringId; +pub const LargeStringInterner = interner.LargeStringInterner; +pub const LargeStringId = interner.LargeStringId; + +pub const TagNameId = name.TagNameId; +pub const TagNameInterner = name.TagNameInterner; +pub const FieldNameId = name.FieldNameId; +pub const FieldNameInterner = name.FieldNameInterner; + +pub const SafeList = safe_list.SafeList; +pub const SafeMultiList = safe_list.SafeMultiList; + +pub fn exit_on_oom() noreturn { + const oom_message = + \\I ran out of memory! I can't do anything to recover, so I'm exiting. + \\Try reducing memory usage on your machine and then running again. 
+    ;
+
+    std.debug.print(oom_message, .{});
+    std.process.exit(1);
+}
diff --git a/src/collections/name_interner.zig b/src/collections/name_interner.zig
new file mode 100644
index 0000000000..f0bbba3570
--- /dev/null
+++ b/src/collections/name_interner.zig
@@ -0,0 +1,54 @@
+const std = @import("std");
+const interner = @import("./string_interner.zig");
+
+/// A typed ID for an interned tag name.
+pub const TagNameId = struct { id: u32 };
+
+/// A thin wrapper around a small string interner that
+/// allows for typed IDs of tag names which can't be
+/// interchanged with other interned string IDs.
+pub const TagNameInterner = struct {
+    names: interner.SmallStringInterner,
+
+    pub fn init(allocator: std.mem.Allocator) TagNameInterner {
+        return TagNameInterner{ .names = interner.SmallStringInterner.init(allocator) };
+    }
+
+    pub fn deinit(self: *TagNameInterner) void {
+        self.names.deinit();
+    }
+
+    pub fn insert(self: *TagNameInterner, name: []u8) TagNameId {
+        return TagNameId{ .id = self.names.insert(name).id };
+    }
+
+    pub fn get(self: *TagNameInterner, id: TagNameId) []u8 {
+        return self.names.get(interner.SmallStringId{ .id = id.id });
+    }
+};
+
+/// A typed ID for an interned record field name.
+pub const FieldNameId = struct { id: u32 };
+
+/// A thin wrapper around a small string interner that
+/// allows for typed IDs of record field names which can't be
+/// interchanged with other interned string IDs.
+pub const FieldNameInterner = struct {
+    names: interner.SmallStringInterner,
+
+    pub fn init(allocator: std.mem.Allocator) FieldNameInterner {
+        return FieldNameInterner{ .names = interner.SmallStringInterner.init(allocator) };
+    }
+
+    pub fn deinit(self: *FieldNameInterner) void {
+        self.names.deinit();
+    }
+
+    pub fn insert(self: *FieldNameInterner, name: []u8) FieldNameId {
+        return FieldNameId{ .id = self.names.insert(name).id };
+    }
+
+    pub fn get(self: *FieldNameInterner, id: FieldNameId) []u8 {
+        return self.names.get(interner.SmallStringId{ .id = id.id });
+    }
+};
diff --git a/src/collections/safe_list.zig b/src/collections/safe_list.zig
new file mode 100644
index 0000000000..028e11892f
--- /dev/null
+++ b/src/collections/safe_list.zig
@@ -0,0 +1,102 @@
+const std = @import("std");
+const testing = std.testing;
+const cols = @import("../collections.zig");
+
+// A list that hands out typed IDs instead of raw indices, so an ID can only be used with the SafeList type it came from. No more "trust me bro!"
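+// For example (a hypothetical sketch; `Expr`, `Pattern`, `my_expr`, and `allocator` are
+// illustrative names, not part of this change):
+//
+//     var exprs = SafeList(Expr).init(allocator);
+//     var patterns = SafeList(Pattern).init(allocator);
+//     const expr_id = exprs.append(my_expr);
+//     _ = exprs.get(expr_id); // fine: this Id came from a SafeList(Expr)
+//     _ = patterns.get(expr_id); // compile error: expected SafeList(Pattern).Id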
+pub fn SafeList(comptime T: type) type { + return struct { + items: std.ArrayList(T), + + pub const Id = struct { id: u32 }; + pub const Slice = std.ArrayList(T).Slice; + pub const NonEmptySlice = struct { + slice: std.ArrayList(T).Slice, + + pub fn make_unchecked(items: []T) NonEmptySlice(T) { + return NonEmptySlice(T){ .slice = items }; + } + + pub fn first(slice: *NonEmptySlice(T)) *T { + return slice.slice[0]; + } + }; + + pub fn init(allocator: std.mem.Allocator) SafeList(T) { + return SafeList{ .items = std.ArrayList(T).init(allocator) }; + } + + pub fn deinit(self: *SafeList(T)) void { + self.items.deinit(); + } + + pub fn len(self: *SafeList(T)) usize { + return self.items.items.len; + } + + pub fn append(self: *SafeList(T), item: T) Id { + const length = self.len(); + self.items.append(item) catch cols.exit_on_oom; + + return Id{ .id = @as(u32, length) }; + } + + pub fn appendSlice(self: *SafeList(T), items: []const T) Slice { + const start_length = self.len(); + self.items.appendSlice(items) catch cols.exit_on_oom; + + return self.items.items[start_length..]; + } + + pub fn get(self: *SafeList(T), id: Id) *T { + return self.items.items[@as(usize, id.id)]; + } + }; +} + +pub fn SafeMultiList(comptime T: type) type { + return struct { + items: std.MultiArrayList(T), + allocator: std.mem.Allocator, + + pub const Id = struct { id: u32 }; + pub const Slice = std.MultiArrayList(T).Slice; + pub const X = std.MultiArrayList(T).Field; + + pub fn init(allocator: std.mem.Allocator) SafeList(T) { + return SafeList{ + .items = std.MultiArrayList(T){}, + .allocator = allocator, + }; + } + + pub fn deinit(self: *SafeMultiList(T)) void { + self.items.deinit(); + } + + pub fn len(self: *SafeMultiList(T)) usize { + return self.items.items.len; + } + + pub fn append(self: *SafeMultiList(T), item: T) Id { + const length = self.len(); + self.items.append(item) catch cols.exit_on_oom; + + return Id{ .id = @as(u32, length) }; + } + }; +} + +test "safe list_u32 inserting and getting" { + var list_u32 = SafeList(u32).init(testing.allocator); + defer list_u32.deinit(); + + try testing.expectEqual(list_u32.len(), 0); + + const id = list_u32.insert(1); + + try testing.expectEqual(list_u32.len(), 1); + + const item = list_u32.get(id); + + try testing.expectEqual(item.* == 1); +} diff --git a/src/collections/string_interner.zig b/src/collections/string_interner.zig new file mode 100644 index 0000000000..aa5cc43baf --- /dev/null +++ b/src/collections/string_interner.zig @@ -0,0 +1,163 @@ +const std = @import("std"); +const cols = @import("../collections.zig"); + +pub const SmallStringId = struct { id: u32 }; + +pub const SmallStringInterner = struct { + /// A deduplicated list of strings + strings: std.ArrayList([]u8), + /// All string indices that have the given hash + string_indices_per_hash: std.AutoHashMap(u32, std.ArrayList(u32)), + /// All outer indices that have to the given hash + outer_ids_per_hash: std.AutoHashMap(u32, std.ArrayList(SmallStringId)), + /// A unique for every string, which may or may not correspond + /// to the same underlying string + outer_indices: std.ArrayList(u32), + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator) SmallStringInterner { + return SmallStringInterner{ + .strings = std.ArrayList([]u8).init(allocator), + .string_indices_per_hash = std.AutoHashMap(u32, std.ArrayList(u32)).init(allocator), + .outer_ids_per_hash = std.AutoHashMap(u32, std.ArrayList(SmallStringId)).init(allocator), + .outer_indices = std.ArrayList(u32).init(allocator), + 
.allocator = allocator, + }; + } + + pub fn deinit(self: *SmallStringInterner) void { + var string_indices_iter = self.string_indices_per_hash.valueIterator(); + while (string_indices_iter.next()) |index_list| { + index_list.deinit(); + } + + var outer_ids_iter = self.outer_ids_per_hash.valueIterator(); + while (outer_ids_iter.next()) |outer_id_list| { + outer_id_list.deinit(); + } + + self.strings.deinit(); + self.string_indices_per_hash.deinit(); + self.outer_ids_per_hash.deinit(); + self.outer_indices.deinit(); + } + + pub fn insert(self: *SmallStringInterner, string: []u8) SmallStringId { + const hash = fnvStringHash(string); + + const string_indices = self.stringIndicesForHash(hash); + for (string_indices.items) |string_index| { + const interned = self.strings.items[string_index]; + if (string == interned) { + return self.addOuterIdForHash(hash, string_index); + } + } + + const copied_string = self.strings.allocator.alloc(u8, string.len); + std.mem.copyForwards(u8, copied_string, string); + + const strings_len = @as(u32, self.strings.items.len); + self.strings.append(copied_string) catch cols.exit_on_oom; + + return self.addOuterIdForHash(hash, strings_len); + } + + fn stringIndicesForHash(self: *SmallStringInterner, hash: u32) *cols.SafeList(u32) { + const res = self.string_indices_per_hash.getOrPut(hash) catch cols.exit_on_oom; + if (!res.found_existing) { + res.value_ptr = cols.SafeList(u32).init(self.allocator); + } + + return res.value_ptr.*; + } + + fn addOuterIdForHash(self: *SmallStringInterner, hash: u32, string_index: u32) SmallStringId { + const len = SmallStringId{ .id = @as(u32, self.outer_indices.items.len) }; + self.outer_indices.append(string_index) catch cols.exit_on_oom; + + const res = self.outer_ids_per_hash.getOrPut(hash) catch cols.exit_on_oom; + if (!res.found_existing) { + res.value_ptr = cols.SafeList(u32).init(self.allocator); + } + + res.value_ptr.append(len) catch cols.exit_on_oom; + + return len; + } + + pub fn idsHaveSameValue( + self: *SmallStringInterner, + first_id: SmallStringId, + second_id: SmallStringId, + ) bool { + const first_string_index = self.outer_indices[@as(usize, first_id.id)]; + const second_string_index = self.outer_indices[@as(usize, second_id.id)]; + + return first_string_index == second_string_index; + } + + pub fn lookup(self: *SmallStringInterner, string: []u8) std.ArrayList(SmallStringId).Slice { + const hash = fnvStringHash(string); + + if (self.outer_ids_per_hash.get(hash)) |outer_ids| { + return outer_ids.items; + } else { + return &.{}; + } + } + + pub fn get(self: *SmallStringInterner, id: SmallStringId) []u8 { + const string_index = self.outer_indices.items[@as(usize, id.id)]; + return self.strings.items[@as(usize, string_index)]; + } +}; + +pub const LargeStringId = struct { id: u32 }; + +pub const LargeStringInterner = struct { + // these are not deduped because equality checking on large strings becomes expensive + // and they are pretty likely unique anyway + strings: std.ArrayList([]u8), + + pub fn init(allocator: std.mem.Allocator) LargeStringInterner { + return LargeStringInterner{ + .strings = std.ArrayList([]u8).init(allocator), + }; + } + + pub fn deinit(self: *LargeStringInterner) void { + self.strings.deinit(); + } + + pub fn insert(self: *LargeStringInterner, string: []u8) LargeStringId { + const len = self.strings.items.len; + + const copied_string = self.strings.allocator.alloc(u8, string.len) catch cols.exit_on_oom; + std.mem.copyForwards(u8, copied_string, string); + + self.strings.append(copied_string) 
catch cols.exit_on_oom; + + return LargeStringId{ .id = @as(u32, len) }; + } + + pub fn get(self: *LargeStringInterner, id: LargeStringId) []u8 { + return self.strings.items[@as(usize, id.id)]; + } +}; + +/// A simple string hash. +/// +/// http://isthe.com/chongo/tech/comp/fnv/#FNV-1 +pub fn fnvStringHash(string: []const u8) u32 { + const FNV_PRIME_32_BIT: u32 = 16777619; + const OFFSET_BASIS_32_BIT: u32 = 2166136261; + + var hash = OFFSET_BASIS_32_BIT; + + for (string) |byte| { + hash *= FNV_PRIME_32_BIT; + hash ^= @as(u32, byte); + } + + return hash; +} diff --git a/src/coordinate.zig b/src/coordinate.zig new file mode 100644 index 0000000000..c5132696a0 --- /dev/null +++ b/src/coordinate.zig @@ -0,0 +1,53 @@ +const std = @import("std"); +const base = @import("base/main.zig"); +const resolve = @import("check/resolve_imports.zig"); +const type_spec = @import("build/specialize_types.zig"); +const func_lift = @import("build/lift_functions.zig"); +const func_spec = @import("build/specialize_types.zig"); +const func_solve = @import("build/lift_functions.zig"); +const lower = @import("build/lower_ir.zig"); +const refcount = @import("build/reference_count.zig"); + +const ResolveIR = type_spec.ResolveIR; +const TypeSpecIR = type_spec.TypeSpecIR; +const RefCountIR = refcount.RefCountIR; + +fn typecheck_module(filepath: []u8) std.AutoHashMap(base.ModuleId, ResolveIR) { + const allocator = std.heap.GeneralPurposeAllocator(.{}){}; + defer allocator.deinit(); + + _ = filepath; + + // const main_module = .{}; + // const dep_modules: [_][]u8 = [_].{}; + const module_dep_adjacencies = std.AutoHashMap(base.ModuleId, base.ModuleId).init(allocator); + + // TODO: in order of dependencies before who depends on them, run each phase of the compiler + return module_dep_adjacencies; +} + +fn prepare_module_for_codegen() void {} + +/// Run the `build` phase of compiling Roc code except for codegen. +/// +/// For now, we pass the IR from the last `check` stage (e.g. the `resolve_imports` +/// IR that is narrowed by typechecking) as a single, combined module here along +/// with a singleton `Env` that holds all common large data, like interned strings, +/// symbols, and interned tag names. +/// +/// In the future, we will not be combining all modules into a single "module" so this +/// will need to do more complicated coordination of the compiler stages. That said, +/// this still represents the long-term planned order of compilation. +fn pipe_ir_from_typechecking_to_codegen( + typechecked_modules: []TypeSpecIR, + resolve_ir: ResolveIR, +) RefCountIR { + const type_spec_ir = type_spec.specialize_types(resolve_ir, typechecked_modules); + const func_lift_ir = func_lift.lift_functions(type_spec_ir); + const func_solve_ir = func_solve.solve_functions(func_lift_ir); + const func_spec_ir = func_spec.specialize_functions(func_solve_ir); + const lower_ir_data = lower.lower_ir(func_spec_ir); + const ref_count_ir = refcount.reference_count(lower_ir_data); + + return ref_count_ir; +} diff --git a/src/env.zig b/src/env.zig new file mode 100644 index 0000000000..f0b897e26f --- /dev/null +++ b/src/env.zig @@ -0,0 +1,134 @@ +// An environment containing indexable data useful throughout most or all stages +// in the new compiler pipeline. 
+// pub const Env = struct { +// pub symbols: SymbolStore, +// // no deduping because these tend to be unique and potentially large +// string_literals: Vec, +// tag_names: TagNameCache, +// field_names: FieldNameCache, +// problems: Vec, +// // TODO: these should probably be made a part of `problems` +// compiler_problems: Vec, +// // TODO: where are these used, and how do we manage them? +// // pub tuple_elem_indices: Vec, +// // pub record_fields: Vec>, + +// pub fn add_string_literal(&mut self, s: String) -> StringLiteralId { +// let len = self.string_literals.len(); +// self.string_literals.push(s); + +// StringLiteralId(Index::new(len as u32)) +// } + +// pub fn add_field_name(&mut self, name: &str) -> FieldNameId { +// self.field_names.add_name(name) +// } + +// pub fn add_field_name_slice( +// &mut self, +// name_ids: impl IntoIterator, +// ) -> FieldNameIdSlice { +// self.field_names.add_name_slice(name_ids) +// } + +// pub fn add_tag_name(&mut self, name: &str) -> TagNameId { +// self.tag_names.add_name(name) +// } + +// pub fn add_tag_name_slice( +// &mut self, +// name_ids: impl IntoIterator, +// ) -> TagNameIdSlice { +// self.tag_names.add_name_slice(name_ids) +// } +// } + +// pub struct StringLiteralId(Index); + +// impl core::ops::Index for Env { +// type Output = str; + +// fn index(&self, index: StringLiteralId) -> &Self::Output { +// &self.string_literals[index.0.index()] +// } +// } + +// pub struct FieldNameId(DedupedStringId); + +// impl core::ops::Index for Env { +// type Output = str; + +// fn index(&self, index: FieldNameId) -> &Self::Output { +// &self.field_names.field_names[index.0] +// } +// } + +// pub struct FieldNameIdSlice(Slice); + +// impl core::ops::Index for Env { +// type Output = [FieldNameId]; + +// fn index(&self, index: FieldNameIdSlice) -> &Self::Output { +// &self.field_names.name_ids_for_slicing[index.0.indices()] +// } +// } + +// #[derive(Debug, Default)] +// pub struct FieldNameCache { +// field_names: DedupedStringStore, +// name_ids_for_slicing: Vec, +// } + +// impl FieldNameCache { +// pub fn add_name(&mut self, name: &str) -> FieldNameId { +// FieldNameId(self.field_names.insert(name)) +// } + +// pub fn add_name_slice( +// &mut self, +// name_ids: impl IntoIterator, +// ) -> FieldNameIdSlice { +// FieldNameIdSlice(slice_extend_new(&mut self.name_ids_for_slicing, name_ids)) +// } +// } + +// #[derive(Debug, Clone, Copy, PartialEq, Eq)] +// pub struct TagNameId(DedupedStringId); + +// impl core::ops::Index for Env { +// type Output = str; + +// fn index(&self, index: TagNameId) -> &Self::Output { +// &self.tag_names.tag_names[index.0] +// } +// } + +// #[derive(Debug, Clone, Copy, PartialEq, Eq)] +// pub struct TagNameIdSlice(Slice); + +// impl core::ops::Index for Env { +// type Output = [TagNameId]; + +// fn index(&self, index: TagNameIdSlice) -> &Self::Output { +// &self.tag_names.name_ids_for_slicing[index.0.indices()] +// } +// } + +// #[derive(Debug, Default)] +// pub struct TagNameCache { +// tag_names: DedupedStringStore, +// name_ids_for_slicing: Vec, +// } + +// impl TagNameCache { +// pub fn add_name(&mut self, name: &str) -> TagNameId { +// TagNameId(self.tag_names.insert(name)) +// } + +// pub fn add_name_slice( +// &mut self, +// name_ids: impl IntoIterator, +// ) -> TagNameIdSlice { +// TagNameIdSlice(slice_extend_new(&mut self.name_ids_for_slicing, name_ids)) +// } +// } diff --git a/src/problem.zig b/src/problem.zig new file mode 100644 index 0000000000..b6d0c8f3b9 --- /dev/null +++ b/src/problem.zig @@ -0,0 +1,25 @@ 
+const base = @import("base.zig"); + +const Problem = union(enum) { + CompilerProblem: CompilerProblem, + IdentProblems: struct { + problems: base.IdentProblems, + region: base.Region, + }, +}; + +const CompilerProblem = union(enum) { + SpecializeTypes: SpecializeTypesProblem, + LiftFunctions: LiftFunctionsProblem, + SolveFunctions: SolveFunctionsProblem, + SpecializeFunctions: SpecializeFunctionsProblem, + LowerIr: LowerIrProblem, + ReferenceCount: ReferenceCountProblem, +}; + +const SpecializeTypesProblem = enum {}; +const LiftFunctionsProblem = enum {}; +const SolveFunctionsProblem = enum {}; +const SpecializeFunctionsProblem = enum {}; +const LowerIrProblem = enum {}; +const ReferenceCountProblem = enum {};