From c72e77af69500fb11df41c3495e3c46a4968b626 Mon Sep 17 00:00:00 2001 From: Michael Shulman Date: Sun, 2 Mar 2025 10:55:03 -0600 Subject: [PATCH 1/5] make lexer state non-boolean-blind --- lib/parser/lexer.ml | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/parser/lexer.ml b/lib/parser/lexer.ml index f19215bf..14451b2d 100644 --- a/lib/parser/lexer.ml +++ b/lib/parser/lexer.ml @@ -14,8 +14,13 @@ module Located_token = struct type t = Position.range * Token_whitespace.t end +(* As the lexer "state" we remember whether we just saw a line comment. *) +module LexerState = struct + type t = [ `Linecomment | `None ] +end + (* We define the lexer using a basic utf-8 character parser from Fmlib. *) -module Basic = Ucharacter.Make_utf8 (Bool) (Located_token) (Unit) +module Basic = Ucharacter.Make_utf8 (LexerState) (Located_token) (Unit) open Basic let backquote = Uchar.of_char '`' @@ -26,7 +31,7 @@ let rbrace = Uchar.of_char '}' (* A line comment starts with a backquote and extends to the end of the line. *) let line_comment : Whitespace.t t = let* c = uword (fun c -> c = backquote) (fun c -> c <> newline) "line comment" in - let* () = set true in + let* () = set `Linecomment in return (`Line (String.sub c 1 (String.length c - 1))) (* A block comment starts with {` and ends with `}, and can be nested. *) @@ -43,9 +48,10 @@ let block_comment : Whitespace.t t = | _ when c = backquote -> rest buf nesting `Backquote | _ -> rest (buf ^ Utf8.Encoder.to_internal c) nesting (state_of c) in let* _ = backtrack (string "{`") "\"{`\"" in - let* () = set false in + let* () = set `None in rest "" 0 `None +(* This combinator parses not just newlines but also spaces and tabs, but it only counts the number of newlines. Thus it returns (`Newlines 0) if it parses only spaces and tabs (possibly including the newline at the end of a preceding line comment). *) let newlines : Whitespace.t t = let* n = one_or_more_fold_left @@ -53,12 +59,13 @@ let newlines : Whitespace.t t = (fun n c -> return (if c = '\n' then n + 1 else n)) (one_of_chars " \t\n\r" "space, tab, or newline") in let* line = get in - let* () = set false in - return (`Newlines (if line then n - 1 else n)) + let* () = set `None in + (* If we just saw a line comment, then we don't include the newline that *ended* the line comment as a "newline". *) + return (`Newlines (if line = `Linecomment then n - 1 else n)) -(* Whitespace.T consists of spaces, tabs, newlines, and comments. *) +(* Whitespace.t consists of spaces, tabs, newlines, and comments. We only record newlines when there are a positive number of them. *) let whitespace : Whitespace.t list t = - let* () = set false in + let* () = set `None in let* ws = zero_or_more_fold_left Emp (fun ws w -> if w = `Newlines 0 then return ws else return (Snoc (ws, w))) @@ -264,8 +271,8 @@ module Parser = struct include Basic.Parser (* This is how we make the lexer to plug into the parser. *) - let start : t = make_partial Position.start false bof - let restart (lex : t) : t = make_partial (position lex) false token |> transfer_lookahead lex + let start : t = make_partial Position.start `None bof + let restart (lex : t) : t = make_partial (position lex) `None token |> transfer_lookahead lex end module Single_token = struct From a9311b46bed044ff68506697b495fb3d4b6dba82 Mon Sep 17 00:00:00 2001 From: Michael Shulman Date: Sun, 2 Mar 2025 10:59:56 -0600 Subject: [PATCH 2/5] pp_arrow --- lib/parser/builtins.ml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/parser/builtins.ml b/lib/parser/builtins.ml index 0d0b98f2..07fb58d8 100644 --- a/lib/parser/builtins.ml +++ b/lib/parser/builtins.ml @@ -411,8 +411,7 @@ let rec pp_doms : formatter -> pi_dom list -> unit = pp_term `None ppf ty; pp_doms ppf doms -let () = - set_print arrow @@ fun space ppf obs ws -> +let pp_arrow space ppf obs ws = let doms, wsarrow, cod = get_pi `First obs ws in pp_open_box ppf 1; if true then ( @@ -427,6 +426,8 @@ let () = pp_term space ppf cod); pp_close_box ppf () +let () = set_print arrow pp_arrow + (* ******************** Abstraction ******************** *) From 56f3a23701a801189eb1fc161f547585e6935e2a Mon Sep 17 00:00:00 2001 From: Michael Shulman Date: Sun, 2 Mar 2025 13:44:47 -0600 Subject: [PATCH 3/5] allow bar notation for tuples --- README.md | 13 +- lib/parser/builtins.ml | 269 +++++++++++++++++++++++----------- lib/parser/print.ml | 11 +- lib/parser/unparse.ml | 11 +- test/black/letin.t | 5 +- test/black/transparent.t | 30 +--- test/parser/parse_builtins.ml | 38 ++++- test/parser/reformat.expected | 107 ++++++++++++-- test/parser/reformat.ml | 12 ++ 9 files changed, 356 insertions(+), 140 deletions(-) diff --git a/README.md b/README.md index 93a84008..860b343b 100644 --- a/README.md +++ b/README.md @@ -483,7 +483,7 @@ If you have forgotten the context and type of a hole that were displayed when it ## Record types and tuples -We now describe the various other classes of types that can be defined by the user, starting with the simplest, record types. +We now describe the various other classes of types that can be defined by the user, starting with the simplest: record types. ### Defining record types @@ -512,7 +512,7 @@ def Σ (A : Type) (B : A → Type) : Type ≔ sig ( snd : B fst, ) ``` -However, we consider it better style in general to use specialized record types rather than generic Σ-types, as it provides better error-checking and documentation of the meaning of the fields. It is also probably more efficient to use one record type with a lot of fields than an iterated Σ-type. In the future we plan to implement metaprogramming-like capabilities for proving theorems about arbitrary record types, so that using them in preference to generic Σ-types does not entail a loss of expressivity. +However, we consider it better style in general to use specialized record types rather than generic Σ-types, as it provides better error-checking and documentation of the meaning of the fields. It is also probably more efficient to use one record type with a lot of fields than an iterated Σ-type. In the future we plan to implement metaprogramming-like capabilities for proving theorems about arbitrary record types, so that using them in preference to generic Σ-types will not entail a loss of expressivity. Currently user notations cannot bind variables, so it is not possible to define a binding notation such as `(x : A) × B x` for Σ-types. But if we define a non-dependent product type, we can give it an infix notation: ``` @@ -586,6 +586,15 @@ def wrapped_zero : wrapped_nat ≔ (zero. ,) Syntactically, tuples are an outfix notation that includes the parentheses, rather than an infix meaning of the comma; thus the parentheses are always required. Tuples are not associative: neither `(a, (b, c))` nor `((a, b), c)` can be written as `(a,b,c)`. The latter belongs to a record type with three fields, whereas the former two belong to a record type with two fields, one of which is itself a record type with two fields. (This aligns with the behavior of functional programming languages such as Haskell and OCaml.) +Finally, there is an alternative notation for tuples that uses bars `|` rather than commas. In this case there may not be a trailing bar, but there can be an extra initial one. +``` +def nat.magma : Magma ≔ ( +| t ≔ ℕ +| op ≔ plus +) +``` +Some people prefer the look of this when a tuple definition stretches over multiple lines, especially given its parallelism to match and comatch statements (below). + ### Accessing fields diff --git a/lib/parser/builtins.ml b/lib/parser/builtins.ml index 07fb58d8..c4ac8f93 100644 --- a/lib/parser/builtins.ml +++ b/lib/parser/builtins.ml @@ -18,7 +18,28 @@ module StringSet = Set.Make (String) let parens = make "parens" Outfix -(* Parentheses are parsed, processed, and printed along with tuples, since their notation overlaps. *) +let pp_parens space ppf obs ws = + match obs with + | [ body ] -> + let wslparen, ws = take LParen ws in + let wsrparen, ws = take RParen ws in + taken_last ws; + pp_open_hovbox ppf 1; + if true then ( + pp_tok ppf LParen; + pp_ws `None ppf wslparen; + pp_term `None ppf body; + pp_tok ppf RParen); + pp_close_box ppf (); + pp_ws space ppf wsrparen + | _ -> fatal (Anomaly "invalid notation arguments for parens") + +let () = + set_tree parens (Closed_entry (eop LParen (term RParen (Done_closed parens)))); + set_print parens pp_parens; + set_print_as_case parens pp_parens + +(* Parentheses are processed along with tuples, since their notation overlaps: a labeled 1-tuple "(x ≔ M)" is parsed as a coloneq inside a parens. *) (* ******************** Let-binding @@ -316,7 +337,7 @@ let rec get_pi_args : | _ -> raise Invalid_telescope with Invalid_telescope -> Nondep { wsarrow; ty = Term doms } :: accum -(* Get all the domains and eventual codomain from a right-associated iterated function-type. *) +(* Get all the domains and eventual codomain from a right-associated iterated function-type. This is used for both printing and processing, so we extract the whitespace as well. *) let rec get_pi : type lt ls rt rs. arrow_opt -> @@ -543,89 +564,142 @@ let () = | _ -> fatal (Anomaly (Printf.sprintf "invalid notation arguments for Type: %d" (List.length ws))) (* ******************** - Anonymous tuples - ******************** *) + Coloneq + ******************** *) +(* Coloneq is an auxiliary notation that appears inside tuples for labeled arguments. It's not processed on its own, only as part of a tuple, but it is parsed and printed on its own. *) let coloneq = make "coloneq" (Infixr No.minus_omega) let () = set_tree coloneq (Open_entry (eop Coloneq (done_open coloneq))); set_processor coloneq { process = (fun _ _ _ _ -> fatal Parse_error) } -(* The notation for tuples is "( x ≔ M, y ≔ N, z ≔ P )". The parentheses don't conflict with ordinary parentheses, since ≔ and , are not term-forming operators all by themselves. The 0-ary tuple "()" is included, and also doesn't conflict since ordinary parentheses must contain a term. We also allow some of the components of the tuple to be unlabeled, as in "(M, N, P)"; these are assigned to the fields that don't have a corresponding labeled component in the order they appear in the record type. The only thing that's not allowed is an unlabeled 1-tuple "(M)" since that would conflict with ordinary parentheses. (TODO: We could essentially allow that by making the tupling and projection of a 1-ary record type implicit coercions.) *) +let pp_coloneq space ppf obs ws = + let wscoloneq, ws = take Coloneq ws in + taken_last ws; + match obs with + | [ x; body ] -> + pp_open_hovbox ppf 2; + if true then ( + pp_term `Nobreak ppf x; + pp_tok ppf Coloneq; + pp_ws `Break ppf wscoloneq; + pp_term space ppf body); + pp_close_box ppf () + | _ -> fatal (Anomaly "invalid notation arguments for tuple") + +let () = set_print coloneq pp_coloneq + +(* ******************** + Tuples + ******************** *) + +(* The notation for tuples is "( x ≔ M, y ≔ N, z ≔ P )", possibly with a trailing comma. The parentheses don't conflict with ordinary parentheses, since ≔ and , are not term-forming operators all by themselves. The 0-ary tuple "()" is included, and also doesn't conflict since ordinary parentheses must contain a term. We also allow some of the components of the tuple to be unlabeled, as in "(M, N, P)"; these are assigned to the fields that don't have a corresponding labeled component in the order they appear in the record type. The only thing that's not allowed is an unlabeled 1-tuple "(M)" since that would conflict with ordinary parentheses, but you can write (M,) since trailing commas are always allowed, as well as give an empty label (_ ≔ M). An alternative notation is "( x ≔ M | y ≔ N | z ≔ P )". In this version, an extra initial bar is allowed, but not a trailing one, and labels are required. Bars and commas cannot be mixed. The empty tuple () cannot contain a comma or a bar. *) -let rec tuple_fields () = +(* We parse tuples using four notation objcts. The first is 'parens', defined above, which includes labeled 1-tuples "(x ≔ M)" with a coloneq notation inside. The second is empty_tuple, which is just the empty tuple "()". *) + +let empty_tuple = make "empty_tuple" Outfix +let () = set_tree empty_tuple (Closed_entry (eop LParen (op RParen (Done_closed empty_tuple)))) + +(* The third is comma_tuple, which parses tuples that are separated by commas and contain at least one comma. *) +let comma_tuple = make "comma_tuple" Outfix + +(* This subtree parses the part of a comma tuple that comes *after* a comma. Since trailing commas are allowed, it could end here, or it could parse another term followed either by a comma or by the ending parenthesis. *) +let rec comma_tuple_fields () = Inner { empty_branch with - ops = TokMap.singleton RParen (Done_closed parens); + ops = TokMap.singleton RParen (Done_closed comma_tuple); term = Some - (TokMap.of_list [ (Op ",", Lazy (lazy (tuple_fields ()))); (RParen, Done_closed parens) ]); + (TokMap.of_list + [ (Op ",", Lazy (lazy (comma_tuple_fields ()))); (RParen, Done_closed comma_tuple) ]); } -let () = set_tree parens (Closed_entry (eop LParen (tuple_fields ()))) +let () = set_tree comma_tuple (Closed_entry (eop LParen (term (Op ",") (comma_tuple_fields ())))) + +(* The third is bar_tuple, which parses tuples that are separated by bars and contain at least one bar. *) +let bar_tuple = make "bar_tuple" Outfix + +(* This subtree parses the part of a bar tuple that comes *after* a bar. Since trailing bars are not allowed, it must have a term, which is followed by either another bar or the ending parenthesis. *) +let rec bar_tuple_fields () = + terms [ (Op "|", Lazy (lazy (bar_tuple_fields ()))); (RParen, Done_closed bar_tuple) ] + +let () = + set_tree bar_tuple + (Closed_entry + (eop LParen + (Inner + { + empty_branch with + ops = TokMap.singleton (Op "|") (bar_tuple_fields ()); + term = Some (TokMap.singleton (Op "|") (bar_tuple_fields ())); + }))) let rec process_tuple : type n. - bool -> (Field.t option, n check located) Abwd.t -> Field.Set.t -> (string option, n) Bwv.t -> observation list -> Asai.Range.t option -> - Whitespace.alist -> n check located = - fun first flds found ctx obs loc ws -> + fun flds found ctx obs loc -> match obs with | [] -> { value = Raw.Struct (Eta, flds); loc } | Term { value = Notn n; loc } :: obs when equal (notn n) coloneq -> ( - let ws = Option.fold ~none:ws ~some:snd (take_opt (Op ",") ws) in match args n with | [ Term { value = Ident ([ x ], _); loc = xloc }; Term tm ] -> let tm = process ctx tm in let fld = Field.intern x in if Field.Set.mem fld found then fatal ?loc:xloc (Duplicate_field_in_tuple fld) - else - process_tuple false (Abwd.add (Some fld) tm flds) (Field.Set.add fld found) ctx obs loc - ws + else process_tuple (Abwd.add (Some fld) tm flds) (Field.Set.add fld found) ctx obs loc | [ Term { value = Placeholder _; _ }; Term tm ] -> let tm = process ctx tm in - process_tuple false (Abwd.add None tm flds) found ctx obs loc ws + process_tuple (Abwd.add None tm flds) found ctx obs loc | Term x :: _ -> fatal ?loc:x.loc Invalid_field_in_tuple | _ -> fatal (Anomaly "invalid notation arguments for tuple")) - | [ Term body ] when first && Option.is_none (take_opt (Op ",") ws) -> process ctx body | Term tm :: obs -> let tm = process ctx tm in - let ws = Option.fold ~none:ws ~some:snd (take_opt (Op ",") ws) in - process_tuple false (Abwd.add None tm flds) found ctx obs loc ws + process_tuple (Abwd.add None tm flds) found ctx obs loc let () = set_processor parens { process = - (fun ctx obs loc ws -> - let _, ws = take LParen ws in - process_tuple true Abwd.empty Field.Set.empty ctx obs loc ws); - } - -let pp_coloneq space ppf obs ws = - let wscoloneq, ws = take Coloneq ws in - taken_last ws; + (fun ctx obs loc _ -> + match obs with + (* If a parenthesis contains a ≔, then it's actually a labeled 1-tuple. *) + | [ Term { value = Notn n; _ } ] when equal (notn n) coloneq -> + process_tuple Abwd.empty Field.Set.empty ctx obs loc + (* Otherwise, there must be only one term inside, and it's an ordinary parenthesis. *) + | [ Term body ] -> process ctx body + | _ -> fatal (Anomaly "invalid notation arguments for parens")); + }; + (* The other kinds of tuples are all processed identically, since their sequences of terms are all the same. *) + let pt = + { process = (fun ctx obs loc _ -> process_tuple Abwd.empty Field.Set.empty ctx obs loc) } in + set_processor empty_tuple pt; + set_processor comma_tuple pt; + set_processor bar_tuple pt + +let pp_empty_tuple space ppf obs ws = match obs with - | [ x; body ] -> - pp_open_hovbox ppf 2; - if true then ( - pp_term `Nobreak ppf x; - pp_tok ppf Coloneq; - pp_ws `Break ppf wscoloneq; - pp_term space ppf body); - pp_close_box ppf () - | _ -> fatal (Anomaly "invalid notation arguments for tuple") + | [] -> + let wslparen, ws = take LParen ws in + let wsrparen, ws = take RParen ws in + taken_last ws; + pp_tok ppf LParen; + pp_ws `None ppf wslparen; + pp_tok ppf RParen; + pp_ws space ppf wsrparen + | _ :: _ -> fatal (Anomaly "invalid notation arguments for empty tuple") -let () = set_print coloneq pp_coloneq +let () = + set_print empty_tuple pp_empty_tuple; + set_print_as_case empty_tuple pp_empty_tuple -let rec pp_fields : formatter -> observation list -> Whitespace.alist -> Whitespace.alist = +let rec pp_comma_fields : formatter -> observation list -> Whitespace.alist -> Whitespace.alist = fun ppf obs ws -> match obs with | [] -> ws @@ -633,7 +707,7 @@ let rec pp_fields : formatter -> observation list -> Whitespace.alist -> Whitesp pp_term `None ppf tm; match obs with | [] -> ws - | _ -> + | _ :: _ -> let wscomma, ws = take (Op ",") ws in pp_tok ppf (Op ","); pp_ws @@ -641,32 +715,67 @@ let rec pp_fields : formatter -> observation list -> Whitespace.alist -> Whitesp | `Wide -> `Break | `Narrow -> `Custom (("", 0, ""), ("", 0, ""))) ppf wscomma; - pp_fields ppf obs ws) + pp_comma_fields ppf obs ws) + +(* TODO: Don't open a box in case state? *) +let pp_comma_tuple space ppf obs ws = + let style, state, spacing = (style (), state (), spacing ()) in + (match state with + | `Term -> + if style = `Noncompact then pp_open_box ppf 0; + pp_open_hvbox ppf 2 + | `Case -> pp_open_vbox ppf 2); + pp_tok ppf LParen; + let wslparen, ws = take LParen ws in + pp_ws + (match (style, spacing) with + | `Compact, `Wide -> `Nobreak + | `Compact, `Narrow -> `None + | `Noncompact, _ -> `Break) + ppf wslparen; + let ws = pp_comma_fields ppf obs ws in + (match (obs, style, spacing, state) with + | [ _ ], _, _, _ -> pp_print_string ppf "," + | _, `Compact, `Wide, _ -> pp_print_string ppf " " + | _, `Compact, `Narrow, _ -> () + | _, `Noncompact, _, `Term -> + pp_close_box ppf (); + pp_print_custom_break ~fits:("", 1, "") ~breaks:(",", 0, "") ppf + | _, `Noncompact, _, `Case -> pp_print_custom_break ~fits:("", 1, "") ~breaks:(",", -2, "") ppf); + let ws = + match take_opt (Op ",") ws with + | Some (wscomma, ws) -> + pp_ws `None ppf wscomma; + ws + | None -> ws in + pp_tok ppf RParen; + let wsrparen, ws = take RParen ws in + taken_last ws; + pp_ws space ppf wsrparen; + pp_close_box ppf () + +let () = + set_print comma_tuple pp_comma_tuple; + set_print_as_case comma_tuple pp_comma_tuple -let pp_tuple space ppf obs ws = +let rec pp_bar_fields : formatter -> observation list -> Whitespace.alist -> Whitespace.alist = + fun ppf obs ws -> match obs with - | [] -> - let wslparen, ws = take LParen ws in - let wsrparen, ws = take RParen ws in - taken_last ws; - pp_tok ppf LParen; - pp_ws `None ppf wslparen; - pp_tok ppf RParen; - pp_ws `None ppf wsrparen - | [ body ] -> - let wslparen, ws = take LParen ws in - let wsrparen, ws = take RParen ws in - taken_last ws; - pp_open_hovbox ppf 1; - if true then ( - pp_tok ppf LParen; - pp_ws `None ppf wslparen; - pp_term `None ppf body; - pp_tok ppf RParen); - pp_close_box ppf (); - pp_ws space ppf wsrparen + | [] -> ws + | tm :: obs -> + let wsbar, ws = take (Op "|") ws in + pp_print_cut ppf (); + pp_tok ppf (Op "|"); + pp_ws `Nobreak ppf wsbar; + pp_term (`Custom (("", 1, ""), ("", 0, ""))) ppf tm; + pp_bar_fields ppf obs ws + +(* TODO: Don't open a box in case state? *) +let pp_bar_tuple space ppf obs ws = + match obs with + | [] -> fatal (Anomaly "invalid notation arguments for bar tuple") | _ :: _ -> - let style, state, spacing = (style (), state (), spacing ()) in + let style, state, _spacing = (style (), state (), spacing ()) in (match state with | `Term -> if style = `Noncompact then pp_open_box ppf 0; @@ -674,26 +783,15 @@ let pp_tuple space ppf obs ws = | `Case -> pp_open_vbox ppf 2); pp_tok ppf LParen; let wslparen, ws = take LParen ws in - pp_ws - (match (style, spacing) with - | `Compact, `Wide -> `Nobreak - | `Compact, `Narrow -> `None - | `Noncompact, _ -> `Break) - ppf wslparen; - let ws = pp_fields ppf obs ws in - (match (style, spacing, state) with - | `Compact, `Wide, _ -> pp_print_string ppf " " - | `Compact, `Narrow, _ -> () - | `Noncompact, _, `Term -> + pp_ws `None ppf wslparen; + let ws = must_start_with ~count:(List.length obs) (Op "|") ws in + let ws = pp_bar_fields ppf obs ws in + (match (style, state) with + | `Compact, _ -> () + | `Noncompact, `Term -> pp_close_box ppf (); - pp_print_custom_break ~fits:("", 1, "") ~breaks:(",", 0, "") ppf - | `Noncompact, _, `Case -> pp_print_custom_break ~fits:("", 1, "") ~breaks:(",", -2, "") ppf); - let ws = - match take_opt (Op ",") ws with - | Some (wscomma, ws) -> - pp_ws `None ppf wscomma; - ws - | None -> ws in + pp_print_custom_break ~fits:("", 1, "") ~breaks:("", 0, "") ppf + | `Noncompact, `Case -> pp_print_custom_break ~fits:("", 1, "") ~breaks:("", -2, "") ppf); pp_tok ppf RParen; let wsrparen, ws = take RParen ws in taken_last ws; @@ -701,8 +799,8 @@ let pp_tuple space ppf obs ws = pp_close_box ppf () let () = - set_print parens pp_tuple; - set_print_as_case parens pp_tuple + set_print bar_tuple pp_bar_tuple; + set_print_as_case bar_tuple pp_bar_tuple (* ******************** Comatches @@ -1922,6 +2020,9 @@ let builtins = |> Situation.add arrow |> Situation.add universe |> Situation.add coloneq + |> Situation.add empty_tuple + |> Situation.add comma_tuple + |> Situation.add bar_tuple |> Situation.add comatch |> Situation.add dot |> Situation.add implicit_mtch diff --git a/lib/parser/print.ml b/lib/parser/print.ml index 285dc395..7b35046c 100644 --- a/lib/parser/print.ml +++ b/lib/parser/print.ml @@ -23,10 +23,13 @@ let take_opt (tok : Token.t) (ws : Whitespace.alist) = | [] -> Some ([], []) | (t, x) :: xs -> if tok = t then Some (x, xs) else None -let must_start_with (tok : Token.t) (ws : Whitespace.alist) = - match ws with - | (t, _) :: _ when t = tok -> ws - | _ -> (tok, []) :: ws +(* Ensure that WS starts with at least COUNT copies of TOK. *) +let must_start_with ?(count = 1) (tok : Token.t) (ws : Whitespace.alist) = + let rec how_many n ws = + match ws with + | (t, _) :: ws when t = tok -> how_many (n + 1) ws + | _ -> n in + List.append (List.init (max 0 (count - how_many 0 ws)) (fun _ -> (tok, []))) ws (* Ensure that we took all the elements. *) let taken_last (ws : Whitespace.alist) = diff --git a/lib/parser/unparse.ml b/lib/parser/unparse.ml index b2cfd940..47a88f7c 100644 --- a/lib/parser/unparse.ml +++ b/lib/parser/unparse.ml @@ -253,7 +253,14 @@ let rec unparse : unparse_lam cube vars Emp tm li ri | Struct (Eta, _, fields, _) -> unlocated - (outfix ~notn:parens ~ws:[] + (outfix + ~notn: + (match Bwd.length fields with + | 0 -> empty_tuple + (* We always unparse 1-tuples using parens, no commas or bars. *) + | 1 -> parens + | _ -> comma_tuple) + ~ws:[] ~inner: (Abwd.fold (fun fld (tm, l) acc -> @@ -268,7 +275,7 @@ let rec unparse : ~first:(unlocated (Ident ([ Field.to_string fld ], []))) ~inner:Emp ~last:tm ~left_ok:(No.le_refl No.minus_omega) ~right_ok:(No.le_refl No.minus_omega)) - (* An unlabeled 1-tuple must be written (_ := M). *) + (* An unlabeled 1-tuple is currently unparsed as (_ := M). *) | `Unlabeled when Bwd.length fields = 1 -> unlocated (infix ~notn:coloneq ~ws:[] ~first:(unlocated (Placeholder [])) diff --git a/test/black/letin.t b/test/black/letin.t index eeb05855..e1949625 100644 --- a/test/black/letin.t +++ b/test/black/letin.t @@ -352,10 +352,7 @@ Let doesn't make a case tree unless it needs to: → info[I0000] ○ constant foo' defined - ( - fst ≔ X ↦ X, - snd ≔ Type, - ) + ( fst ≔ X ↦ X, snd ≔ Type ) : prod (Type → Type) Type diff --git a/test/black/transparent.t b/test/black/transparent.t index 1acbab96..008cf1fd 100644 --- a/test/black/transparent.t +++ b/test/black/transparent.t @@ -36,45 +36,27 @@ Transparency and translucency y1 : prod1 - ( - fst ≔ x2a .fst, - snd ≔ x2a .snd, - ) + ( fst ≔ x2a .fst, snd ≔ x2a .snd ) : prod2a - ( - fst ≔ a, - snd ≔ b, - ) + ( fst ≔ a, snd ≔ b ) : prod2a - ( - x2b .fst, - x2b .snd, - ) + ( x2b .fst, x2b .snd ) : prod2b - ( - a, - b, - ) + ( a, b ) : prod2b x3a : prod3a - ( - fst ≔ a, - snd ≔ b, - ) + ( fst ≔ a, snd ≔ b ) : prod3a x3b : prod3b - ( - a, - b, - ) + ( a, b ) : prod3b diff --git a/test/parser/parse_builtins.ml b/test/parser/parse_builtins.ml index adfbf543..92c096f7 100644 --- a/test/parser/parse_builtins.ml +++ b/test/parser/parse_builtins.ml @@ -131,7 +131,7 @@ let () = Term (Ident [ "B" ]); ] )); - assert (parse "()" = Notn ("parens", [])); + assert (parse "()" = Notn ("empty_tuple", [])); assert ( parse "(x := y)" @@ -142,13 +142,13 @@ let () = assert ( parse "(x := y , z := w)" = Notn - ( "parens", + ( "comma_tuple", [ Term (Notn ("coloneq", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); Term (Notn ("coloneq", [ Term (Ident [ "z" ]); Term (Ident [ "w" ]) ])); ] )); - assert (parse "(x , y)" = Notn ("parens", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); + assert (parse "(x , y)" = Notn ("comma_tuple", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); assert ( parse "[.x ↦ y | .z ↦ w]" @@ -159,7 +159,7 @@ let () = assert ( parse "(x := y , z := w,)" = Notn - ( "parens", + ( "comma_tuple", [ Term (Notn ("coloneq", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); Term (Notn ("coloneq", [ Term (Ident [ "z" ]); Term (Ident [ "w" ]) ])); @@ -168,7 +168,35 @@ let () = assert ( parse "(x := y , z := w, a ≔ b)" = Notn - ( "parens", + ( "comma_tuple", + [ + Term (Notn ("coloneq", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); + Term (Notn ("coloneq", [ Term (Ident [ "z" ]); Term (Ident [ "w" ]) ])); + Term (Notn ("coloneq", [ Term (Ident [ "a" ]); Term (Ident [ "b" ]) ])); + ] )); + + assert ( + parse "( x := y | z := w)" + = Notn + ( "bar_tuple", + [ + Term (Notn ("coloneq", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); + Term (Notn ("coloneq", [ Term (Ident [ "z" ]); Term (Ident [ "w" ]) ])); + ] )); + + assert ( + parse "(| x := y | z := w)" + = Notn + ( "bar_tuple", + [ + Term (Notn ("coloneq", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); + Term (Notn ("coloneq", [ Term (Ident [ "z" ]); Term (Ident [ "w" ]) ])); + ] )); + + assert ( + parse "(x := y | z := w | a ≔ b)" + = Notn + ( "bar_tuple", [ Term (Notn ("coloneq", [ Term (Ident [ "x" ]); Term (Ident [ "y" ]) ])); Term (Notn ("coloneq", [ Term (Ident [ "z" ]); Term (Ident [ "w" ]) ])); diff --git a/test/parser/reformat.expected b/test/parser/reformat.expected index dd4116cc..1209a4df 100644 --- a/test/parser/reformat.expected +++ b/test/parser/reformat.expected @@ -285,10 +285,7 @@ let x in x -( - fst ≔ M, - snd ≔ N, -) +( fst ≔ M, snd ≔ N ) f ( fst ≔ M, snd ≔ N ) @@ -326,18 +323,60 @@ f snd ≔ blah, ) +( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) + +( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) + +f + ( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ), + ) + +(| fst ≔ M | snd ≔ N ) + +f (| fst ≔ M | snd ≔ N ) + ( - fst ≔ M, - snd ≔ ( foo ≔ N, bar ≔ P ), + | fst ≔ blah blah blah blah blah blah + | snd ≔ + blah blah blah blah blah blah + blah blah ) ( - fst ≔ M, - snd ≔ ( foo ≔ N, bar ≔ P ), + | fst ≔ blah blah blah blah blah blah + | snd ≔ blah ) f - ( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ), + ( + | fst ≔ + blah blah blah blah blah blah + | snd ≔ + blah blah blah blah blah blah + blah blah + ) + +f + ( + | fst ≔ + blah blah blah blah blah blah + | snd ≔ blah + ) + +( + | fst ≔ M + | snd ≔ (| foo ≔ N | bar ≔ P ) +) + +( + | fst ≔ M + | snd ≔ (| foo ≔ N | bar ≔ P ) +) + +f + ( + | fst ≔ M + | snd ≔ (| foo ≔ N | bar ≔ P ) ) [ @@ -572,8 +611,7 @@ let x blah blah in x -( fst ≔ M, - snd ≔ N ) +( fst ≔ M, snd ≔ N ) f ( fst ≔ M, snd ≔ N ) @@ -603,16 +641,55 @@ f blah blah, snd ≔ blah ) -( fst ≔ M, - snd ≔ ( foo ≔ N, bar ≔ P ) ) +( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) -( fst ≔ M, - snd ≔ ( foo ≔ N, bar ≔ P ) ) +( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) f ( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) +(| fst ≔ M | snd ≔ N ) + +f (| fst ≔ M | snd ≔ N ) + +( + | fst ≔ blah blah blah blah blah blah + | snd ≔ + blah blah blah blah blah blah + blah blah ) + +( + | fst ≔ blah blah blah blah blah blah + | snd ≔ blah ) + +f + ( + | fst ≔ + blah blah blah blah blah blah + | snd ≔ + blah blah blah blah blah blah + blah blah ) + +f + ( + | fst ≔ + blah blah blah blah blah blah + | snd ≔ blah ) + +( + | fst ≔ M + | snd ≔ (| foo ≔ N | bar ≔ P ) ) + +( + | fst ≔ M + | snd ≔ (| foo ≔ N | bar ≔ P ) ) + +f + ( + | fst ≔ M + | snd ≔ (| foo ≔ N | bar ≔ P ) ) + [ | .fst ↦ blah blah blah blah blah blah diff --git a/test/parser/reformat.ml b/test/parser/reformat.ml index fd72caf7..92a9208b 100644 --- a/test/parser/reformat.ml +++ b/test/parser/reformat.ml @@ -104,6 +104,18 @@ let test_compactness () = reformat "(fst := M, snd := (foo := N, bar := P),)"; reformat "f (fst := M, snd := (foo := N, bar := P))"; + reformat "(fst := M | snd := N)"; + reformat "f (fst := M | snd := N)"; + reformat "(fst := blah blah blah blah blah blah | snd := blah blah blah blah blah blah blah blah)"; + reformat "(fst := blah blah blah blah blah blah | snd := blah)"; + reformat + "f (fst := blah blah blah blah blah blah | snd := blah blah blah blah blah blah blah blah)"; + reformat "f (fst := blah blah blah blah blah blah | snd := blah)"; + set_margin 40; + reformat "(fst := M | snd := (foo := N | bar := P))"; + reformat "(| fst := M | snd := (foo := N | bar := P))"; + reformat "f (fst := M | snd := (foo := N | bar := P))"; + (* Comatches *) set_margin 30; reformat From 8a4ab260c16ad9486d43e67139823369dd7fc2f2 Mon Sep 17 00:00:00 2001 From: Michael Shulman Date: Sun, 2 Mar 2025 12:53:21 -0800 Subject: [PATCH 4/5] allow bar notation for record types --- README.md | 9 ++++ lib/parser/builtins.ml | 108 +++++++++++++++++++++++++++++++++-------- 2 files changed, 97 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 860b343b..483ed492 100644 --- a/README.md +++ b/README.md @@ -541,6 +541,15 @@ or even zero fields: def ⊤ : Type ≔ sig () ``` +Finally, there is an alternative notation for record types that uses bars `|` instead of commas. In this case there may not be a trailing bar, but there can be an extra initial one. +``` +def Magma : Type ≔ sig ( +| t : Type +| op : t → t → t +) +``` +Some people prefer the look of this when a record type definition stretches over multiple lines, especially given its parallelism to data and codata types (below). + ### Tuples diff --git a/lib/parser/builtins.ml b/lib/parser/builtins.ml index c4ac8f93..88fae9cb 100644 --- a/lib/parser/builtins.ml +++ b/lib/parser/builtins.ml @@ -1597,7 +1597,7 @@ let () = set_print codata pp_codata let record = make "record" Outfix -let rec record_fields () = +let rec record_comma_fields () = Inner { empty_branch with @@ -1605,7 +1605,27 @@ let rec record_fields () = term = Some (TokMap.singleton Colon - (terms [ (Op ",", Lazy (lazy (record_fields ()))); (RParen, Done_closed record) ])); + (terms + [ (Op ",", Lazy (lazy (record_comma_fields ()))); (RParen, Done_closed record) ])); + } + +let rec record_bar_fields () = + term Colon (terms [ (Op "|", Lazy (lazy (record_bar_fields ()))); (RParen, Done_closed record) ]) + +let record_fields () = + Inner + { + empty_branch with + ops = TokMap.of_list [ (RParen, Done_closed record); (Op "|", record_bar_fields ()) ]; + term = + Some + (TokMap.singleton Colon + (terms + [ + (Op ",", Lazy (lazy (record_comma_fields ()))); + (Op "|", Lazy (lazy (record_bar_fields ()))); + (RParen, Done_closed record); + ])); } let () = @@ -1693,26 +1713,22 @@ let () = | _ -> fatal ?loc:attr.loc Unrecognized_attribute in (opacity, ws, obs) | _ -> fatal (Anomaly "invalid notation arguments for record") in - match take_opt Mapsto ws with - | None -> + match (take_opt Mapsto ws, obs) with + | None, _ -> let ctx = Bwv.snoc ctx None in let (Any_tel tel) = process_tel ctx StringSet.empty obs in { value = Record ({ value = [ None ]; loc }, tel, opacity); loc } - | Some _ -> ( - match obs with - | Term x :: obs -> - with_loc x.loc @@ fun () -> - let (Wrap vars) = Vec.of_list (List.map fst (process_var_list x [ (None, []) ])) in - let (Bplus ac) = Fwn.bplus (Vec.length vars) in - let ctx = Bwv.append ac ctx vars in - let (Any_tel tel) = process_tel ctx StringSet.empty obs in - Range.locate - (Record (locate_opt x.loc (namevec_of_vec ac vars), tel, opacity)) - loc - | _ -> fatal (Anomaly "invalid notation arguments for record"))); + | Some _, Term x :: obs -> + with_loc x.loc @@ fun () -> + let (Wrap vars) = Vec.of_list (List.map fst (process_var_list x [ (None, []) ])) in + let (Bplus ac) = Fwn.bplus (Vec.length vars) in + let ctx = Bwv.append ac ctx vars in + let (Any_tel tel) = process_tel ctx StringSet.empty obs in + Range.locate (Record (locate_opt x.loc (namevec_of_vec ac vars), tel, opacity)) loc + | _ -> fatal (Anomaly "invalid notation arguments for record")); } -let rec pp_record_fields ppf obs ws = +let rec pp_record_comma_fields ppf obs ws = match obs with | [] -> let wsrparen, ws = take RParen ws in @@ -1732,7 +1748,28 @@ let rec pp_record_fields ppf obs ws = else ( pp_tok ppf (Op ","); pp_ws `Break ppf wscomma); - pp_record_fields ppf obs ws + pp_record_comma_fields ppf obs ws + | [ _ ] -> fatal (Anomaly "invalid notation arguments for record") + +let rec pp_record_bar_fields ppf obs ws = + match obs with + | [] -> + let wsrparen, ws = take RParen ws in + taken_last ws; + wsrparen + | var :: body :: obs -> + pp_open_hvbox ppf 2; + let wsbar, ws = take (Op "|") ws in + pp_tok ppf (Op "|"); + pp_ws `Nobreak ppf wsbar; + pp_term `Break ppf var; + let wscolon, ws = take Colon ws in + pp_tok ppf Colon; + pp_ws `Nobreak ppf wscolon; + pp_close_box ppf (); + pp_term `None ppf body; + pp_print_cut ppf (); + pp_record_bar_fields ppf obs ws | [ _ ] -> fatal (Anomaly "invalid notation arguments for record") let pp_record space ppf obs ws = @@ -1740,15 +1777,46 @@ let pp_record space ppf obs ws = let wssig, ws = take Sig ws in pp_tok ppf Sig; pp_ws `Nobreak ppf wssig; + let obs, ws = + match (take_opt (Op "#") ws, obs) with + | None, _ -> (obs, ws) + | Some (wshash, ws), attr :: obs -> + pp_tok ppf (Op "#"); + pp_ws `None ppf wshash; + let wslp, ws = take LParen ws in + pp_tok ppf LParen; + pp_ws `None ppf wslp; + pp_term `None ppf attr; + let wsrp, ws = take RParen ws in + pp_tok ppf RParen; + pp_ws `None ppf wsrp; + (obs, ws) + | _ -> fatal (Anomaly "invalid notation arguments for record") in + let obs, ws = + match (take_opt Mapsto ws, obs) with + | None, _ -> (obs, ws) + | Some (wsmapsto, ws), x :: obs -> + pp_term `Nobreak ppf x; + pp_tok ppf Mapsto; + pp_ws `Break ppf wsmapsto; + (obs, ws) + | _ -> fatal (Anomaly "invalid notation arguments for record") in let wslparen, ws = take LParen ws in pp_tok ppf LParen; pp_ws `Break ppf wslparen; - let wsrparen = pp_record_fields ppf obs ws in + let wsrparen = + match take_opt (Op "|") ws with + | Some _ -> + let ws = must_start_with (Op "|") ws in + pp_record_bar_fields ppf obs ws + | None -> pp_record_comma_fields ppf obs ws in pp_tok ppf RParen; pp_ws space ppf wsrparen; pp_close_box ppf () -let () = set_print record pp_record +let () = + set_print record pp_record; + set_print_as_case record pp_record (* ******************** Datatypes From 42f3cc14dc39f65c3163b667e13f4f0142a2f4ce Mon Sep 17 00:00:00 2001 From: Michael Shulman Date: Sun, 2 Mar 2025 18:35:21 -0800 Subject: [PATCH 5/5] bar notation tests --- test/black/letin.t | 5 +++- test/black/transparent.t | 30 ++++++++++++++++----- test/black/tuples.t/run.t | 49 +++++++++++++++++++++++++++++++++++ test/black/tuples.t/tuples.ny | 32 +++++++++++++++++++++++ test/parser/reformat.expected | 33 +++++++++++++++++------ 5 files changed, 134 insertions(+), 15 deletions(-) create mode 100644 test/black/tuples.t/run.t create mode 100644 test/black/tuples.t/tuples.ny diff --git a/test/black/letin.t b/test/black/letin.t index e1949625..eeb05855 100644 --- a/test/black/letin.t +++ b/test/black/letin.t @@ -352,7 +352,10 @@ Let doesn't make a case tree unless it needs to: → info[I0000] ○ constant foo' defined - ( fst ≔ X ↦ X, snd ≔ Type ) + ( + fst ≔ X ↦ X, + snd ≔ Type, + ) : prod (Type → Type) Type diff --git a/test/black/transparent.t b/test/black/transparent.t index 008cf1fd..1acbab96 100644 --- a/test/black/transparent.t +++ b/test/black/transparent.t @@ -36,27 +36,45 @@ Transparency and translucency y1 : prod1 - ( fst ≔ x2a .fst, snd ≔ x2a .snd ) + ( + fst ≔ x2a .fst, + snd ≔ x2a .snd, + ) : prod2a - ( fst ≔ a, snd ≔ b ) + ( + fst ≔ a, + snd ≔ b, + ) : prod2a - ( x2b .fst, x2b .snd ) + ( + x2b .fst, + x2b .snd, + ) : prod2b - ( a, b ) + ( + a, + b, + ) : prod2b x3a : prod3a - ( fst ≔ a, snd ≔ b ) + ( + fst ≔ a, + snd ≔ b, + ) : prod3a x3b : prod3b - ( a, b ) + ( + a, + b, + ) : prod3b diff --git a/test/black/tuples.t/run.t b/test/black/tuples.t/run.t new file mode 100644 index 00000000..0fc9ae85 --- /dev/null +++ b/test/black/tuples.t/run.t @@ -0,0 +1,49 @@ + $ narya -v tuples.ny + → info[I0001] + ○ axiom A assumed + + → info[I0001] + ○ axiom B assumed + + → info[I0001] + ○ axiom a assumed + + → info[I0001] + ○ axiom b assumed + + → info[I0000] + ○ constant A×B defined + + → info[I0000] + ○ constant p1 defined + + → info[I0000] + ○ constant p2 defined + + → info[I0000] + ○ constant p3 defined + + → info[I0000] + ○ constant p4 defined + + → info[I0000] + ○ constant p5 defined + + → info[I0000] + ○ constant p6 defined + + → info[I0000] + ○ constant p7 defined + + → info[I0000] + ○ constant p8 defined + + → info[I0000] + ○ constant p9 defined + + → info[I0000] + ○ constant A×B' defined + + → info[I0000] + ○ constant p1' defined + diff --git a/test/black/tuples.t/tuples.ny b/test/black/tuples.t/tuples.ny new file mode 100644 index 00000000..e3398eb7 --- /dev/null +++ b/test/black/tuples.t/tuples.ny @@ -0,0 +1,32 @@ +axiom A:Type +axiom B:Type +axiom a:A +axiom b:B + +def A×B : Type ≔ sig ( fst:A, snd:B ) + +def p1 : A×B ≔ (a,b) +def p2 : A×B ≔ (a,b,) +def p3 : A×B ≔ (a|b) +def p4 : A×B ≔ ( +| a +| b +) +def p5 : A×B ≔ (fst ≔ a, snd ≔ b) +def p6 : A×B ≔ (snd ≔ b, fst ≔ a) +def p7 : A×B ≔ (snd ≔ b | fst ≔ a) +def p8 : A×B ≔ ( +| fst ≔ a +| snd ≔ b +) +def p9 : A×B ≔ ( +| snd ≔ b +| fst ≔ a +) + +def A×B' : Type ≔ sig ( +| fst : A +| snd : B +) + +def p1' : A×B' ≔ (a , b) diff --git a/test/parser/reformat.expected b/test/parser/reformat.expected index 1209a4df..072078f9 100644 --- a/test/parser/reformat.expected +++ b/test/parser/reformat.expected @@ -285,7 +285,10 @@ let x in x -( fst ≔ M, snd ≔ N ) +( + fst ≔ M, + snd ≔ N, +) f ( fst ≔ M, snd ≔ N ) @@ -323,15 +326,24 @@ f snd ≔ blah, ) -( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) +( + fst ≔ M, + snd ≔ ( foo ≔ N, bar ≔ P ), +) -( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) +( + fst ≔ M, + snd ≔ ( foo ≔ N, bar ≔ P ), +) f ( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ), ) -(| fst ≔ M | snd ≔ N ) +( + | fst ≔ M + | snd ≔ N +) f (| fst ≔ M | snd ≔ N ) @@ -611,7 +623,8 @@ let x blah blah in x -( fst ≔ M, snd ≔ N ) +( fst ≔ M, + snd ≔ N ) f ( fst ≔ M, snd ≔ N ) @@ -641,15 +654,19 @@ f blah blah, snd ≔ blah ) -( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) +( fst ≔ M, + snd ≔ ( foo ≔ N, bar ≔ P ) ) -( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) +( fst ≔ M, + snd ≔ ( foo ≔ N, bar ≔ P ) ) f ( fst ≔ M, snd ≔ ( foo ≔ N, bar ≔ P ) ) -(| fst ≔ M | snd ≔ N ) +( + | fst ≔ M + | snd ≔ N ) f (| fst ≔ M | snd ≔ N )