From b9a1530444e9b7c2d5d8c54499f33a6efd649f15 Mon Sep 17 00:00:00 2001 From: Guillaume Cluzel Date: Wed, 5 Jun 2024 09:59:52 +0200 Subject: [PATCH] Improve the error messages displayed by the JSON5 lexer and parser. * Display the line number in the error message * Try to harmonize the error messages with the ones displayed by the classic yojson parser. --- CHANGES.md | 4 ++ lib/json5/lexer.ml | 92 ++++++++++++++++++++++----------------------- lib/json5/parser.ml | 79 ++++++++++++++++++++++++-------------- 3 files changed, 101 insertions(+), 74 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 4ad3ea8..2ceabb7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,10 @@ *2024-06-04* +### Added + +- Add locations in the JSON5 parser error messages. + ### Fixed - Don't expose `Yojson_five` internals anymore (@Leonidas_from_XIV, #180) diff --git a/lib/json5/lexer.ml b/lib/json5/lexer.ml index 12f66b0..7eb93f3 100644 --- a/lib/json5/lexer.ml +++ b/lib/json5/lexer.ml @@ -9,7 +9,6 @@ type token = | CLOSE_BRACKET | COLON | COMMA - | COMMENT of string | TRUE | FALSE | NULL @@ -17,26 +16,33 @@ type token = | INT_OR_FLOAT of string | INT of string | STRING of string - | IDENTIFIER_NAME of string + | EOF let pp_token ppf = function - | OPEN_PAREN -> Format.fprintf ppf "'('" - | CLOSE_PAREN -> Format.fprintf ppf "')'" - | OPEN_BRACE -> Format.fprintf ppf "'{'" - | CLOSE_BRACE -> Format.fprintf ppf "'}'" - | OPEN_BRACKET -> Format.fprintf ppf "'['" - | CLOSE_BRACKET -> Format.fprintf ppf "']'" - | COLON -> Format.fprintf ppf "':'" - | COMMA -> Format.fprintf ppf "','" - | COMMENT s -> Format.fprintf ppf "COMMENT '%s'" s - | TRUE -> Format.fprintf ppf "'true'" - | FALSE -> Format.fprintf ppf "'false'" - | NULL -> Format.fprintf ppf "'null'" - | FLOAT s -> Format.fprintf ppf "FLOAT '%s'" s - | INT_OR_FLOAT s -> Format.fprintf ppf "INT_OR_FLOAT '%s'" s - | INT s -> Format.fprintf ppf "INT '%s'" s - | STRING s -> Format.fprintf ppf "STRING '%s'" s - | IDENTIFIER_NAME s -> Format.fprintf ppf "IDENTIFIER_NAME '%s'" s + | OPEN_PAREN -> Format.pp_print_string ppf "(" + | CLOSE_PAREN -> Format.pp_print_string ppf ")" + | OPEN_BRACE -> Format.pp_print_string ppf "{" + | CLOSE_BRACE -> Format.pp_print_string ppf "}" + | OPEN_BRACKET -> Format.pp_print_string ppf "[" + | CLOSE_BRACKET -> Format.pp_print_string ppf "]" + | COLON -> Format.pp_print_string ppf ":" + | COMMA -> Format.pp_print_string ppf "," + | TRUE -> Format.pp_print_string ppf "true" + | FALSE -> Format.pp_print_string ppf "false" + | NULL -> Format.pp_print_string ppf "null" + | FLOAT s | INT_OR_FLOAT s | INT s -> Format.pp_print_string ppf s + | STRING s -> Format.fprintf ppf "%S" s + | EOF -> Format.pp_print_string ppf "eof" + +let custom_error lexbuf = + let { Lexing.pos_fname = file; pos_lnum = line; _ }, _ = + Sedlexing.lexing_positions lexbuf + in + let file_line = + if String.equal file "" then "Line" else Format.sprintf "File %s, line" file + in + Format.sprintf "%s %d: Unexpected character '%s'" file_line line + (Sedlexing.Utf8.lexeme lexbuf) let source_character = [%sedlex.regexp? any] let line_terminator = [%sedlex.regexp? 0x000A | 0x000D | 0x2028 | 0x2029] @@ -182,10 +188,7 @@ let string_lex_single lexbuf strbuf = | Sub (source_character, ('\'' | line_terminator)) -> Buffer.add_string strbuf (lexeme lexbuf); lex lexbuf strbuf - | _ -> - lexeme lexbuf - |> Format.sprintf "Unexpected character: %s" - |> Result.error + | _ -> Error (custom_error lexbuf) in lex lexbuf strbuf @@ -202,10 +205,7 @@ let string_lex_double lexbuf strbuf = | Sub (source_character, ('"' | line_terminator)) -> Buffer.add_string strbuf (lexeme lexbuf); lex lexbuf strbuf - | _ -> - lexeme lexbuf - |> Format.sprintf "Unexpected character: %s" - |> Result.error + | _ -> Error (custom_error lexbuf) in lex lexbuf strbuf @@ -217,35 +217,35 @@ let string_lex lexbuf quote = let rec lex tokens buf = let lexeme = Sedlexing.Utf8.lexeme in + let pos, _ = Sedlexing.lexing_positions buf in match%sedlex buf with - | '(' -> lex (OPEN_PAREN :: tokens) buf - | ')' -> lex (CLOSE_PAREN :: tokens) buf - | '{' -> lex (OPEN_BRACE :: tokens) buf - | '}' -> lex (CLOSE_BRACE :: tokens) buf - | '[' -> lex (OPEN_BRACKET :: tokens) buf - | ']' -> lex (CLOSE_BRACKET :: tokens) buf - | ':' -> lex (COLON :: tokens) buf - | ',' -> lex (COMMA :: tokens) buf + | '(' -> lex ((OPEN_PAREN, pos) :: tokens) buf + | ')' -> lex ((CLOSE_PAREN, pos) :: tokens) buf + | '{' -> lex ((OPEN_BRACE, pos) :: tokens) buf + | '}' -> lex ((CLOSE_BRACE, pos) :: tokens) buf + | '[' -> lex ((OPEN_BRACKET, pos) :: tokens) buf + | ']' -> lex ((CLOSE_BRACKET, pos) :: tokens) buf + | ':' -> lex ((COLON, pos) :: tokens) buf + | ',' -> lex ((COMMA, pos) :: tokens) buf | Chars {|"'|} -> let* s = string_lex buf (lexeme buf) in - lex (STRING s :: tokens) buf + lex ((STRING s, pos) :: tokens) buf | multi_line_comment | single_line_comment | white_space | line_terminator -> lex tokens buf - | "true" -> lex (TRUE :: tokens) buf - | "false" -> lex (FALSE :: tokens) buf - | "null" -> lex (NULL :: tokens) buf + | "true" -> lex ((TRUE, pos) :: tokens) buf + | "false" -> lex ((FALSE, pos) :: tokens) buf + | "null" -> lex ((NULL, pos) :: tokens) buf | json5_float -> let s = lexeme buf in - lex (FLOAT s :: tokens) buf + lex ((FLOAT s, pos) :: tokens) buf | json5_int -> let s = lexeme buf in - lex (INT s :: tokens) buf + lex ((INT s, pos) :: tokens) buf | json5_int_or_float -> let s = lexeme buf in - lex (INT_OR_FLOAT s :: tokens) buf + lex ((INT_OR_FLOAT s, pos) :: tokens) buf | identifier_name -> let s = lexeme buf in - lex (IDENTIFIER_NAME s :: tokens) buf - | eof -> Ok (List.rev tokens) - | _ -> - lexeme buf |> Format.asprintf "Unexpected character: '%s'" |> Result.error + lex ((STRING s, pos) :: tokens) buf + | eof -> Ok (List.rev ((EOF, pos) :: tokens)) + | _ -> Error (custom_error buf) diff --git a/lib/json5/parser.ml b/lib/json5/parser.ml index 1f19c82..78baa56 100644 --- a/lib/json5/parser.ml +++ b/lib/json5/parser.ml @@ -1,46 +1,71 @@ open Let_syntax.Result +let custom_error pos error = + let file_line = + if String.equal pos.Lexing.pos_fname "" then "Line" + else Format.sprintf "File %s, line" pos.Lexing.pos_fname + in + let msg = Format.sprintf "%s %d: %s" file_line pos.Lexing.pos_lnum error in + Error msg + let rec parse_list acc = function - | [] -> Error "List never ends" - | Lexer.CLOSE_BRACKET :: xs -> Ok (acc, xs) + | [] -> Error "Unexpected end of input" + | [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input" + | (Lexer.CLOSE_BRACKET, _) :: xs -> Ok (acc, xs) | xs -> ( let* v, xs = parse xs in match xs with - | [] -> Error "List was not closed" - | Lexer.CLOSE_BRACKET :: xs | COMMA :: CLOSE_BRACKET :: xs -> + | [] -> Error "Unexpected end of input" + | [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input" + | (Lexer.CLOSE_BRACKET, _) :: xs | (COMMA, _) :: (CLOSE_BRACKET, _) :: xs + -> Ok (v :: acc, xs) - | COMMA :: xs -> parse_list (v :: acc) xs - | x :: _ -> + | (COMMA, _) :: xs -> parse_list (v :: acc) xs + | (x, pos) :: _ -> let s = Format.asprintf "Unexpected list token: %a" Lexer.pp_token x in - Error s) + custom_error pos s) and parse_assoc acc = function - | [] -> Error "Assoc never ends" - | Lexer.CLOSE_BRACE :: xs -> Ok (acc, xs) - | STRING k :: COLON :: xs | IDENTIFIER_NAME k :: COLON :: xs -> ( - let* v, xs = parse xs in - let item = (k, v) in + | [] -> Error "Unexpected end of input" + | [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input" + | (CLOSE_BRACE, _) :: xs -> Ok (acc, xs) + | (STRING k, _) :: xs -> ( match xs with - | [] -> Error "Object was not closed" - | Lexer.CLOSE_BRACE :: xs | COMMA :: CLOSE_BRACE :: xs -> - Ok (item :: acc, xs) - | COMMA :: xs -> parse_assoc (item :: acc) xs - | x :: _ -> + | [] -> Error "Unexpected end of input" + | [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input" + | (Lexer.COLON, _) :: xs -> ( + let* v, xs = parse xs in + let item = (k, v) in + match xs with + | [] -> Error "Unexpected end of input" + | [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input" + | (CLOSE_BRACE, _) :: xs | (COMMA, _) :: (CLOSE_BRACE, _) :: xs -> + Ok (item :: acc, xs) + | (COMMA, _) :: xs -> parse_assoc (item :: acc) xs + | (x, pos) :: _ -> + let s = + Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token + x + in + custom_error pos s) + | (x, pos) :: _ -> let s = - Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x + Format.asprintf "Expected ':' but found '%a'" Lexer.pp_token x in - Error s) - | x :: _ -> + custom_error pos s) + | (x, pos) :: _ -> let s = - Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x + Format.asprintf "Expected string or identifier but found '%a'" + Lexer.pp_token x in - Error s + custom_error pos s and parse = function - | [] -> Error "empty list of tokens" - | token :: xs -> ( + | [] -> Error "Unexpected end of input" + | [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input" + | (token, pos) :: xs -> ( match token with | TRUE -> Ok (Ast.Bool true, xs) | FALSE -> Ok (Bool false, xs) @@ -57,12 +82,10 @@ and parse = function (Ast.Assoc (List.rev a), xs) | x -> let s = Format.asprintf "Unexpected token: %a" Lexer.pp_token x in - Error s) + custom_error pos s) -let parse_from_lexbuf ?fname ?lnum lexbuffer = - let fname = Option.value fname ~default:"" in +let parse_from_lexbuf ?(fname = "") ?(lnum = 1) lexbuffer = Sedlexing.set_filename lexbuffer fname; - let lnum = Option.value lnum ~default:1 in let pos = { Lexing.pos_fname = fname; pos_lnum = lnum; pos_bol = 0; pos_cnum = 0 } in