Skip to content

Commit

Permalink
Improve the error messages displayed by the JSON5 lexer and parser.
Browse files Browse the repository at this point in the history
* Display the line number in the error message
* Try to harmonize the error messages with the ones displayed by
  the classic yojson parser.
  • Loading branch information
gcluzel committed Jun 7, 2024
1 parent a9c234f commit b9a1530
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 74 deletions.
4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@

*2024-06-04*

### Added

- Add locations (file name and line number) to the JSON5 lexer and parser error messages.

### Fixed

- Don't expose `Yojson_five` internals anymore (@Leonidas_from_XIV, #180)
Expand Down
92 changes: 46 additions & 46 deletions lib/json5/lexer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -9,34 +9,40 @@ type token =
| CLOSE_BRACKET
| COLON
| COMMA
| COMMENT of string
| TRUE
| FALSE
| NULL
| FLOAT of string
| INT_OR_FLOAT of string
| INT of string
| STRING of string
| IDENTIFIER_NAME of string
| EOF

let pp_token ppf = function
| OPEN_PAREN -> Format.fprintf ppf "'('"
| CLOSE_PAREN -> Format.fprintf ppf "')'"
| OPEN_BRACE -> Format.fprintf ppf "'{'"
| CLOSE_BRACE -> Format.fprintf ppf "'}'"
| OPEN_BRACKET -> Format.fprintf ppf "'['"
| CLOSE_BRACKET -> Format.fprintf ppf "']'"
| COLON -> Format.fprintf ppf "':'"
| COMMA -> Format.fprintf ppf "','"
| COMMENT s -> Format.fprintf ppf "COMMENT '%s'" s
| TRUE -> Format.fprintf ppf "'true'"
| FALSE -> Format.fprintf ppf "'false'"
| NULL -> Format.fprintf ppf "'null'"
| FLOAT s -> Format.fprintf ppf "FLOAT '%s'" s
| INT_OR_FLOAT s -> Format.fprintf ppf "INT_OR_FLOAT '%s'" s
| INT s -> Format.fprintf ppf "INT '%s'" s
| STRING s -> Format.fprintf ppf "STRING '%s'" s
| IDENTIFIER_NAME s -> Format.fprintf ppf "IDENTIFIER_NAME '%s'" s
| OPEN_PAREN -> Format.pp_print_string ppf "("
| CLOSE_PAREN -> Format.pp_print_string ppf ")"
| OPEN_BRACE -> Format.pp_print_string ppf "{"
| CLOSE_BRACE -> Format.pp_print_string ppf "}"
| OPEN_BRACKET -> Format.pp_print_string ppf "["
| CLOSE_BRACKET -> Format.pp_print_string ppf "]"
| COLON -> Format.pp_print_string ppf ":"
| COMMA -> Format.pp_print_string ppf ","
| TRUE -> Format.pp_print_string ppf "true"
| FALSE -> Format.pp_print_string ppf "false"
| NULL -> Format.pp_print_string ppf "null"
| FLOAT s | INT_OR_FLOAT s | INT s -> Format.pp_print_string ppf s
| STRING s -> Format.fprintf ppf "%S" s
| EOF -> Format.pp_print_string ppf "eof"

(** [custom_error lexbuf] formats an "Unexpected character" message for the
    lexeme currently matched in [lexbuf].

    The location prefix is taken from the first of the two positions returned
    by [Sedlexing.lexing_positions]: ["Line N"] when that position carries an
    empty file name, ["File F, line N"] otherwise. The offending lexeme is
    rendered as UTF-8 between single quotes. *)
let custom_error lexbuf =
  let pos, _ = Sedlexing.lexing_positions lexbuf in
  let location =
    match pos.Lexing.pos_fname with
    | "" -> "Line"
    | fname -> Format.sprintf "File %s, line" fname
  in
  let offending = Sedlexing.Utf8.lexeme lexbuf in
  Format.sprintf "%s %d: Unexpected character '%s'" location
    pos.Lexing.pos_lnum offending

let source_character = [%sedlex.regexp? any]
let line_terminator = [%sedlex.regexp? 0x000A | 0x000D | 0x2028 | 0x2029]
Expand Down Expand Up @@ -182,10 +188,7 @@ let string_lex_single lexbuf strbuf =
| Sub (source_character, ('\'' | line_terminator)) ->
Buffer.add_string strbuf (lexeme lexbuf);
lex lexbuf strbuf
| _ ->
lexeme lexbuf
|> Format.sprintf "Unexpected character: %s"
|> Result.error
| _ -> Error (custom_error lexbuf)
in
lex lexbuf strbuf

Expand All @@ -202,10 +205,7 @@ let string_lex_double lexbuf strbuf =
| Sub (source_character, ('"' | line_terminator)) ->
Buffer.add_string strbuf (lexeme lexbuf);
lex lexbuf strbuf
| _ ->
lexeme lexbuf
|> Format.sprintf "Unexpected character: %s"
|> Result.error
| _ -> Error (custom_error lexbuf)
in
lex lexbuf strbuf

Expand All @@ -217,35 +217,35 @@ let string_lex lexbuf quote =
let rec lex tokens buf =
let lexeme = Sedlexing.Utf8.lexeme in
let pos, _ = Sedlexing.lexing_positions buf in
match%sedlex buf with
| '(' -> lex (OPEN_PAREN :: tokens) buf
| ')' -> lex (CLOSE_PAREN :: tokens) buf
| '{' -> lex (OPEN_BRACE :: tokens) buf
| '}' -> lex (CLOSE_BRACE :: tokens) buf
| '[' -> lex (OPEN_BRACKET :: tokens) buf
| ']' -> lex (CLOSE_BRACKET :: tokens) buf
| ':' -> lex (COLON :: tokens) buf
| ',' -> lex (COMMA :: tokens) buf
| '(' -> lex ((OPEN_PAREN, pos) :: tokens) buf
| ')' -> lex ((CLOSE_PAREN, pos) :: tokens) buf
| '{' -> lex ((OPEN_BRACE, pos) :: tokens) buf
| '}' -> lex ((CLOSE_BRACE, pos) :: tokens) buf
| '[' -> lex ((OPEN_BRACKET, pos) :: tokens) buf
| ']' -> lex ((CLOSE_BRACKET, pos) :: tokens) buf
| ':' -> lex ((COLON, pos) :: tokens) buf
| ',' -> lex ((COMMA, pos) :: tokens) buf
| Chars {|"'|} ->
let* s = string_lex buf (lexeme buf) in
lex (STRING s :: tokens) buf
lex ((STRING s, pos) :: tokens) buf
| multi_line_comment | single_line_comment | white_space | line_terminator ->
lex tokens buf
| "true" -> lex (TRUE :: tokens) buf
| "false" -> lex (FALSE :: tokens) buf
| "null" -> lex (NULL :: tokens) buf
| "true" -> lex ((TRUE, pos) :: tokens) buf
| "false" -> lex ((FALSE, pos) :: tokens) buf
| "null" -> lex ((NULL, pos) :: tokens) buf
| json5_float ->
let s = lexeme buf in
lex (FLOAT s :: tokens) buf
lex ((FLOAT s, pos) :: tokens) buf
| json5_int ->
let s = lexeme buf in
lex (INT s :: tokens) buf
lex ((INT s, pos) :: tokens) buf
| json5_int_or_float ->
let s = lexeme buf in
lex (INT_OR_FLOAT s :: tokens) buf
lex ((INT_OR_FLOAT s, pos) :: tokens) buf
| identifier_name ->
let s = lexeme buf in
lex (IDENTIFIER_NAME s :: tokens) buf
| eof -> Ok (List.rev tokens)
| _ ->
lexeme buf |> Format.asprintf "Unexpected character: '%s'" |> Result.error
lex ((STRING s, pos) :: tokens) buf
| eof -> Ok (List.rev ((EOF, pos) :: tokens))
| _ -> Error (custom_error buf)
79 changes: 51 additions & 28 deletions lib/json5/parser.ml
Original file line number Diff line number Diff line change
@@ -1,46 +1,71 @@
open Let_syntax.Result

(** [custom_error pos error] wraps [error] in an [Error] after prefixing it
    with the location stored in [pos].

    The prefix is ["Line N: "] when [pos] has an empty file name and
    ["File F, line N: "] otherwise, where [N] is [pos.pos_lnum] and [F] is
    [pos.pos_fname]. *)
let custom_error { Lexing.pos_fname; pos_lnum; _ } error =
  let location =
    match pos_fname with
    | "" -> "Line"
    | fname -> Format.sprintf "File %s, line" fname
  in
  Error (Format.sprintf "%s %d: %s" location pos_lnum error)

let rec parse_list acc = function
| [] -> Error "List never ends"
| Lexer.CLOSE_BRACKET :: xs -> Ok (acc, xs)
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input"
| (Lexer.CLOSE_BRACKET, _) :: xs -> Ok (acc, xs)
| xs -> (
let* v, xs = parse xs in
match xs with
| [] -> Error "List was not closed"
| Lexer.CLOSE_BRACKET :: xs | COMMA :: CLOSE_BRACKET :: xs ->
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input"
| (Lexer.CLOSE_BRACKET, _) :: xs | (COMMA, _) :: (CLOSE_BRACKET, _) :: xs
->
Ok (v :: acc, xs)
| COMMA :: xs -> parse_list (v :: acc) xs
| x :: _ ->
| (COMMA, _) :: xs -> parse_list (v :: acc) xs
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected list token: %a" Lexer.pp_token x
in
Error s)
custom_error pos s)

and parse_assoc acc = function
| [] -> Error "Assoc never ends"
| Lexer.CLOSE_BRACE :: xs -> Ok (acc, xs)
| STRING k :: COLON :: xs | IDENTIFIER_NAME k :: COLON :: xs -> (
let* v, xs = parse xs in
let item = (k, v) in
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input"
| (CLOSE_BRACE, _) :: xs -> Ok (acc, xs)
| (STRING k, _) :: xs -> (
match xs with
| [] -> Error "Object was not closed"
| Lexer.CLOSE_BRACE :: xs | COMMA :: CLOSE_BRACE :: xs ->
Ok (item :: acc, xs)
| COMMA :: xs -> parse_assoc (item :: acc) xs
| x :: _ ->
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input"
| (Lexer.COLON, _) :: xs -> (
let* v, xs = parse xs in
let item = (k, v) in
match xs with
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input"
| (CLOSE_BRACE, _) :: xs | (COMMA, _) :: (CLOSE_BRACE, _) :: xs ->
Ok (item :: acc, xs)
| (COMMA, _) :: xs -> parse_assoc (item :: acc) xs
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token
x
in
custom_error pos s)
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x
Format.asprintf "Expected ':' but found '%a'" Lexer.pp_token x
in
Error s)
| x :: _ ->
custom_error pos s)
| (x, pos) :: _ ->
let s =
Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x
Format.asprintf "Expected string or identifier but found '%a'"
Lexer.pp_token x
in
Error s
custom_error pos s

and parse = function
| [] -> Error "empty list of tokens"
| token :: xs -> (
| [] -> Error "Unexpected end of input"
| [ (Lexer.EOF, pos) ] -> custom_error pos "Unexpected end of input"
| (token, pos) :: xs -> (
match token with
| TRUE -> Ok (Ast.Bool true, xs)
| FALSE -> Ok (Bool false, xs)
Expand All @@ -57,12 +82,10 @@ and parse = function
(Ast.Assoc (List.rev a), xs)
| x ->
let s = Format.asprintf "Unexpected token: %a" Lexer.pp_token x in
Error s)
custom_error pos s)

let parse_from_lexbuf ?fname ?lnum lexbuffer =
let fname = Option.value fname ~default:"" in
let parse_from_lexbuf ?(fname = "") ?(lnum = 1) lexbuffer =
Sedlexing.set_filename lexbuffer fname;
let lnum = Option.value lnum ~default:1 in
let pos =
{ Lexing.pos_fname = fname; pos_lnum = lnum; pos_bol = 0; pos_cnum = 0 }
in
Expand Down

0 comments on commit b9a1530

Please sign in to comment.