diff --git a/CHANGES.md b/CHANGES.md index 9f467c02..ac2447e9 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,8 @@ ### Added +- Added support for JSON5 (@dhilist, @gorm-issuu, @gertsonderby, #152) + ### Changed ### Deprecated diff --git a/dune-project b/dune-project index 242aac15..f766e1eb 100644 --- a/dune-project +++ b/dune-project @@ -35,3 +35,14 @@ meant for developers that are worried about performance changes in Yojson.") (core (>= v0.14.0)) (core_unix (>= v0.14.0)) (sexplib (>= v0.9.0)))) + +(package + (name yojson-json5) + (synopsis "Yojson_json5 is a parsing and printing library for the JSON5 format") + (description "Yojson_json5 is a parsing and printing library for the JSON5 format. +It supports parsing JSON5 to Yojson.Basic.t and Yojson.Safe.t types.") + (depends + (ocaml (>= 4.08)) + (sedlex (>= 2.5)) + (alcotest (and :with-test (>= 0.8.5))))) + diff --git a/lib/json5/ast.ml b/lib/json5/ast.ml new file mode 100644 index 00000000..669ed98b --- /dev/null +++ b/lib/json5/ast.ml @@ -0,0 +1,27 @@ +type t = + | Assoc of (string * t) list + | List of t list + | StringLit of string + | IntLit of string + | FloatLit of string + | Bool of bool + | Null + +let rec to_basic = function + | Assoc l -> `Assoc (List.map (fun (name, obj) -> (name, to_basic obj)) l) + | List l -> `List (List.map to_basic l) + | StringLit s -> `String s + | FloatLit s -> `Float (float_of_string s) + | IntLit s -> `Int (int_of_string s) + | Bool b -> `Bool b + | Null -> `Null + +let rec to_safe = function + | Assoc l -> `Assoc (List.map (fun (name, obj) -> (name, to_safe obj)) l) + | List l -> `List (List.map to_safe l) + | StringLit s -> `String s + | FloatLit s -> `Float (float_of_string s) + | IntLit s -> ( + match int_of_string_opt s with Some i -> `Int i | None -> `Intlit s) + | Bool b -> `Bool b + | Null -> `Null diff --git a/lib/json5/basic.ml b/lib/json5/basic.ml new file mode 100644 index 00000000..02998593 --- /dev/null +++ b/lib/json5/basic.ml @@ -0,0 +1,7 @@ +include Yojson.Basic + +include Read.Make (struct + type t = Yojson.Basic.t + + let convert = Ast.to_basic +end) diff --git a/lib/json5/dune b/lib/json5/dune new file mode 100644 index 00000000..f0e63ab8 --- /dev/null +++ b/lib/json5/dune @@ -0,0 +1,6 @@ +(library + (name yojson_json5) + (public_name yojson-json5) + (libraries yojson sedlex) + (preprocess + (pps sedlex.ppx))) diff --git a/lib/json5/let_syntax.ml b/lib/json5/let_syntax.ml new file mode 100644 index 00000000..7196494f --- /dev/null +++ b/lib/json5/let_syntax.ml @@ -0,0 +1,4 @@ +module Result = struct + let ( let* ) = Result.bind + let ( let+ ) v f = Result.map f v +end diff --git a/lib/json5/lexer.ml b/lib/json5/lexer.ml new file mode 100644 index 00000000..12f66b0a --- /dev/null +++ b/lib/json5/lexer.ml @@ -0,0 +1,251 @@ +open Let_syntax.Result + +type token = + | OPEN_PAREN + | CLOSE_PAREN + | OPEN_BRACE + | CLOSE_BRACE + | OPEN_BRACKET + | CLOSE_BRACKET + | COLON + | COMMA + | COMMENT of string + | TRUE + | FALSE + | NULL + | FLOAT of string + | INT_OR_FLOAT of string + | INT of string + | STRING of string + | IDENTIFIER_NAME of string + +let pp_token ppf = function + | OPEN_PAREN -> Format.fprintf ppf "'('" + | CLOSE_PAREN -> Format.fprintf ppf "')'" + | OPEN_BRACE -> Format.fprintf ppf "'{'" + | CLOSE_BRACE -> Format.fprintf ppf "'}'" + | OPEN_BRACKET -> Format.fprintf ppf "'['" + | CLOSE_BRACKET -> Format.fprintf ppf "']'" + | COLON -> Format.fprintf ppf "':'" + | COMMA -> Format.fprintf ppf "','" + | COMMENT s -> Format.fprintf ppf "COMMENT '%s'" s + | TRUE -> Format.fprintf ppf "'true'" + | FALSE -> Format.fprintf ppf "'false'" + | NULL -> Format.fprintf ppf "'null'" + | FLOAT s -> Format.fprintf ppf "FLOAT '%s'" s + | INT_OR_FLOAT s -> Format.fprintf ppf "INT_OR_FLOAT '%s'" s + | INT s -> Format.fprintf ppf "INT '%s'" s + | STRING s -> Format.fprintf ppf "STRING '%s'" s + | IDENTIFIER_NAME s -> Format.fprintf ppf "IDENTIFIER_NAME '%s'" s + +let source_character = [%sedlex.regexp? any] +let line_terminator = [%sedlex.regexp? 0x000A | 0x000D | 0x2028 | 0x2029] + +let line_terminator_sequence = + [%sedlex.regexp? 0x000A | 0x000D, Opt 0x000A | 0x2028 | 0x2029] + +(* NUMBERS, 7.8.3 *) +let non_zero_digit = [%sedlex.regexp? '1' .. '9'] +let decimal_digit = [%sedlex.regexp? '0' .. '9'] +let decimal_digits = [%sedlex.regexp? Plus decimal_digit] +let hex_digit = [%sedlex.regexp? '0' .. '9' | 'a' .. 'f' | 'A' .. 'F'] +let exponent_indicator = [%sedlex.regexp? 'e' | 'E'] + +let signed_integer = + [%sedlex.regexp? decimal_digits | '+', decimal_digits | '-', decimal_digits] + +let exponent_part = [%sedlex.regexp? exponent_indicator, signed_integer] + +let decimal_integer_literal = + [%sedlex.regexp? '0' | non_zero_digit, Opt decimal_digits] + +let hex_integer_literal = + [%sedlex.regexp? "0x", Plus hex_digit | "0X", Plus hex_digit] + +(* float *) +let float_literal = + [%sedlex.regexp? + ( decimal_integer_literal, '.', Opt decimal_digits, Opt exponent_part + | '.', decimal_digits, Opt exponent_part )] + +let json5_float = + [%sedlex.regexp? float_literal | '+', float_literal | '-', float_literal] + +(* int_or_float *) +let int_or_float_literal = + [%sedlex.regexp? decimal_integer_literal, Opt exponent_part] + +let json5_int_or_float = + [%sedlex.regexp? + int_or_float_literal | '+', int_or_float_literal | '-', int_or_float_literal] + +(* int/hex *) +let int_literal = + [%sedlex.regexp? decimal_digits | '+', decimal_digits | '-', decimal_digits] + +let json5_int = + [%sedlex.regexp? + ( hex_integer_literal + | '+', hex_integer_literal + | '-', hex_integer_literal + | int_literal )] + +(* STRING LITERALS, 7.8.4 *) +let unicode_escape_sequence = + [%sedlex.regexp? 'u', hex_digit, hex_digit, hex_digit, hex_digit] + +let single_escape_character = [%sedlex.regexp? Chars {|'"\\bfnrtv|}] + +let escape_character = + [%sedlex.regexp? single_escape_character | decimal_digit | 'x' | 'u'] + +let non_escape_character = + [%sedlex.regexp? Sub (source_character, (escape_character | line_terminator))] + +let character_escape_sequence = + [%sedlex.regexp? single_escape_character | non_escape_character] + +let line_continuation = [%sedlex.regexp? '\\', line_terminator_sequence] +let hex_escape_sequence = [%sedlex.regexp? 'x', hex_digit, hex_digit] + +let escape_sequence = + [%sedlex.regexp? + ( character_escape_sequence + | '0', Opt (decimal_digit, decimal_digit) + | hex_escape_sequence | unicode_escape_sequence )] + +let single_string_character = + [%sedlex.regexp? + ( Sub (source_character, ('\'' | '\\' | line_terminator)) + | '\\', escape_sequence + | line_continuation )] + +let double_string_character = + [%sedlex.regexp? + ( Sub (source_character, ('"' | '\\' | line_terminator)) + | '\\', escape_sequence + | line_continuation )] + +let string_literal = + [%sedlex.regexp? + ( '"', Star double_string_character, '"' + | '\'', Star single_string_character, '\'' )] + +(* IDENTIFIER_NAME (keys in objects) *) +let unicode_combining_mark = [%sedlex.regexp? mn | mc] +let unicode_digit = [%sedlex.regexp? nd] +let unicode_connector_punctuation = [%sedlex.regexp? pc] +let unicode_letter = [%sedlex.regexp? lu | ll | lt | lm | lo | nl] +let zwnj = [%sedlex.regexp? 0x200C] +let zwj = [%sedlex.regexp? 0x200D] + +let identifier_start = + [%sedlex.regexp? unicode_letter | '$' | '_' | '\\', unicode_escape_sequence] + +let identifier_part = + [%sedlex.regexp? + ( identifier_start | unicode_combining_mark | unicode_digit + | unicode_connector_punctuation | zwnj | zwj )] + +let identifier_name = [%sedlex.regexp? identifier_start, Star identifier_part] + +(* COMMENTS 7.4 *) +let single_line_comment_char = + [%sedlex.regexp? Sub (source_character, line_terminator)] + +let single_line_comment = [%sedlex.regexp? "//", Star single_line_comment_char] +let multi_line_not_asterisk_char = [%sedlex.regexp? Sub (source_character, '*')] +let multi_line_not_slash_char = [%sedlex.regexp? Sub (source_character, '/')] + +let multi_line_comment_char = + [%sedlex.regexp? + multi_line_not_asterisk_char | '*', Plus multi_line_not_slash_char] + +let multi_line_comment = + [%sedlex.regexp? "/*", Star multi_line_comment_char, Opt '*', "*/"] + +let comment = [%sedlex.regexp? multi_line_comment | single_line_comment] + +let white_space = + [%sedlex.regexp? 0x0009 | 0x000B | 0x000C | 0x0020 | 0x00A0 | 0xFEFF | zs] + +let string_lex_single lexbuf strbuf = + let lexeme = Sedlexing.Utf8.lexeme in + let rec lex lexbuf strbuf = + match%sedlex lexbuf with + | '\'' -> Ok (Buffer.contents strbuf) + | '\\', escape_sequence -> + let* s = Unescape.unescape (lexeme lexbuf) in + Buffer.add_string strbuf s; + lex lexbuf strbuf + | line_continuation -> lex lexbuf strbuf + | Sub (source_character, ('\'' | line_terminator)) -> + Buffer.add_string strbuf (lexeme lexbuf); + lex lexbuf strbuf + | _ -> + lexeme lexbuf + |> Format.sprintf "Unexpected character: %s" + |> Result.error + in + lex lexbuf strbuf + +let string_lex_double lexbuf strbuf = + let lexeme = Sedlexing.Utf8.lexeme in + let rec lex lexbuf strbuf = + match%sedlex lexbuf with + | '"' -> Ok (Buffer.contents strbuf) + | '\\', escape_sequence -> + let* s = Unescape.unescape (lexeme lexbuf) in + Buffer.add_string strbuf s; + lex lexbuf strbuf + | line_continuation -> lex lexbuf strbuf + | Sub (source_character, ('"' | line_terminator)) -> + Buffer.add_string strbuf (lexeme lexbuf); + lex lexbuf strbuf + | _ -> + lexeme lexbuf + |> Format.sprintf "Unexpected character: %s" + |> Result.error + in + lex lexbuf strbuf + +let string_lex lexbuf quote = + let strbuf = Buffer.create 200 in + if quote = "'" then string_lex_single lexbuf strbuf + else if quote = {|"|} then string_lex_double lexbuf strbuf + else Error (Format.sprintf "Invalid string quote %S" quote) + +let rec lex tokens buf = + let lexeme = Sedlexing.Utf8.lexeme in + match%sedlex buf with + | '(' -> lex (OPEN_PAREN :: tokens) buf + | ')' -> lex (CLOSE_PAREN :: tokens) buf + | '{' -> lex (OPEN_BRACE :: tokens) buf + | '}' -> lex (CLOSE_BRACE :: tokens) buf + | '[' -> lex (OPEN_BRACKET :: tokens) buf + | ']' -> lex (CLOSE_BRACKET :: tokens) buf + | ':' -> lex (COLON :: tokens) buf + | ',' -> lex (COMMA :: tokens) buf + | Chars {|"'|} -> + let* s = string_lex buf (lexeme buf) in + lex (STRING s :: tokens) buf + | multi_line_comment | single_line_comment | white_space | line_terminator -> + lex tokens buf + | "true" -> lex (TRUE :: tokens) buf + | "false" -> lex (FALSE :: tokens) buf + | "null" -> lex (NULL :: tokens) buf + | json5_float -> + let s = lexeme buf in + lex (FLOAT s :: tokens) buf + | json5_int -> + let s = lexeme buf in + lex (INT s :: tokens) buf + | json5_int_or_float -> + let s = lexeme buf in + lex (INT_OR_FLOAT s :: tokens) buf + | identifier_name -> + let s = lexeme buf in + lex (IDENTIFIER_NAME s :: tokens) buf + | eof -> Ok (List.rev tokens) + | _ -> + lexeme buf |> Format.asprintf "Unexpected character: '%s'" |> Result.error diff --git a/lib/json5/parser.ml b/lib/json5/parser.ml new file mode 100644 index 00000000..ec7bd8c0 --- /dev/null +++ b/lib/json5/parser.ml @@ -0,0 +1,84 @@ +open Let_syntax.Result + +let rec parse_list acc = function + | [] -> Error "List never ends" + | Lexer.CLOSE_BRACKET :: xs | COMMA :: CLOSE_BRACKET :: xs -> Ok (acc, xs) + | xs -> ( + let* v, xs = parse xs in + match xs with + | [] -> Error "List was not closed" + | Lexer.CLOSE_BRACKET :: xs | COMMA :: CLOSE_BRACKET :: xs -> + Ok (v :: acc, xs) + | COMMA :: xs -> parse_list (v :: acc) xs + | x :: _ -> + let s = + Format.asprintf "Unexpected list token: %a" Lexer.pp_token x + in + Error s) + +and parse_assoc acc = function + | [] -> Error "Assoc never ends" + | Lexer.CLOSE_BRACE :: xs | COMMA :: CLOSE_BRACE :: xs -> Ok (acc, xs) + | STRING k :: COLON :: xs | IDENTIFIER_NAME k :: COLON :: xs -> ( + let* v, xs = parse xs in + let item = (k, v) in + match xs with + | [] -> Error "Object was not closed" + | Lexer.CLOSE_BRACE :: xs | COMMA :: CLOSE_BRACE :: xs -> + Ok (item :: acc, xs) + | COMMA :: xs -> parse_assoc (item :: acc) xs + | x :: _ -> + let s = + Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x + in + Error s) + | x :: _ -> + let s = + Format.asprintf "Unexpected assoc list token: %a" Lexer.pp_token x + in + Error s + +and parse = function + | [] -> Error "empty list of tokens" + | token :: xs -> ( + match token with + | TRUE -> Ok (Ast.Bool true, xs) + | FALSE -> Ok (Bool false, xs) + | NULL -> Ok (Null, xs) + | INT v -> Ok (IntLit v, xs) + | FLOAT v -> Ok (FloatLit v, xs) + | INT_OR_FLOAT v -> Ok (FloatLit v, xs) + | STRING s -> Ok (StringLit s, xs) + | OPEN_BRACKET -> + let+ l, xs = parse_list [] xs in + (Ast.List (List.rev l), xs) + | OPEN_BRACE -> + let+ a, xs = parse_assoc [] xs in + (Ast.Assoc (List.rev a), xs) + | x -> + let s = Format.asprintf "Unexpected token: %a" Lexer.pp_token x in + Error s) + +let parse_from_lexbuf ?fname ?lnum lexbuffer = + let fname = Option.value fname ~default:"" in + Sedlexing.set_filename lexbuffer fname; + let lnum = Option.value lnum ~default:1 in + let pos = + { Lexing.pos_fname = fname; pos_lnum = lnum; pos_bol = 0; pos_cnum = 0 } + in + Sedlexing.set_position lexbuffer pos; + let* tokens = Lexer.lex [] lexbuffer in + let+ ast, _unparsed = parse tokens in + ast + +let parse_from_string ?fname ?lnum input = + parse_from_lexbuf (Sedlexing.Utf8.from_string input) ?fname ?lnum + +let parse_from_channel ?fname ?lnum ic = + parse_from_lexbuf (Sedlexing.Utf8.from_channel ic) ?fname ?lnum + +let parse_from_file ?fname ?lnum filename = + let ic = open_in filename in + let out = parse_from_channel ?fname ?lnum ic in + close_in ic; + out diff --git a/lib/json5/read.ml b/lib/json5/read.ml new file mode 100644 index 00000000..bd2742c7 --- /dev/null +++ b/lib/json5/read.ml @@ -0,0 +1,34 @@ +open Let_syntax.Result + +module type S = sig + type t + + val convert : Ast.t -> t +end + +module type Out = sig + type t + + val from_string : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val from_channel : + ?fname:string -> ?lnum:int -> in_channel -> (t, string) result + + val from_file : ?fname:string -> ?lnum:int -> string -> (t, string) result +end + +module Make (F : S) : Out with type t = F.t = struct + type t = F.t + + let from_string ?fname ?lnum input = + let+ ast = Parser.parse_from_string ?fname ?lnum input in + F.convert ast + + let from_channel ?fname ?lnum ic = + let+ ast = Parser.parse_from_channel ?fname ?lnum ic in + F.convert ast + + let from_file ?fname ?lnum file = + let+ ast = Parser.parse_from_file ?fname ?lnum file in + F.convert ast +end diff --git a/lib/json5/safe.ml b/lib/json5/safe.ml new file mode 100644 index 00000000..1eace77c --- /dev/null +++ b/lib/json5/safe.ml @@ -0,0 +1,7 @@ +include Yojson.Safe + +include Read.Make (struct + type t = Yojson.Safe.t + + let convert = Ast.to_safe +end) diff --git a/lib/json5/unescape.ml b/lib/json5/unescape.ml new file mode 100644 index 00000000..8325dbb1 --- /dev/null +++ b/lib/json5/unescape.ml @@ -0,0 +1,82 @@ +open Let_syntax.Result + +let ( % ) = Int.logor +let ( << ) = Int.shift_left +let ( >> ) = Int.shift_right +let ( & ) = Int.logand + +let utf_8_string_of_unicode i = + if i <= 0x007F then ( + let b = Bytes.create 1 in + Bytes.set_int8 b 0 i; + Ok (Bytes.to_string b)) + else if i <= 0x07FF then ( + let five_high_bits = i >> 6 & 0b11111 in + let six_low_bits = i & 0b111111 in + let high = 0b11000000 % five_high_bits << 8 in + let low = 0b10000000 % six_low_bits in + let n = high % low in + let b = Bytes.create 2 in + Bytes.set_int16_be b 0 n; + Ok (Bytes.to_string b)) + else if i <= 0xFFFF then ( + let four_high_bits = i >> 12 & 0b1111 in + let six_mid_bits = i >> 6 & 0b111111 in + let six_low_bits = i & 0b111111 in + let high = 0b11100000 % four_high_bits << 16 in + let mid = 0b10000000 % six_mid_bits << 8 in + let low = 0b10000000 % six_low_bits in + let n = high % mid % low in + let b = Bytes.create 3 in + Bytes.set_int32_be b 0 (Int32.of_int n); + Ok (Bytes.to_string b)) + else if i <= 0x10FFFF then ( + let three_hh_bits = i >> 18 & 0b111 in + let six_hl_bits = i >> 12 & 0b111111 in + let six_lh_bits = i >> 6 & 0b111111 in + let six_ll_bits = i & 0b111111 in + let hh = 0b11110000 % three_hh_bits << 24 in + let hl = 0b10000000 % six_hl_bits << 16 in + let lh = 0b10000000 % six_lh_bits << 8 in + let ll = 0b10000000 % six_ll_bits in + let n = hh % hl % lh % ll in + let b = Bytes.create 4 in + Bytes.set_int32_be b 0 (Int32.of_int n); + Ok (Bytes.to_string b)) + else Error (Format.sprintf "invalid code point %X" i) + +let unescape str = + if String.length str < 2 then + Error (Format.sprintf "too small escape sequence %s" str) + else + match str.[1] with + | 'u' -> + let escape_chars = String.sub str 2 4 in + let* as_int = + Format.sprintf "0x%s" escape_chars |> int_of_string_opt |> function + | Some x -> Ok x + | None -> Error (Format.sprintf "bad escape sequence %s" escape_chars) + in + utf_8_string_of_unicode as_int + | 'x' -> + let escape_chars = String.sub str 2 2 in + let* as_int = + Format.sprintf "0x%s" escape_chars |> int_of_string_opt |> function + | Some x -> Ok x + | None -> Error (Format.sprintf "bad escape sequence %s" escape_chars) + in + utf_8_string_of_unicode as_int + | '"' | '\'' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' -> Ok str + | '\\' -> Ok {|\|} + | '0' -> + if String.length str = 2 then Ok "\x00" + else if String.length str = 4 then + let octal_str = String.(sub str 2 2) in + let* as_int = + Format.sprintf "0o%s" octal_str |> int_of_string_opt |> function + | Some x -> Ok x + | None -> Error (Format.sprintf "bad escape sequence %s" octal_str) + in + utf_8_string_of_unicode as_int + else Error (Format.sprintf "invalid octal sequence %s" str) + | _ -> Error (Format.sprintf "invalid escape sequence %c" str.[1]) diff --git a/lib/json5/yojson_json5.ml b/lib/json5/yojson_json5.ml new file mode 100644 index 00000000..58606838 --- /dev/null +++ b/lib/json5/yojson_json5.ml @@ -0,0 +1,2 @@ +module Safe = Safe +module Basic = Basic diff --git a/lib/json5/yojson_json5.mli b/lib/json5/yojson_json5.mli new file mode 100644 index 00000000..8e069cde --- /dev/null +++ b/lib/json5/yojson_json5.mli @@ -0,0 +1,71 @@ +module Safe : sig + type t = Yojson.Safe.t + + val from_string : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val from_channel : + ?fname:string -> ?lnum:int -> in_channel -> (t, string) result + + val from_file : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val to_string : + ?buf:Buffer.t -> ?len:int -> ?suf:string -> ?std:bool -> t -> string + + val to_channel : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + Stdlib.out_channel -> + t -> + unit + + val to_output : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + < output : string -> int -> int -> int > -> + t -> + unit + + val to_file : ?len:int -> ?std:bool -> ?suf:string -> string -> t -> unit + val pp : Format.formatter -> t -> unit + val equal : t -> t -> bool +end + +module Basic : sig + type t = Yojson.Basic.t + + val from_string : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val from_channel : + ?fname:string -> ?lnum:int -> in_channel -> (t, string) result + + val from_file : ?fname:string -> ?lnum:int -> string -> (t, string) result + + val to_string : + ?buf:Buffer.t -> ?len:int -> ?suf:string -> ?std:bool -> t -> string + + val to_channel : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + Stdlib.out_channel -> + t -> + unit + + val to_output : + ?buf:Stdlib.Buffer.t -> + ?len:int -> + ?suf:string -> + ?std:bool -> + < output : string -> int -> int -> int > -> + t -> + unit + + val to_file : ?len:int -> ?std:bool -> ?suf:string -> string -> t -> unit + val pp : Format.formatter -> t -> unit + val equal : t -> t -> bool +end diff --git a/test_json5/dune b/test_json5/dune new file mode 100644 index 00000000..21bf5685 --- /dev/null +++ b/test_json5/dune @@ -0,0 +1,4 @@ +(test + (name test) + (package yojson-json5) + (libraries alcotest fmt yojson_json5)) diff --git a/test_json5/test.ml b/test_json5/test.ml new file mode 100644 index 00000000..a9be3e19 --- /dev/null +++ b/test_json5/test.ml @@ -0,0 +1,117 @@ +module M = Yojson_json5.Safe + +let yojson = Alcotest.testable M.pp M.equal + +let parsing_test_case name expected input = + Alcotest.test_case name `Quick (fun () -> + (* any error message will do *) + let any_string = Alcotest.testable Fmt.string (fun _ _ -> true) in + Alcotest.(check (result yojson any_string)) + name expected (M.from_string input)) + +let parsing_should_succeed name expected input = + parsing_test_case name (Ok expected) input + +let parsing_should_fail name input = + let failure = Error "" in + parsing_test_case name failure input + +let parsing_tests = + [ + parsing_should_fail "Unexpected line break" {|"foo + bar"|}; + parsing_should_succeed "Empty object" (`Assoc []) "{}"; + parsing_should_succeed "Empty list" (`List []) "[]"; + parsing_should_succeed "List" + (`List [ `Int 1; `String "2"; `Float 3. ]) + {|[1, "2", 3.0]|}; + parsing_should_succeed "true" (`Bool true) "true"; + parsing_should_succeed "false" (`Bool false) "false"; + parsing_should_succeed "null" `Null "null"; + parsing_should_succeed "double quotes string" (`String "hello world") + {|"hello world"|}; + parsing_should_succeed "single quotes string" (`String "hello world") + {|'hello world'|}; + parsing_should_succeed "float" (`Float 12345.67890) "12345.67890"; + parsing_should_succeed "hex" (`Int 0x1) "0x1"; + parsing_should_succeed "hex escape sequence" (`String "a") {|"\x61"|}; + parsing_should_succeed "unicode escape sequence" (`String "λ") {|"\u03bb"|}; + parsing_should_succeed "more string escaping" (`String "Hello λ world") + "\"Hello \\u03bb \\x77\\x6F\\x72\\x6C\\x64\""; + parsing_should_succeed "null byte string" (`String "\x00") {|"\0"|}; + parsing_should_succeed "octal string" (`String "?") {|"\077"|}; + parsing_should_succeed "null and octal string" (`String "\x007") {|"\07"|}; + parsing_should_succeed "int" (`Int 1) "1"; + parsing_should_succeed "backslash escape" (`String {|foo\bar|}) + {|"foo\\bar"|}; + parsing_should_succeed "line break" (`String "foobar") "\"foo\\\nbar\""; + parsing_should_succeed "string and comment" (`String "bar") "\"bar\" //foo"; + parsing_should_succeed "object with double quote string" + (`Assoc [ ("foo", `String "bar") ]) + {|{"foo": "bar"}|}; + parsing_should_succeed "object with single quote string" + (`Assoc [ ("foo", `String "bar") ]) + {|{'foo': 'bar'}|}; + parsing_should_succeed "object with unquoted string" + (`Assoc [ ("foo", `String "bar") ]) + {|{foo: 'bar'}|}; + parsing_should_succeed "trailing comma in list" + (`List [ `Int 1; `Int 2; `Int 3 ]) + "[1, 2, 3,]"; + parsing_should_fail "multiple trailing commas in list" "[1, 2, 3,]"; + parsing_should_fail "just trailing commas in list" "[,,,]"; + parsing_should_succeed "trailing comma in object" + (`Assoc [ ("one", `Int 1) ]) + {|{"one": 1,}|}; + parsing_should_fail "multiple trailing commas in object" {|{"one": 1,,}|}; + parsing_should_fail "just trailing commas in object" "{,,,}"; + (let expected = + `Assoc + [ + ("unquoted", `String "and you can quote me on that"); + ("singleQuotes", `String "I can use \"double quotes\" here"); + ("lineBreaks", `String {|Look, Mom! No \n's!|}); + ("hexadecimal", `Int 0xdecaf); + ("leadingDecimalPoint", `Float 0.8675309); + ("andTrailing", `Float 8675309.0); + ("positiveSign", `Int 1); + ("trailingComma", `String "in objects"); + ("andIn", `List [ `String "arrays" ]); + ("backwardsCompatible", `String "with JSON"); + ] + in + parsing_should_succeed "More elaborated" expected + {|{ + // comments + unquoted: 'and you can quote me on that', + singleQuotes: 'I can use "double quotes" here', + lineBreaks: "Look, Mom! \ +No \\n's!", + hexadecimal: 0xdecaf, + leadingDecimalPoint: .8675309, andTrailing: 8675309., + positiveSign: +1, + trailingComma: 'in objects', andIn: ['arrays',], + "backwardsCompatible": "with JSON", +}|}); + ] + +let writing_test_case name expected input = + Alcotest.test_case name `Quick (fun () -> + Alcotest.(check string) name expected (M.to_string input)) + +let writing_tests = + [ + writing_test_case "Empty object" "{}" (`Assoc []); + writing_test_case "Empty list" "[]" (`List []); + writing_test_case "true" "true" (`Bool true); + writing_test_case "false" "false" (`Bool false); + writing_test_case "null" "null" `Null; + writing_test_case "string" "\"hello world\"" (`String "hello world"); + writing_test_case "float" "12345.6789" (`Float 12345.6789); + writing_test_case "hex" "1" (`Int 0x1); + writing_test_case "int" "1" (`Int 1); + ] + +let () = + Alcotest.run "JSON5" + [ ("parsing", parsing_tests); ("writing", writing_tests) ] diff --git a/yojson-json5.opam b/yojson-json5.opam new file mode 100644 index 00000000..c458ef00 --- /dev/null +++ b/yojson-json5.opam @@ -0,0 +1,37 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: + "Yojson_json5 is a parsing and printing library for the JSON5 format" +description: """ +Yojson_json5 is a parsing and printing library for the JSON5 format. +It supports parsing JSON5 to Yojson.Basic.t and Yojson.Safe.t types.""" +maintainer: [ + "paul-elliot@tarides.com" "nathan@tarides.com" "marek@tarides.com" +] +authors: ["Martin Jambon"] +license: "BSD-3-Clause" +homepage: "https://github.com/ocaml-community/yojson" +doc: "https://ocaml-community.github.io/yojson" +bug-reports: "https://github.com/ocaml-community/yojson/issues" +depends: [ + "dune" {>= "2.7"} + "ocaml" {>= "4.08"} + "sedlex" {>= "2.5"} + "alcotest" {with-test & >= "0.8.5"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/ocaml-community/yojson.git"