@@ -42,6 +42,8 @@ type input = {
42
42
lexbuf : Lexing .lexbuf ;
43
43
}
44
44
45
+ let string_buffer = Buffer. create 256
46
+
45
47
let with_location_adjustments
46
48
k input ?start_offset ?adjust_start_by ?end_offset ?adjust_end_by value =
47
49
@@ -83,40 +85,6 @@ let warning_loc =
83
85
let warning =
84
86
with_location_adjustments warning_loc
85
87
86
- let unescape_tag s offset input =
87
- let start_loc = Lexing. lexeme_start input.lexbuf + String. length offset in
88
- let warn n c =
89
- let start = input.offset_to_location @@ start_loc + n in
90
- let end_ = input.offset_to_location @@ start_loc + n + 2 in
91
- let loc =
92
- {
93
- Loc. file = input.file; start; end_;
94
- }
95
- in
96
- warning_loc input loc (Parse_error. should_not_be_escaped c)
97
- in
98
- (* The common case is that there are no escape sequences. *)
99
- match String. index s '\\' with
100
- | exception Not_found -> s
101
- | _ ->
102
- let maybe_warn index = function '\\' | '"' -> () | _ as c -> warn index c in
103
- let buffer = Buffer. create (String. length s) in
104
- let rec scan_word index =
105
- if index > = String. length s then ()
106
- else
107
- let c, increment =
108
- match s.[index] with
109
- | '\\' when index + 1 < String. length s ->
110
- maybe_warn index s.[index + 1 ];
111
- (s.[index + 1 ], 2 )
112
- | _ as c -> (c, 1 )
113
- in
114
- Buffer. add_char buffer c;
115
- scan_word (index + increment)
116
- in
117
- scan_word 0 ;
118
- Buffer. contents buffer
119
-
120
88
let reference_token media start target input lexbuf =
121
89
match start with
122
90
| "{!" -> `Simple_reference target
@@ -402,7 +370,7 @@ and token input = parse
402
370
code_block allow_result_block start_offset content_offset metadata
403
371
prefix delim input lexbuf
404
372
in
405
- match code_block_metadata_tail input [] lexbuf with
373
+ match code_block_metadata_tail input None [] lexbuf with
406
374
| `Ok metadata -> code_block_with_metadata metadata
407
375
| `Eof ->
408
376
warning input ~start_offset Parse_error. truncated_code_block_meta;
@@ -724,8 +692,6 @@ and verbatim buffer last_false_terminator start_offset input = parse
724
692
{ Buffer. add_char buffer c;
725
693
verbatim buffer last_false_terminator start_offset input lexbuf }
726
694
727
-
728
-
729
695
and bad_markup_recovery start_offset input = parse
730
696
| [^ '}' ]+ as text '}' as rest
731
697
{ let suggestion =
@@ -736,44 +702,72 @@ and bad_markup_recovery start_offset input = parse
736
702
(Parse_error. bad_markup (" {" ^ rest) ~suggestion );
737
703
emit input (`Code_span text) ~start_offset }
738
704
705
+ (* Based on OCaml's parsing/lexer.mll
706
+ We're missing a bunch of cases here, and can add them
707
+ if necessary. Using the missing cases will cause a warning *)
708
+ and string input = parse
709
+ | '\"'
710
+ { Buffer. contents string_buffer }
711
+ | '\\' newline [' ' '\t' ]*
712
+ { string input lexbuf }
713
+ | '\\' (['\\' '\'' ' \" ' 'n' 't' 'b' 'r' ' '] as c)
714
+ { Buffer.add_char string_buffer
715
+ (match c with
716
+ | '\\ ' -> '\\ '
717
+ | '\' ' -> '\' '
718
+ | '\" ' -> '\" '
719
+ | 'n' -> '\n '
720
+ | 't' -> '\t '
721
+ | 'b' -> '\b '
722
+ | 'r' -> '\r '
723
+ | ' ' -> ' '
724
+ | _ -> assert false);
725
+ string input lexbuf }
726
+ | '\\ ' (_ as c)
727
+ { warning input (Parse_error.should_not_be_escaped c);
728
+ Buffer.add_char string_buffer c;
729
+ string input lexbuf }
730
+ | eof
731
+ { warning input Parse_error.truncated_string;
732
+ Buffer.contents string_buffer }
733
+ | (_ as c)
734
+ { Buffer.add_char string_buffer c;
735
+ string input lexbuf }
736
+
739
737
and code_block_metadata_atom input = parse
740
- | '"' (tag_quoted_atom as value) '"'
738
+ | '" '
741
739
{
742
- let adjust_start_by = " \" " in
743
- with_location_adjustments ~adjust_start_by ~adjust_end_by: " \" " (fun _ -> Loc. at) input (unescape_tag value adjust_start_by input) }
740
+ let start_offset = Lexing. lexeme_start input.lexbuf + 1 in
741
+ Buffer. clear string_buffer;
742
+ let s = string input lexbuf in
743
+ with_location_adjustments ~start_offset ~adjust_end_by: " \" " (fun _ -> Loc. at) input s }
744
744
| (tag_unquoted_atom as value)
745
- { with_location_adjustments (fun _ -> Loc. at) input value }
746
-
747
- and code_block_metadata_tail input acc = parse
748
- | (space_char* '[' ) { `Ok (List. rev acc) }
749
- | (space_char+ as prefix)
750
- '"' (tag_quoted_atom as key) '"' '='
751
- {
752
- let adjust_start_by = prefix ^ " \" " in
753
- let key = with_location_adjustments ~adjust_start_by ~adjust_end_by: " \" ="
754
- (fun _ -> Loc. at) input (unescape_tag key adjust_start_by input) in
755
- let value = code_block_metadata_atom input lexbuf in
756
- code_block_metadata_tail input (`Binding (key, value) :: acc) lexbuf }
757
- | (space_char+ as prefix)
758
- '"' (tag_quoted_atom as tag) '"' {
759
- let adjust_start_by = prefix ^ " \" " in
760
- let tag = with_location_adjustments ~adjust_start_by ~adjust_end_by: " \" "
761
- (fun _ -> Loc. at) input (unescape_tag tag adjust_start_by input) in
762
- code_block_metadata_tail input (`Tag tag :: acc) lexbuf }
763
- | (space_char+ as _prefix)
764
- (tag_unquoted_atom as key) '='
765
- {
766
- let key = with_location_adjustments (fun _ -> Loc. at) input key in
767
- let value = code_block_metadata_atom input lexbuf in
768
- code_block_metadata_tail input (`Binding (key, value) :: acc) lexbuf }
769
- | (space_char+ as _prefix)
770
- (tag_unquoted_atom as tag)
771
- {
772
- let tag = with_location_adjustments (fun _ -> Loc. at) input tag in
773
- code_block_metadata_tail input (`Tag tag :: acc) lexbuf }
774
- | _ as c
775
- { `Invalid_char c }
776
- | eof
745
+ { with_location_adjustments (fun _ -> Loc. at) input value }
746
+ | (_ as c)
747
+ { warning input (Parse_error. code_block_tag_invalid_char c);
748
+ with_location_adjustments (fun _ -> Loc. at) input " " }
749
+
750
+ and code_block_metadata_tail input tag acc = parse
751
+ | space_char+
752
+ { let acc = match tag with | Some t -> `Tag t :: acc | None -> acc in
753
+ let tag = code_block_metadata_atom input lexbuf in
754
+ code_block_metadata_tail input (Some tag ) acc lexbuf }
755
+ | space_char* '['
756
+ {
757
+ let acc = match tag with | Some t -> `Tag t :: acc | None -> acc in
758
+ `Ok (List .rev acc ) }
759
+ | '='
760
+ { match tag with
761
+ | Some t ->
762
+ let value = code_block_metadata_atom input lexbuf in
763
+ code_block_metadata_tail input None (`Binding (t, value) :: acc) lexbuf
764
+ | None ->
765
+ warning input (Parse_error. code_block_tag_invalid_char '=' );
766
+ code_block_metadata_tail input None acc lexbuf }
767
+ | (_ # space_char # '[' # '=' as c) [^ '[' ]* '['
768
+ { warning input (Parse_error. code_block_tag_invalid_char c);
769
+ `Ok (List. rev acc)}
770
+ | eof
777
771
{ `Eof }
778
772
779
773
and code_block allow_result_block start_offset content_offset metadata prefix delim input = parse
0 commit comments