Skip to content

Commit d71418d

Browse files
committed
Parser: Allow \ddd escape sequence in code-block metadata
1 parent fc81591 commit d71418d

File tree

1 file changed

+32
-2
lines changed

1 file changed

+32
-2
lines changed

src/parser/lexer.mll

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ let unescape_word : string -> string = fun s ->
2727
scan_word 0;
2828
Buffer.contents buffer
2929

30+
3031
(* Tag with two cases, [Inline] and [Block].
   NOTE(review): presumably distinguishes inline math from block math
   markup; confirm against the lexer rules that use it. *)
type math_kind =
  | Inline
  | Block
3233

@@ -85,6 +86,30 @@ let warning_loc =
8586
(* [warning] is [with_location_adjustments] partially applied to
   [warning_loc]. Lexer actions call it as [warning input w], where [w] is
   a warning constructor taken from [Parse_error]. *)
let warning = with_location_adjustments warning_loc
8788

89+
(* From ocaml.git/parsing/lexer.mll *)
90+
(* [digit_value c] is the numeric value of the digit character [c]:
   ['0'..'9'] map to 0..9, and ['a'..'f'] / ['A'..'F'] map to 10..15.
   Any other character is a programming error in the caller and triggers
   [assert false]. *)
let digit_value c =
  match c with
  | 'a' .. 'f' -> 10 + Char.code c - Char.code 'a'
  | 'A' .. 'F' -> 10 + Char.code c - Char.code 'A'
  | '0' .. '9' -> Char.code c - Char.code '0'
  | _ -> assert false
96+
97+
(* [num_value lexbuf ~base ~first ~last] reads the characters of the
   current lexeme at offsets [first] to [last] (inclusive) and interprets
   them as a number written in base [base], most significant digit first.
   Each character is converted with [digit_value]; a digit that is not
   strictly below [base] trips the assertion. Returns 0 when
   [first > last], since the loop body never runs. *)
let num_value lexbuf ~base ~first ~last =
  let c = ref 0 in
  for i = first to last do
    let v = digit_value (Lexing.lexeme_char lexbuf i) in
    assert (v < base);
    (* Shift the accumulated value one digit to the left, then add [v]. *)
    c := (base * !c) + v
  done;
  !c
105+
106+
(* [char_for_decimal_code input lexbuf i] decodes the three decimal digits
   found at offsets [i], [i + 1] and [i + 2] of the current lexeme into
   the character with that code.
   If the decoded value is outside 0..255, an
   [Parse_error.invalid_char_code] warning is reported through [warning]
   and the placeholder character ['x'] is returned, so lexing can go on. *)
let char_for_decimal_code input lexbuf i =
  let c = num_value lexbuf ~base:10 ~first:i ~last:(i + 2) in
  if c < 0 || c > 255 then begin
    warning input (Parse_error.invalid_char_code c);
    'x'
  end
  else Char.chr c
112+
88113
let reference_token media start target input lexbuf =
89114
match start with
90115
| "{!" -> `Simple_reference target
@@ -707,7 +732,9 @@ and bad_markup_recovery start_offset input = parse
707732
if necessary. Using the missing cases will cause a warning *)
708733
and string input = parse
709734
| '\"'
710-
{ Buffer.contents string_buffer }
735+
{ let result = Buffer.contents string_buffer in
736+
Buffer.clear string_buffer;
737+
result }
711738
| '\\' newline [' ' '\t']*
712739
{ string input lexbuf }
713740
| '\\' (['\\' '\'' '\"' 'n' 't' 'b' 'r' ' '] as c)
@@ -723,6 +750,9 @@ and string input = parse
723750
| ' ' -> ' '
724751
| _ -> assert false);
725752
string input lexbuf }
753+
| '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
754+
{ Buffer.add_char string_buffer (char_for_decimal_code input lexbuf 1);
755+
string input lexbuf }
726756
| '\\' (_ as c)
727757
{ warning input (Parse_error.should_not_be_escaped c);
728758
Buffer.add_char string_buffer c;
@@ -752,7 +782,7 @@ and code_block_metadata_tail input tag acc = parse
752782
{ let acc = match tag with | Some t -> `Tag t :: acc | None -> acc in
753783
let tag = code_block_metadata_atom input lexbuf in
754784
code_block_metadata_tail input (Some tag) acc lexbuf }
755-
| space_char* '['
785+
| space_char* '[' (* Nb this will be a longer match than the above case! *)
756786
{
757787
let acc = match tag with | Some t -> `Tag t :: acc | None -> acc in
758788
`Ok (List.rev acc) }

0 commit comments

Comments
 (0)