Skip to content

Commit

Permalink
Avoid too many continuations, and only use packed encoding on scalar …
Browse files Browse the repository at this point in the history
…fields
  • Loading branch information
andersfugmann committed Jan 11, 2024
1 parent 9414497 commit 3fcfa02
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 130 deletions.
11 changes: 7 additions & 4 deletions src/ocaml_protoc_plugin/reader.ml
Original file line number Diff line number Diff line change
Expand Up @@ -152,11 +152,14 @@ let to_list: t -> (int * Field.t) list =
let%expect_test "varint boxed" =
let values = [-2L; -1L; 0x7FFFFFFFFFFFFFFFL; 0x7FFFFFFFFFFFFFFEL; 0x3FFFFFFFFFFFFFFFL; 0x3FFFFFFFFFFFFFFEL; 0L; 1L] in
List.iter ~f:(fun v ->
let buffer = Bytes.create 10 in
let _ = Writer.write_varint buffer ~offset:0 v in
let buffer =
let writer = Writer.init () in
Writer.write_varint_value v writer;
Writer.contents writer
in
Printf.printf "0x%016LxL = 0x%016LxL\n"
(read_raw_varint_reference (create (Bytes.to_string buffer)))
(read_raw_varint (create (Bytes.to_string buffer)));
(read_raw_varint_reference (create buffer))
(read_raw_varint (create buffer));
()
) values;
[%expect {|
Expand Down
53 changes: 13 additions & 40 deletions src/ocaml_protoc_plugin/serialize.ml
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,10 @@ let field_type: type a. a spec -> int = function
| Float | Fixed32 | SFixed32 | Fixed32_int | SFixed32_int -> 5 (* Fixed 32 bit *)

let write_fixed64 ~f v =
let size = 8 in
let writer = Writer.write_fixed64 in
Writer.write_value ~size ~writer (f v)
Writer.write_fixed64_value (f v)

let write_fixed32 ~f v =
let size = 4 in
let writer = Writer.write_fixed32 in
Writer.write_value ~size ~writer (f v)
Writer.write_fixed32_value (f v)

let zigzag_encoding v =
let open Infix.Int64 in
Expand All @@ -38,24 +34,14 @@ let zigzag_encoding_unboxed v =
v

let write_varint ~f v =
let v = f v in
let size = Writer.varint_size (Int64.to_int v) in
let writer = Writer.write_varint in
Writer.write_value ~size ~writer v
Writer.write_varint_value (f v)

let write_varint_unboxed ~f v =
let v = f v in
let size = Writer.varint_size v in
let writer = Writer.write_varint_unboxed in
Writer.write_value ~size ~writer v
Writer.write_varint_unboxed_value (f v)

let write_string ~f v =
let write_length_delimited_string ~f v =
let v = f v in
let write_length = write_varint_unboxed ~f:String.length v in
let write_string = Writer.write_string in
fun t ->
write_length t;
Writer.write_value ~size:(String.length v) ~writer:write_string v t
Writer.write_length_delimited_value ~data:v ~offset:0 ~len:(String.length v)

let id x = x
let (@@) a b = fun v -> b (a v)
Expand Down Expand Up @@ -85,26 +71,19 @@ let write_value : type a. a spec -> a -> Writer.t -> unit = function
| SInt32_int -> write_varint_unboxed ~f:zigzag_encoding_unboxed

| Bool -> write_varint_unboxed ~f:(function true -> 1 | false -> 0)
| String -> write_string ~f:id
| Bytes -> write_string ~f:Bytes.unsafe_to_string
| String -> write_length_delimited_string ~f:id
| Bytes -> write_length_delimited_string ~f:Bytes.unsafe_to_string
| Enum f -> write_varint_unboxed ~f
| Message to_proto ->
(*
fun v writer ->
let cont = Writer.write_length_delimited_value_cont writer in
let _ = to_proto writer v in
cont ()
*)
Writer.write_length_delimited_value ~write:to_proto
Writer.write_length_delimited_value' ~write:to_proto

(** Optimized when the value is given in advance, and the continuation is expected to be called multiple times *)
let write_value_const : type a. a spec -> a -> Writer.t -> unit = fun spec v ->
let write_value = write_value spec in
let writer = Writer.init () in
write_value v writer;
let data = Writer.contents writer in
let size = String.length data in
Writer.write_value ~size ~writer:Writer.write_string data
Writer.write_const_value data

let write_field_header: 'a spec -> int -> Writer.t -> unit = fun spec index ->
let field_type = field_type spec in
Expand All @@ -118,24 +97,18 @@ let write_field: type a. a spec -> int -> a -> Writer.t -> unit = fun spec index
write_field_header writer;
write_value v writer

let is_scalar: type a. a spec -> bool = function
| String -> false
| Bytes -> false
| Message _ -> false
| _ -> true

let rec write: type a. a compound -> Writer.t -> a -> unit = function
| Repeated (index, spec, Packed) when is_scalar spec -> begin
| Repeated (index, spec, Packed) -> begin
let write writer vs = List.iter ~f:(fun v -> write_value spec v writer) vs in
let write_header = write_field_header String index in
fun writer vs ->
match vs with
| [] -> ()
| vs ->
write_header writer;
Writer.write_length_delimited_value ~write vs writer
Writer.write_length_delimited_value' ~write vs writer
end
| Repeated (index, spec, _) ->
| Repeated (index, spec, Not_packed) ->
let write = write_field spec index in
fun writer vs ->
List.iter ~f:(fun v -> write v writer) vs
Expand Down
140 changes: 62 additions & 78 deletions src/ocaml_protoc_plugin/writer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@
open StdLabels
open Field


let sprintf = Printf.sprintf
let printf = Printf.printf


let length_delimited_size_field_length = 5

type substring = { mutable offset: int; buffer: Bytes.t }
Expand Down Expand Up @@ -35,9 +30,8 @@ let unused_space t =
let varint_size_reference v =
let rec inner acc = function
| 0 -> acc
| v -> inner (acc + 1) (v lsr 1) [@@ocaml.unrolled 10]
| v -> inner (acc + 1) (v lsr 1)
in

match v with
| v when v < 0 -> 10
| 0 -> 1
Expand All @@ -55,13 +49,6 @@ let varint_size = function
| v when v < 0x100000000000000 -> 8
| _ -> 9

let rec size_of_field = function
| Varint v -> varint_size (Int64.to_int v)
| Varint_unboxed v -> varint_size v
| Fixed_32_bit _ -> 4
| Fixed_64_bit _ -> 8
| Length_delimited {length; _} -> size_of_field (Varint_unboxed length) + length

(* Manually unroll *)
let write_varint_unboxed buffer ~offset = function
| v when v < 0 ->
Expand Down Expand Up @@ -241,7 +228,9 @@ let write_varint_unboxed buffer ~offset = function
(* Write a field delimited length.
A delimited field length can be no larger than 2^31.
This function always write 5 bytes (7*5bits > 31bits).
This allows the field length to be statically allocated and written later
This allows the field length to be statically allocated and written later.
The spec does not forbid this encoding, but there might be implementation
that disallow '0' as the ending varint value.
*)
let write_delimited_field_length_fixed_size buffer ~offset v =
Bytes.set_uint8 buffer offset (v lor 128);
Expand Down Expand Up @@ -296,7 +285,7 @@ let write_varint buffer ~offset vl =
| true ->
Bytes.set_uint8 buffer offset (v lor 128);
let offset = offset + 1 in
Bytes.set_uint8 buffer offset (0x01);
Bytes.set_uint8 buffer offset (0x01); (* Always set the 64'th bit *)
offset
| false ->
Bytes.set_uint8 buffer offset v;
Expand All @@ -314,8 +303,8 @@ let write_varint_reference buffer ~offset v =
Bytes.set_uint8 buffer offset (Int64.to_int v);
next_offset
| rem ->
Bytes.set_uint8 buffer offset (Int.logor (Int64.to_int v |> Int.logand 0x7F) 0x80);
inner ~offset:next_offset rem [@@ocaml.unrolled 10]
Bytes.set_uint8 buffer offset (Int.logor (Int64.to_int v) 0x80);
inner ~offset:next_offset rem
in
inner ~offset v

Expand All @@ -336,24 +325,6 @@ let write_varint_unboxed_reference buffer ~offset v =
in
inner ~is_negative:(v < 0) ~offset v

let write_fixed32 buffer ~offset v =
Bytes.set_int32_le buffer offset v;
offset + 4

let write_fixed64 buffer ~offset v =
Bytes.set_int64_le buffer offset v;
offset + 8

let write_string buffer ~offset v =
let len = String.length v in
Bytes.blit_string ~src:v ~src_pos:0 ~dst:buffer ~dst_pos:offset ~len;
offset + len

let write_length_delimited buffer ~offset ~src ~src_pos ~len =
let offset = write_varint_unboxed buffer ~offset len in
Bytes.blit_string ~src:src ~src_pos ~dst:buffer ~dst_pos:offset ~len;
offset + len

let ensure_capacity ~size t =
match t.data with
| { offset; buffer } as elem :: _ when Bytes.length buffer - offset >= size -> elem
Expand All @@ -362,46 +333,62 @@ let ensure_capacity ~size t =
t.data <- elem :: tl;
elem

let write_value: size:int -> writer:(Bytes.t -> offset:int -> 'a -> int) -> 'a -> t -> unit =
fun ~size ~writer v t ->
let elem = ensure_capacity ~size t in
let offset = writer elem.buffer ~offset:elem.offset v in
(** Direct functions *)
let write_const_value data t =
let len = String.length data in
let elem = ensure_capacity ~size:len t in
Bytes.blit_string ~src:data ~src_pos:0 ~dst:elem.buffer ~dst_pos:elem.offset ~len;
elem.offset <- elem.offset + len

let write_fixed32_value: int32 -> t -> unit = fun v t ->
let elem = ensure_capacity ~size:4 t in
Bytes.set_int32_le elem.buffer elem.offset v;
elem.offset <- elem.offset + 4

let write_fixed64_value: int64 -> t -> unit = fun v t ->
let elem = ensure_capacity ~size:8 t in
Bytes.set_int64_le elem.buffer elem.offset v;
elem.offset <- elem.offset + 8

let write_varint_unboxed_value: int -> t -> unit = fun v t ->
let elem = ensure_capacity ~size:10 t in
let offset = write_varint_unboxed elem.buffer ~offset:elem.offset v in
elem.offset <- offset

let write_varint_value: int64 -> t -> unit = fun v t ->
let elem = ensure_capacity ~size:10 t in
let offset = write_varint elem.buffer ~offset:elem.offset v in
elem.offset <- offset

let write_naked_field buffer ~offset = function
| Varint_unboxed v -> write_varint_unboxed buffer ~offset v
| Varint v -> write_varint buffer ~offset v
| Fixed_32_bit v -> write_fixed32 buffer ~offset v
| Fixed_64_bit v -> write_fixed64 buffer ~offset v
| Length_delimited {offset = src_pos; length; data} ->
write_length_delimited buffer ~offset ~src:data ~src_pos ~len:length

let add_field t field =
let size = size_of_field field in
let elem = ensure_capacity ~size t in
let offset = write_naked_field elem.buffer ~offset:elem.offset field in
elem.offset <- offset;
()

let write_field_header : t -> int -> int -> unit =
fun t index field_type ->
let write_length_delimited_value: data:string -> offset:int -> len:int -> t -> unit = fun ~data ~offset ~len t ->
write_varint_unboxed_value len t;
let elem = ensure_capacity ~size:len t in
Bytes.blit_string ~src:data ~src_pos:offset ~dst:elem.buffer ~dst_pos:elem.offset ~len;
elem.offset <- elem.offset + len

let write_field_header : t -> int -> int -> unit = fun t index field_type ->
let header = (index lsl 3) + field_type in
add_field t (Varint_unboxed (header))
write_varint_unboxed_value header t

let write_field : t -> int -> Field.t -> unit =
fun t index field ->
let field_type =
let write_field : t -> int -> Field.t -> unit = fun t index field ->
let field_type, writer =
match field with
| Varint _ -> 0
| Varint_unboxed _ -> 0
| Fixed_64_bit _ -> 1
| Length_delimited _ -> 2
| Fixed_32_bit _ -> 5
| Varint v ->
0, write_varint_value v
| Varint_unboxed v ->
0, write_varint_unboxed_value v
| Fixed_64_bit v ->
1, write_fixed64_value v
| Length_delimited {offset; length; data} ->
2, write_length_delimited_value ~data ~offset ~len:length
| Fixed_32_bit v ->
5, write_fixed32_value v
in
write_field_header t index field_type;
add_field t field
writer t


let write_length_delimited_value ~write v t =
let write_length_delimited_value' ~write v t =
let rec size_data_added sentinel acc = function
| [] -> failwith "End of list reached. This is impossible"
| x :: _ when x == sentinel -> acc
Expand Down Expand Up @@ -442,17 +429,14 @@ let write_length_delimited_value ~write v t =
let _ = write t v in
let size = size_data_added sentinel (sentinel.offset - (offset + length_delimited_size_field_length)) t.data in
let offset' = write_varint_unboxed sentinel.buffer ~offset size in
(* Move data, to avoid holes *)
(* Move data to avoid holes *)
let () = match (offset + length_delimited_size_field_length = offset') with
| true -> ()
| false ->
(* Offset points to the first new byte. *)
(*
Printf.eprintf "\nHole size: %d. %d, %d, %d\n" n offset offset' sentinel.offset;
Printf.eprintf "Bytes.blit ~src:sentinel.buffer ~src_pos:%d ~dst:sentinel.buffer ~dst_pos:%d ~len:%d\n" (offset+5) offset' (sentinel.offset - (offset + 5));
*)
Bytes.blit ~src:sentinel.buffer ~src_pos:(offset+5) ~dst:sentinel.buffer ~dst_pos:offset' ~len:(sentinel.offset - (offset + 5));
sentinel.offset <- sentinel.offset - (offset+5-offset');
Bytes.blit ~src:sentinel.buffer ~src_pos:(offset + length_delimited_size_field_length)
~dst:sentinel.buffer ~dst_pos:offset'
~len:(sentinel.offset - (offset + length_delimited_size_field_length));
sentinel.offset <- sentinel.offset - (offset+length_delimited_size_field_length-offset');
in
()
in
Expand All @@ -478,10 +462,10 @@ let contents t =
let dump t =
let string_contents = contents t in
List.init ~len:(String.length string_contents) ~f:(fun i ->
sprintf "%02x" (Char.code (String.get string_contents i))
Printf.sprintf "%02x" (Char.code (String.get string_contents i))
)
|> String.concat ~sep:"-"
|> printf "Buffer: %s\n"
|> Printf.printf "Buffer: %s\n"

let of_list: (int * Field.t) list -> t = fun fields ->
let t = init () in
Expand Down
18 changes: 10 additions & 8 deletions src/ocaml_protoc_plugin/writer.mli
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,17 @@ val contents : t -> string

(**/**)
val varint_size : int -> int
val write_varint : bytes -> offset:int -> int64 -> int
val write_varint_unboxed : bytes -> offset:int -> int -> int
val write_fixed32 : bytes -> offset:int -> Int32.t -> int
val write_fixed64 : bytes -> offset:int -> Int64.t -> int
val write_string : bytes -> offset:int -> string -> int
val write_length_delimited : bytes -> offset:int -> src:string -> src_pos:int -> len:int -> int

(** Direct functions *)
val write_fixed32_value: int32 -> t -> unit
val write_fixed64_value: int64 -> t -> unit
val write_varint_unboxed_value: int -> t -> unit
val write_varint_value: int64 -> t -> unit
val write_length_delimited_value: data:string -> offset:int -> len:int -> t -> unit
val write_const_value: string -> t -> unit

val write_length_delimited_value': write:(t -> 'a -> _) -> 'a -> t -> unit
val write_field : t -> int -> Field.t -> unit
val write_length_delimited_value : write:(t -> 'a -> 'b) -> 'a -> t -> unit

(** Construct a writer from a field list *)
val of_list: (int * Field.t) list -> t
Expand All @@ -33,5 +36,4 @@ val of_list: (int * Field.t) list -> t
val dump : t -> unit

val unused_space : t -> int
val write_value: size:int -> writer:(Bytes.t -> offset:int -> 'a -> int) -> 'a -> t -> unit
(**/**)
6 changes: 6 additions & 0 deletions src/plugin/types.ml
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,12 @@ let c_of_field ~params ~syntax ~scope field =
Repeated (number, spec, Not_packed)
|> c_of_compound name

(* Repeated bytes and strings are not packed *)
| _, { label = Some Label.LABEL_REPEATED; type' = Some (TYPE_STRING | TYPE_BYTES as type'); type_name; _ } ->
let Espec spec = spec_of_type ~params ~scope type_name None type' in
Repeated (number, spec, Not_packed)
|> c_of_compound name

(* Repeated enum *)
| _, { label = Some Label.LABEL_REPEATED; type' = Some Type.TYPE_ENUM; type_name; options; _} ->
let spec = spec_of_enum ~scope type_name None in
Expand Down

0 comments on commit 3fcfa02

Please sign in to comment.