Skip to content

Commit 5eede43

Browse files
authored
Merge pull request #2186 from art-w/3.6-fix-endian
2 parents ff45092 + 36af623 commit 5eede43

File tree

1 file changed

+46
-68
lines changed

1 file changed

+46
-68
lines changed

Diff for: src/irmin-pack/unix/mapping_file.ml

+46-68
Original file line numberDiff line numberDiff line change
@@ -27,46 +27,12 @@ open! Import
2727
include Mapping_file_intf
2828
module BigArr1 = Bigarray.Array1
2929

30-
type int_bigarray = (int, Bigarray.int_elt, Bigarray.c_layout) Bigarray.Array1.t
31-
3230
type int64_bigarray =
3331
(int64, Bigarray.int64_elt, Bigarray.c_layout) Bigarray.Array1.t
3432

3533
(* Set to 0 until we find decide what to do about sequential traversal of pack files *)
3634
let gap_tolerance = 0
3735

38-
module Int_mmap : sig
39-
type t = private {
40-
fn : string;
41-
fd : Unix.file_descr;
42-
mutable arr : int_bigarray;
43-
}
44-
45-
val open_rw : string -> t
46-
val close : t -> unit
47-
end = struct
48-
type t = { fn : string; fd : Unix.file_descr; mutable arr : int_bigarray }
49-
50-
(* NOTE following mmap is shared *)
51-
52-
let open_rw fn =
53-
let shared = true in
54-
assert (Sys.file_exists fn);
55-
let fd = Unix.(openfile fn [ O_RDWR ] 0o660) in
56-
let arr =
57-
let open Bigarray in
58-
Unix.map_file fd Int c_layout shared [| -1 |] |> array1_of_genarray
59-
in
60-
{ fn; fd; arr }
61-
62-
let close t =
63-
Unix.close t.fd;
64-
(* following tries to make the array unreachable, so GC'able; however, no guarantee
65-
that arr actually is unreachable *)
66-
t.arr <- Bigarray.(Array1.create Int c_layout 0);
67-
()
68-
end
69-
7036
module Int64_mmap : sig
7137
type t = private {
7238
fn : string;
@@ -78,6 +44,7 @@ module Int64_mmap : sig
7844
(** NOTE [open_ ~fn ~sz] can use [sz=-1] to open with size based on the size
7945
of the underlying file *)
8046

47+
val open_rw : string -> t
8148
val close : t -> unit
8249
end = struct
8350
type t = { fn : string; fd : Unix.file_descr; mutable arr : int64_bigarray }
@@ -94,6 +61,17 @@ end = struct
9461
in
9562
{ fn; fd; arr }
9663

64+
let open_rw fn =
65+
(* NOTE following mmap is shared *)
66+
let shared = true in
67+
assert (Sys.file_exists fn);
68+
let fd = Unix.(openfile fn [ O_RDWR ] 0o660) in
69+
let arr =
70+
let open Bigarray in
71+
Unix.map_file fd Int64 c_layout shared [| -1 |] |> array1_of_genarray
72+
in
73+
{ fn; fd; arr }
74+
9775
let close t =
9876
Unix.close t.fd;
9977
(* following tries to make the array unreachable, so GC'able; however, no guarantee
@@ -105,7 +83,7 @@ end
10583
(** The mapping file is created from a decreasing list of
10684
[(virtual_offset, 0, length)]. We first need to reverse it such that virtual
10785
offsets are in increasing order. *)
108-
let rev_inplace (src : int_bigarray) : unit =
86+
let rev_inplace (src : int64_bigarray) : unit =
10987
let src_sz = BigArr1.dim src in
11088
let _ =
11189
assert (src_sz >= 3);
@@ -123,6 +101,27 @@ let rev_inplace (src : int_bigarray) : unit =
123101
in
124102
rev 0 (src_sz - 3)
125103

104+
let int64_endian : int64 -> int64 =
105+
fun i ->
106+
if Sys.big_endian then (
107+
(* We are currently on a BE platform but the ints are encoded as LE in the
108+
file. We've just read a LE int using a BE decoding scheme. Let's fix
109+
this.
110+
111+
The first step is to set [buf] to contain exactly what is stored on
112+
disk. Since the current platform is BE, we've interpreted what was
113+
written on disk using a BE decoding scheme. To do the opposite operation
114+
we must use a BE encoding scheme, hence [set_int64_be].
115+
116+
Now that [buf] mimics what was on disk, the second step consist of
117+
decoding it using a LE function, hence [get_int64_le]. *)
118+
let buf = Bytes.create 8 in
119+
Bytes.set_int64_be buf 0 i;
120+
Bytes.get_int64_le buf 0)
121+
else i
122+
123+
let conv_int64 i = Int64.to_int (int64_endian i)
124+
126125
(** We then replace the [0] component of the triplets with the accumulated
127126
length. This yields triplets [(virtual_offset, physical_offset, length)],
128127
which will allow us to map virtual offsets to their physical location in the
@@ -131,11 +130,11 @@ let set_prefix_offsets src =
131130
let src_sz = BigArr1.dim src in
132131
let rec go i poff =
133132
if i < src_sz then (
134-
src.{i + 1} <- poff;
135-
let len = src.{i + 2} in
136-
go (i + 3) (poff + len))
133+
src.{i + 1} <- int64_endian poff;
134+
let len = int64_endian src.{i + 2} in
135+
go (i + 3) (Int64.add poff len))
137136
in
138-
go 0 0
137+
go 0 Int64.zero
139138

140139
module Make (Io : Io.S) = struct
141140
module Io = Io
@@ -183,14 +182,14 @@ module Make (Io : Io.S) = struct
183182

184183
(* Fill and close [file] *)
185184
let append_entry ~off ~len =
186-
(* Write [off, 0, len] in native-endian encoding because it will be read
187-
with mmap. The [0] reserves the space for the future prefix offset. *)
185+
(* Write [off, 0, len] in little-endian encoding for portability.
186+
The [0] reserves the space for the future prefix offset. *)
188187
let buffer = Bytes.create 24 in
189-
Bytes.set_int64_ne buffer 0 (Int63.to_int64 off);
190-
Bytes.set_int64_ne buffer 8 Int64.zero;
191-
Bytes.set_int64_ne buffer 16 (Int64.of_int len);
188+
Bytes.set_int64_le buffer 0 (Int63.to_int64 off);
189+
Bytes.set_int64_le buffer 8 Int64.zero;
190+
Bytes.set_int64_le buffer 16 (Int64.of_int len);
192191
(* Bytes.unsafe_to_string usage: buffer is uniquely owned; we assume
193-
Bytes.set_int64_ne returns unique ownership; we give up ownership of buffer in
192+
Bytes.set_int64_le returns unique ownership; we give up ownership of buffer in
194193
conversion to string. This is safe. *)
195194
Ao.append_exn file (Bytes.unsafe_to_string buffer)
196195
in
@@ -223,7 +222,7 @@ module Make (Io : Io.S) = struct
223222
let* () = Ao.close file in
224223

225224
(* Reopen [file] but as an mmap *)
226-
let file = Int_mmap.open_rw path in
225+
let file = Int64_mmap.open_rw path in
227226
let* () =
228227
Errs.catch (fun () ->
229228
rev_inplace file.arr;
@@ -232,7 +231,7 @@ module Make (Io : Io.S) = struct
232231

233232
(* Flush and close new mapping [file] *)
234233
let* () = Errs.catch (fun () -> Unix.fsync file.fd) in
235-
Int_mmap.close file;
234+
Int64_mmap.close file;
236235

237236
let* mapping_size = Io.size_of_path path in
238237
Option.iter (fun f -> f mapping_size) report_mapping_size;
@@ -242,27 +241,6 @@ module Make (Io : Io.S) = struct
242241

243242
let entry_count arr = BigArr1.dim arr / 3
244243
let entry_idx i = i * 3
245-
246-
let conv_int64 : int64 -> int =
247-
fun i ->
248-
(if Sys.big_endian then (
249-
(* We are currently on a BE platform but the ints are encoded as LE in the
250-
file. We've just read a LE int using a BE decoding scheme. Let's fix
251-
this.
252-
253-
The first step is to set [buf] to contain exactly what is stored on
254-
disk. Since the current platform is BE, we've interpreted what was
255-
written on disk using a BE decoding scheme. To do the opposite operation
256-
we must use a BE encoding scheme, hence [set_int64_be].
257-
258-
Now that [buf] mimics what was on disk, the second step consist of
259-
decoding it using a LE function, hence [get_int64_le]. *)
260-
let buf = Bytes.create 8 in
261-
Bytes.set_int64_be buf 0 i;
262-
Bytes.get_int64_le buf 0)
263-
else i)
264-
|> Int64.to_int
265-
266244
let entry_off arr i = arr.{entry_idx i} |> conv_int64 |> Int63.of_int
267245
let entry_poff arr i = arr.{entry_idx i + 1} |> conv_int64 |> Int63.of_int
268246
let entry_len arr i = arr.{entry_idx i + 2} |> conv_int64

0 commit comments

Comments
 (0)