@@ -27,46 +27,12 @@ open! Import
27
27
include Mapping_file_intf
28
28
module BigArr1 = Bigarray. Array1
29
29
30
- type int_bigarray = (int , Bigarray .int_elt , Bigarray .c_layout ) Bigarray.Array1 .t
31
-
32
30
type int64_bigarray =
33
31
(int64 , Bigarray .int64_elt , Bigarray .c_layout ) Bigarray.Array1 .t
34
32
35
33
(* Set to 0 until we find decide what to do about sequential traversal of pack files *)
36
34
let gap_tolerance = 0
37
35
38
- module Int_mmap : sig
39
- type t = private {
40
- fn : string ;
41
- fd : Unix .file_descr ;
42
- mutable arr : int_bigarray ;
43
- }
44
-
45
- val open_rw : string -> t
46
- val close : t -> unit
47
- end = struct
48
- type t = { fn : string ; fd : Unix .file_descr ; mutable arr : int_bigarray }
49
-
50
- (* NOTE following mmap is shared *)
51
-
52
- let open_rw fn =
53
- let shared = true in
54
- assert (Sys. file_exists fn);
55
- let fd = Unix. (openfile fn [ O_RDWR ] 0o660 ) in
56
- let arr =
57
- let open Bigarray in
58
- Unix. map_file fd Int c_layout shared [| - 1 |] |> array1_of_genarray
59
- in
60
- { fn; fd; arr }
61
-
62
- let close t =
63
- Unix. close t.fd;
64
- (* following tries to make the array unreachable, so GC'able; however, no guarantee
65
- that arr actually is unreachable *)
66
- t.arr < - Bigarray. (Array1. create Int c_layout 0 );
67
- ()
68
- end
69
-
70
36
module Int64_mmap : sig
71
37
type t = private {
72
38
fn : string ;
@@ -78,6 +44,7 @@ module Int64_mmap : sig
78
44
(* * NOTE [open_ ~fn ~sz] can use [sz=-1] to open with size based on the size
79
45
of the underlying file *)
80
46
47
+ val open_rw : string -> t
81
48
val close : t -> unit
82
49
end = struct
83
50
type t = { fn : string ; fd : Unix .file_descr ; mutable arr : int64_bigarray }
@@ -94,6 +61,17 @@ end = struct
94
61
in
95
62
{ fn; fd; arr }
96
63
64
+ let open_rw fn =
65
+ (* NOTE following mmap is shared *)
66
+ let shared = true in
67
+ assert (Sys. file_exists fn);
68
+ let fd = Unix. (openfile fn [ O_RDWR ] 0o660 ) in
69
+ let arr =
70
+ let open Bigarray in
71
+ Unix. map_file fd Int64 c_layout shared [| - 1 |] |> array1_of_genarray
72
+ in
73
+ { fn; fd; arr }
74
+
97
75
let close t =
98
76
Unix. close t.fd;
99
77
(* following tries to make the array unreachable, so GC'able; however, no guarantee
105
83
(* * The mapping file is created from a decreasing list of
106
84
[(virtual_offset, 0, length)]. We first need to reverse it such that virtual
107
85
offsets are in increasing order. *)
108
- let rev_inplace (src : int_bigarray ) : unit =
86
+ let rev_inplace (src : int64_bigarray ) : unit =
109
87
let src_sz = BigArr1. dim src in
110
88
let _ =
111
89
assert (src_sz > = 3 );
@@ -123,6 +101,27 @@ let rev_inplace (src : int_bigarray) : unit =
123
101
in
124
102
rev 0 (src_sz - 3 )
125
103
104
+ let int64_endian : int64 -> int64 =
105
+ fun i ->
106
+ if Sys. big_endian then (
107
+ (* We are currently on a BE platform but the ints are encoded as LE in the
108
+ file. We've just read a LE int using a BE decoding scheme. Let's fix
109
+ this.
110
+
111
+ The first step is to set [buf] to contain exactly what is stored on
112
+ disk. Since the current platform is BE, we've interpreted what was
113
+ written on disk using a BE decoding scheme. To do the opposite operation
114
+ we must use a BE encoding scheme, hence [set_int64_be].
115
+
116
+ Now that [buf] mimics what was on disk, the second step consist of
117
+ decoding it using a LE function, hence [get_int64_le]. *)
118
+ let buf = Bytes. create 8 in
119
+ Bytes. set_int64_be buf 0 i;
120
+ Bytes. get_int64_le buf 0 )
121
+ else i
122
+
123
+ let conv_int64 i = Int64. to_int (int64_endian i)
124
+
126
125
(* * We then replace the [0] component of the triplets with the accumulated
127
126
length. This yields triplets [(virtual_offset, physical_offset, length)],
128
127
which will allow us to map virtual offsets to their physical location in the
@@ -131,11 +130,11 @@ let set_prefix_offsets src =
131
130
let src_sz = BigArr1. dim src in
132
131
let rec go i poff =
133
132
if i < src_sz then (
134
- src.{i + 1 } < - poff;
135
- let len = src.{i + 2 } in
136
- go (i + 3 ) (poff + len))
133
+ src.{i + 1 } < - int64_endian poff;
134
+ let len = int64_endian src.{i + 2 } in
135
+ go (i + 3 ) (Int64. add poff len))
137
136
in
138
- go 0 0
137
+ go 0 Int64. zero
139
138
140
139
module Make (Io : Io.S ) = struct
141
140
module Io = Io
@@ -183,14 +182,14 @@ module Make (Io : Io.S) = struct
183
182
184
183
(* Fill and close [file] *)
185
184
let append_entry ~off ~len =
186
- (* Write [off, 0, len] in native -endian encoding because it will be read
187
- with mmap. The [0] reserves the space for the future prefix offset. *)
185
+ (* Write [off, 0, len] in little -endian encoding for portability.
186
+ The [0] reserves the space for the future prefix offset. *)
188
187
let buffer = Bytes. create 24 in
189
- Bytes. set_int64_ne buffer 0 (Int63. to_int64 off);
190
- Bytes. set_int64_ne buffer 8 Int64. zero;
191
- Bytes. set_int64_ne buffer 16 (Int64. of_int len);
188
+ Bytes. set_int64_le buffer 0 (Int63. to_int64 off);
189
+ Bytes. set_int64_le buffer 8 Int64. zero;
190
+ Bytes. set_int64_le buffer 16 (Int64. of_int len);
192
191
(* Bytes.unsafe_to_string usage: buffer is uniquely owned; we assume
193
- Bytes.set_int64_ne returns unique ownership; we give up ownership of buffer in
192
+ Bytes.set_int64_le returns unique ownership; we give up ownership of buffer in
194
193
conversion to string. This is safe. *)
195
194
Ao. append_exn file (Bytes. unsafe_to_string buffer)
196
195
in
@@ -223,7 +222,7 @@ module Make (Io : Io.S) = struct
223
222
let * () = Ao. close file in
224
223
225
224
(* Reopen [file] but as an mmap *)
226
- let file = Int_mmap . open_rw path in
225
+ let file = Int64_mmap . open_rw path in
227
226
let * () =
228
227
Errs. catch (fun () ->
229
228
rev_inplace file.arr;
@@ -232,7 +231,7 @@ module Make (Io : Io.S) = struct
232
231
233
232
(* Flush and close new mapping [file] *)
234
233
let * () = Errs. catch (fun () -> Unix. fsync file.fd) in
235
- Int_mmap . close file;
234
+ Int64_mmap . close file;
236
235
237
236
let * mapping_size = Io. size_of_path path in
238
237
Option. iter (fun f -> f mapping_size) report_mapping_size;
@@ -242,27 +241,6 @@ module Make (Io : Io.S) = struct
242
241
243
242
let entry_count arr = BigArr1. dim arr / 3
244
243
let entry_idx i = i * 3
245
-
246
- let conv_int64 : int64 -> int =
247
- fun i ->
248
- (if Sys. big_endian then (
249
- (* We are currently on a BE platform but the ints are encoded as LE in the
250
- file. We've just read a LE int using a BE decoding scheme. Let's fix
251
- this.
252
-
253
- The first step is to set [buf] to contain exactly what is stored on
254
- disk. Since the current platform is BE, we've interpreted what was
255
- written on disk using a BE decoding scheme. To do the opposite operation
256
- we must use a BE encoding scheme, hence [set_int64_be].
257
-
258
- Now that [buf] mimics what was on disk, the second step consist of
259
- decoding it using a LE function, hence [get_int64_le]. *)
260
- let buf = Bytes. create 8 in
261
- Bytes. set_int64_be buf 0 i;
262
- Bytes. get_int64_le buf 0 )
263
- else i)
264
- |> Int64. to_int
265
-
266
244
let entry_off arr i = arr.{entry_idx i} |> conv_int64 |> Int63. of_int
267
245
let entry_poff arr i = arr.{entry_idx i + 1 } |> conv_int64 |> Int63. of_int
268
246
let entry_len arr i = arr.{entry_idx i + 2 } |> conv_int64
0 commit comments