Skip to content

Commit 6cda823

Browse files
committed
Catch transient errors, wait and retry
I occasionally see an "interrupted system call" exception from async_inotify (https://github.com/janestreet/async_inotify/blob/aaeedfaa4751d18b741ba2f82ef03a88e749e5f0/src/async_inotify.ml#L97). If this happens, I'll log the backtrace, wait 5 seconds, and try again. Signed-off-by: David Scott <[email protected]>
1 parent 55b97bf commit 6cda823

File tree

1 file changed

+43
-18
lines changed

1 file changed

+43
-18
lines changed

main.ml

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -765,8 +765,7 @@ let rec diff a b = match a with
765765
| a :: aa ->
766766
if List.mem b a then diff aa b else a :: (diff aa b)
767767

768-
let watch_volume_plugins ~root_dir ~switch_path =
769-
let root_dir = Filename.concat root_dir "volume" in
768+
let watch_volume_plugins ~root_dir ~switch_path ~pipe =
770769
let create switch_path root_dir name =
771770
if Hashtbl.mem servers name
772771
then return ()
@@ -794,11 +793,8 @@ let watch_volume_plugins ~root_dir ~switch_path =
794793
Deferred.all_ignore (List.map ~f:(create switch_path root_dir) (diff needed got_already))
795794
>>= fun () ->
796795
Deferred.all_ignore (List.map ~f:(destroy switch_path) (diff got_already needed)) in
797-
Async_inotify.create ~recursive:false ~watch_new_dirs:false root_dir
798-
>>= fun (watch, _) ->
799796
sync ~root_dir ~switch_path
800797
>>= fun () ->
801-
let pipe = Async_inotify.pipe watch in
802798
let open Async_inotify.Event in
803799
let rec loop () =
804800
( Pipe.read pipe >>= function
@@ -823,8 +819,7 @@ let watch_volume_plugins ~root_dir ~switch_path =
823819
loop () in
824820
loop ()
825821

826-
let watch_datapath_plugins ~root_dir =
827-
let root_dir = Filename.concat root_dir "datapath" in
822+
let watch_datapath_plugins ~root_dir ~pipe =
828823
let sync ~root_dir =
829824
Sys.readdir root_dir
830825
>>= fun names ->
@@ -833,11 +828,8 @@ let watch_datapath_plugins ~root_dir =
833828
Deferred.all_ignore (List.map ~f:(Datapath_plugins.register root_dir) (diff needed got_already))
834829
>>= fun () ->
835830
Deferred.all_ignore (List.map ~f:(Datapath_plugins.unregister root_dir) (diff got_already needed)) in
836-
Async_inotify.create ~recursive:false ~watch_new_dirs:false root_dir
837-
>>= fun (watch, _) ->
838831
sync ~root_dir
839832
>>= fun () ->
840-
let pipe = Async_inotify.pipe watch in
841833
let open Async_inotify.Event in
842834
let rec loop () =
843835
( Pipe.read pipe >>= function
@@ -865,14 +857,32 @@ let watch_datapath_plugins ~root_dir =
865857
let main ~root_dir ~state_path ~switch_path =
866858
Attached_SRs.reload state_path
867859
>>= fun () ->
868-
Deferred.all_unit [
869-
watch_volume_plugins ~root_dir ~switch_path;
870-
watch_datapath_plugins ~root_dir
871-
]
860+
let datapath_root = Filename.concat root_dir "datapath" in
861+
Async_inotify.create ~recursive:false ~watch_new_dirs:false datapath_root
862+
>>= fun (watch, _) ->
863+
let datapath = Async_inotify.pipe watch in
864+
let volume_root = Filename.concat root_dir "volume" in
865+
Async_inotify.create ~recursive:false ~watch_new_dirs:false volume_root
866+
>>= fun (watch, _) ->
867+
let volume = Async_inotify.pipe watch in
872868

873-
let main ~root_dir ~state_path ~switch_path =
874-
let (_: unit Deferred.t) = main ~root_dir ~state_path ~switch_path in
875-
never_returns (Scheduler.go ())
869+
let rec loop () =
870+
Monitor.try_with
871+
(fun () ->
872+
Deferred.all_unit [
873+
watch_volume_plugins ~root_dir:volume_root ~switch_path ~pipe:volume;
874+
watch_datapath_plugins ~root_dir:datapath_root ~pipe:datapath
875+
]
876+
)
877+
>>= function
878+
| Ok () ->
879+
info "main thread shutdown cleanly";
880+
return ()
881+
| Error x ->
882+
error "main thread failed with %s" (Exn.to_string x);
883+
Clock.after (Time.Span.of_sec 5.) >>= fun () ->
884+
loop () in
885+
loop ()
876886

877887
open Xcp_service
878888

@@ -917,5 +927,20 @@ let _ =
917927
use_syslog := true;
918928
info "Daemonisation successful.";
919929
end;
920-
main ~root_dir:!root_dir ~state_path:!state_path ~switch_path:!Xcp_client.switch_path
930+
let (_: unit Deferred.t) =
931+
let rec loop () =
932+
Monitor.try_with
933+
(fun () ->
934+
main ~root_dir:!root_dir ~state_path:!state_path ~switch_path:!Xcp_client.switch_path
935+
)
936+
>>= function
937+
| Ok () ->
938+
info "main thread shutdown cleanly";
939+
return ()
940+
| Error x ->
941+
error "main thread failed with %s" (Exn.to_string x);
942+
Clock.after (Time.Span.of_sec 5.) >>= fun () ->
943+
loop () in
944+
loop () in
945+
never_returns (Scheduler.go ())
921946

0 commit comments

Comments
 (0)