forked from haskell-distributed/distributed-process-platform
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSupervisor.hs
1892 lines (1694 loc) · 72.2 KB
/
Supervisor.hs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverlappingInstances #-}
-----------------------------------------------------------------------------
-- |
-- Module : Control.Distributed.Process.Platform.Supervisor
-- Copyright : (c) Tim Watson 2012 - 2013
-- License : BSD3 (see the file LICENSE)
--
-- Maintainer : Tim Watson <[email protected]>
-- Stability : experimental
-- Portability : non-portable (requires concurrency)
--
-- This module implements a process which supervises a set of other
-- processes, referred to as its children. These /child processes/ can be
-- either workers (i.e., processes that do something useful in your application)
-- or other supervisors. In this way, supervisors may be used to build a
-- hierarchical process structure called a supervision tree, which provides
-- a convenient structure for building fault tolerant software.
--
-- Unless otherwise stated, all functions in this module will cause the calling
-- process to exit unless the specified supervisor process exists.
--
-- [Supervision Principles]
--
-- A supervisor is responsible for starting, stopping and monitoring its child
-- processes so as to keep them alive by restarting them when necessary.
--
-- The supervisors children are defined as a list of child specifications
-- (see 'ChildSpec'). When a supervisor is started, its children are started
-- in left-to-right (insertion order) according to this list. When a supervisor
-- stops (or exits for any reason), it will terminate its children in reverse
-- (i.e., from right-to-left of insertion) order. Child specs can be added to
-- the supervisor after it has started, either on the left or right of the
-- existing list of children.
--
-- When the supervisor spawns its child processes, they are always linked to
-- their parent (i.e., the supervisor), therefore even if the supervisor is
-- terminated abruptly by an asynchronous exception, the children will still be
-- taken down with it, though somewhat less ceremoniously in that case.
--
-- [Restart Strategies]
--
-- Supervisors are initialised with a 'RestartStrategy', which describes how
-- the supervisor should respond to a child that exits and should be restarted
-- (see below for the rules governing child restart eligibility). Each restart
-- strategy comprises a 'RestartMode' and 'RestartLimit', which govern how
-- the restart should be handled, and the point at which the supervisor
-- should give up and terminate itself respectively.
--
-- With the exception of the @RestartOne@ strategy, which indicates that the
-- supervisor will restart /only/ the one individual failing child, each
-- strategy describes a way to select the set of children that should be
-- restarted if /any/ child fails. The @RestartAll@ strategy, as its name
-- suggests, selects /all/ children, whilst the @RestartLeft@ and @RestartRight@
-- strategies select /all/ children to the left or right of the failed child,
-- in insertion (i.e., startup) order.
--
-- Note that a /branch/ restart will only occur if the child that exited is
-- meant to be restarted. Since @Temporary@ children are never restarted and
-- @Transient@ children are /not/ restarted if they exit normally, in both these
-- circumstances we leave the remaining supervised children alone. Otherwise,
-- the failing child is /always/ included in the /branch/ to be restarted.
--
-- For a hypothetical set of children @a@ through @d@, the following pseudocode
-- demonstrates how the restart strategies work.
--
-- > let children = [a..d]
-- > let failure = c
-- > restartsFor RestartOne children failure = [c]
-- > restartsFor RestartAll children failure = [a,b,c,d]
-- > restartsFor RestartLeft children failure = [a,b,c]
-- > restartsFor RestartRight children failure = [c,d]
--
-- [Branch Restarts]
--
-- We refer to a restart (strategy) that involves a set of children as a
-- /branch restart/ from now on. The behaviour of branch restarts can be further
-- refined by the 'RestartMode' with which a 'RestartStrategy' is parameterised.
-- The @RestartEach@ mode treats each child sequentially, first stopping the
-- respective child process and then restarting it. Each child is stopped and
-- started fully before moving on to the next, as the following imaginary
-- example demonstrates for children @[a,b,c]@:
--
-- > stop a
-- > start a
-- > stop b
-- > start b
-- > stop c
-- > start c
--
-- By contrast, @RestartInOrder@ will first run through the selected list of
-- children, stopping them. Then, once all the children have been stopped, it
-- will make a second pass, to handle (re)starting them. No child is started
-- until all children have been stopped, as the following imaginary example
-- demonstrates:
--
-- > stop a
-- > stop b
-- > stop c
-- > start a
-- > start b
-- > start c
--
-- Both the previous examples have shown children being stopped and started
-- from left to right, but that is up to the user. The 'RestartMode' data
-- type's constructors take a 'RestartOrder', which determines whether the
-- selected children will be processed from @LeftToRight@ or @RightToLeft@.
--
-- Sometimes it is desireable to stop children in one order and start them
-- in the opposite. This is typically the case when children are in some
-- way dependent on one another, such that restarting them in the wrong order
-- might cause the system to misbehave. For this scenarios, there is another
-- 'RestartMode' that will shut children down in the given order, but then
-- restarts them in the reverse. Using @RestartRevOrder@ mode, if we have
-- children @[a,b,c]@ such that @b@ depends on @a@ and @c@ on @b@, we can stop
-- them in the reverse of their startup order, but restart them the other way
-- around like so:
--
-- > RestartRevOrder RightToLeft
--
-- The effect will be thus:
--
-- > stop c
-- > stop b
-- > stop a
-- > start a
-- > start b
-- > start c
--
-- [Restart Intensity Limits]
--
-- If a child process repeatedly crashes during (or shortly after) starting,
-- it is possible for the supervisor to get stuck in an endless loop of
-- restarts. In order prevent this, each restart strategy is parameterised
-- with a 'RestartLimit' that caps the number of restarts allowed within a
-- specific time period. If the supervisor exceeds this limit, it will stop,
-- terminating all its children (in left-to-right order) and exit with the
-- reason @ExitOther "ReachedMaxRestartIntensity"@.
--
-- The 'MaxRestarts' type is a positive integer, and together with a specified
-- @TimeInterval@ forms the 'RestartLimit' to which the supervisor will adhere.
-- Since a great many children can be restarted in close succession when
-- a /branch restart/ occurs (as a result of @RestartAll@, @RestartLeft@ or
-- @RestartRight@ being triggered), the supervisor will track the operation
-- as a single restart attempt, since otherwise it would likely exceed its
-- maximum restart intensity too quickly.
--
-- [Child Restart and Termination Policies]
--
-- When the supervisor detects that a child has died, the 'RestartPolicy'
-- configured in the child specification is used to determin what to do. If
-- the this is set to @Permanent@, then the child is always restarted.
-- If it is @Temporary@, then the child is never restarted and the child
-- specification is removed from the supervisor. A @Transient@ child will
-- be restarted only if it terminates /abnormally/, otherwise it is left
-- inactive (but its specification is left in place). Finally, an @Intrinsic@
-- child is treated like a @Transient@ one, except that if /this/ kind of child
-- exits /normally/, then the supervisor will also exit normally.
--
-- When the supervisor does terminate a child, the 'ChildTerminationPolicy'
-- provided with the 'ChildSpec' determines how the supervisor should go
-- about doing so. If this is @TerminateImmediately@, then the child will
-- be killed without further notice, which means the child will /not/ have
-- an opportunity to clean up any internal state and/or release any held
-- resources. If the policy is @TerminateTimeout delay@ however, the child
-- will be sent an /exit signal/ instead, i.e., the supervisor will cause
-- the child to exit via @exit childPid ExitShutdown@, and then will wait
-- until the given @delay@ for the child to exit normally. If this does not
-- happen within the given delay, the supervisor will revert to the more
-- aggressive @TerminateImmediately@ policy and try again. Any errors that
-- occur during a timed-out shutdown will be logged, however exit reasons
-- resulting from @TerminateImmediately@ are ignored.
--
-- [Creating Child Specs]
--
-- The 'ToChildStart' typeclass simplifies the process of defining a 'ChildStart'
-- providing three default instances from which a 'ChildStart' datum can be
-- generated. The first, takes a @Closure (Process ())@, where the enclosed
-- action (in the @Process@ monad) is the actual (long running) code that we
-- wish to supervise. In the case of a /managed process/, this is usually the
-- server loop, constructed by evaluating some variant of @ManagedProcess.serve@.
--
-- The other two instances provide a means for starting children without having
-- to provide a @Closure@. Both instances wrap the supplied @Process@ action in
-- some necessary boilerplate code, which handles spawning a new process and
-- communicating its @ProcessId@ to the supervisor. The instance for
-- @Addressable a => SupervisorPid -> Process a@ is special however, since this
-- API is intended for uses where the typical interactions with a process take
-- place via an opaque handle, for which an instance of the @Addressable@
-- typeclass is provided. This latter approach requires the expression which is
-- responsible for yielding the @Addressable@ handle to handling linking the
-- target process with the supervisor, since we have delegated responsibility
-- for spawning the new process and cannot perform the link oepration ourselves.
--
-- [Supervision Trees & Supervisor Termination]
--
-- To create a supervision tree, one simply adds supervisors below one another
-- as children, setting the @childType@ field of their 'ChildSpec' to
-- @Supervisor@ instead of @Worker@. Supervision tree can be arbitrarilly
-- deep, and it is for this reason that we recommend giving a @Supervisor@ child
-- an arbitrary length of time to stop, by setting the delay to @Infinity@
-- or a very large @TimeInterval@.
--
-----------------------------------------------------------------------------
module Control.Distributed.Process.Platform.Supervisor
( -- * Defining and Running a Supervisor
ChildSpec(..)
, ChildKey
, ChildType(..)
, ChildTerminationPolicy(..)
, ChildStart(..)
, RegisteredName(LocalName, CustomRegister)
, RestartPolicy(..)
-- , ChildRestart(..)
, ChildRef(..)
, isRunning
, isRestarting
, Child
, StaticLabel
, SupervisorPid
, ToChildStart(..)
, start
, run
-- * Limits and Defaults
, MaxRestarts
, maxRestarts
, RestartLimit(..)
, limit
, defaultLimits
, RestartMode(..)
, RestartOrder(..)
, RestartStrategy(..)
, ShutdownMode(..)
, restartOne
, restartAll
, restartLeft
, restartRight
-- * Adding and Removing Children
, addChild
, AddChildResult(..)
, StartChildResult(..)
, startChild
, startNewChild
, terminateChild
, TerminateChildResult(..)
, deleteChild
, DeleteChildResult(..)
, restartChild
, RestartChildResult(..)
-- * Normative Shutdown
, shutdown
, shutdownAndWait
-- * Queries and Statistics
, lookupChild
, listChildren
, SupervisorStats(..)
, statistics
-- * Additional (Misc) Types
, StartFailure(..)
, ChildInitFailure(..)
) where
import Control.DeepSeq (NFData)
import Control.Distributed.Process hiding (call)
import Control.Distributed.Process.Serializable()
import Control.Distributed.Process.Platform.Internal.Primitives hiding (monitor)
import Control.Distributed.Process.Platform.Internal.Types
( ExitReason(..)
)
import Control.Distributed.Process.Platform.ManagedProcess
( handleCall
, handleInfo
, reply
, continue
, stop
, stopWith
, input
, defaultProcess
, prioritised
, InitHandler
, InitResult(..)
, ProcessAction
, ProcessReply
, ProcessDefinition(..)
, PrioritisedProcessDefinition(..)
, Priority(..)
, DispatchPriority
, UnhandledMessagePolicy(Drop)
)
import qualified Control.Distributed.Process.Platform.ManagedProcess.UnsafeClient as Unsafe
( call
, cast
)
import qualified Control.Distributed.Process.Platform.ManagedProcess as MP
( pserve
)
import Control.Distributed.Process.Platform.ManagedProcess.Server.Priority
( prioritiseCast_
, prioritiseCall_
, prioritiseInfo_
, setPriority
)
import Control.Distributed.Process.Platform.ManagedProcess.Server.Restricted
( RestrictedProcess
, Result
, RestrictedAction
, getState
, putState
)
import qualified Control.Distributed.Process.Platform.ManagedProcess.Server.Restricted as Restricted
( handleCallIf
, handleCall
, handleCast
, reply
, continue
)
-- import Control.Distributed.Process.Platform.ManagedProcess.Server.Unsafe
-- import Control.Distributed.Process.Platform.ManagedProcess.Server
import Control.Distributed.Process.Platform.Service.SystemLog
( LogClient
, LogChan
, LogText
, Logger(..)
)
import qualified Control.Distributed.Process.Platform.Service.SystemLog as Log
import Control.Distributed.Process.Platform.Time
import Control.Exception (SomeException, Exception, throwIO)
import Control.Monad.Error
import Data.Accessor
( Accessor
, accessor
, (^:)
, (.>)
, (^=)
, (^.)
)
import Data.Binary
import Data.Foldable (find, foldlM, toList)
import Data.List (foldl')
import qualified Data.List as List (delete)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as Map
import Data.Sequence
( Seq
, ViewL(EmptyL, (:<))
, ViewR(EmptyR, (:>))
, (<|)
, (|>)
, (><)
, filter)
import qualified Data.Sequence as Seq
import Data.Time.Clock
( NominalDiffTime
, UTCTime
, getCurrentTime
, diffUTCTime
)
import Data.Typeable (Typeable)
#if ! MIN_VERSION_base(4,6,0)
import Prelude hiding (catch, filter, init, rem)
#else
import Prelude hiding (filter, init, rem)
#endif
import GHC.Generics
--------------------------------------------------------------------------------
-- Types --
--------------------------------------------------------------------------------
-- external client/configuration API
newtype MaxRestarts = MaxR { maxNumberOfRestarts :: Int }
deriving (Typeable, Generic, Show)
instance Binary MaxRestarts where
instance NFData MaxRestarts where
-- | Smart constructor for @MaxRestarts@. The maximum
-- restart count must be a positive integer.
maxRestarts :: Int -> MaxRestarts
maxRestarts r | r >= 0 = MaxR r
| otherwise = error "MaxR must be >= 0"
-- | A compulsary limit on the number of restarts that a supervisor will
-- tolerate before it terminates all child processes and then itself.
-- If > @MaxRestarts@ occur within the specified @TimeInterval@, termination
-- will occur. This prevents the supervisor from entering an infinite loop of
-- child process terminations and restarts.
--
data RestartLimit =
RestartLimit
{ maxR :: !MaxRestarts
, maxT :: !TimeInterval
}
deriving (Typeable, Generic, Show)
instance Binary RestartLimit where
instance NFData RestartLimit where
limit :: MaxRestarts -> TimeInterval -> RestartLimit
limit mr ti = RestartLimit mr ti
defaultLimits :: RestartLimit
defaultLimits = limit (MaxR 1) (seconds 1)
data RestartOrder = LeftToRight | RightToLeft
deriving (Typeable, Generic, Eq, Show)
instance Binary RestartOrder where
instance NFData RestartOrder where
-- TODO: rename these, somehow...
data RestartMode =
RestartEach { order :: !RestartOrder }
{- ^ stop then start each child sequentially, i.e., @foldlM stopThenStart children@ -}
| RestartInOrder { order :: !RestartOrder }
{- ^ stop all children first, then restart them sequentially -}
| RestartRevOrder { order :: !RestartOrder }
{- ^ stop all children in the given order, but start them in reverse -}
deriving (Typeable, Generic, Show, Eq)
instance Binary RestartMode where
instance NFData RestartMode where
data ShutdownMode = SequentialShutdown !RestartOrder
| ParallelShutdown
deriving (Typeable, Generic, Show, Eq)
instance Binary ShutdownMode where
instance NFData ShutdownMode where
-- | Strategy used by a supervisor to handle child restarts, whether due to
-- unexpected child failure or explicit restart requests from a client.
--
-- Some terminology: We refer to child processes managed by the same supervisor
-- as /siblings/. When restarting a child process, the 'RestartNone' policy
-- indicates that sibling processes should be left alone, whilst the 'RestartAll'
-- policy will cause /all/ children to be restarted (in the same order they were
-- started).
--
-- The other two restart strategies refer to /prior/ and /subsequent/
-- siblings, which describe's those children's configured position
-- (i.e., insertion order). These latter modes allow one to control the order
-- in which siblings are restarted, and to exclude some siblings from the restart
-- without having to resort to grouping them using a child supervisor.
--
data RestartStrategy =
RestartOne
{ intensity :: !RestartLimit
} -- ^ restart only the failed child process
| RestartAll
{ intensity :: !RestartLimit
, mode :: !RestartMode
} -- ^ also restart all siblings
| RestartLeft
{ intensity :: !RestartLimit
, mode :: !RestartMode
} -- ^ restart prior siblings (i.e., prior /start order/)
| RestartRight
{ intensity :: !RestartLimit
, mode :: !RestartMode
} -- ^ restart subsequent siblings (i.e., subsequent /start order/)
deriving (Typeable, Generic, Show)
instance Binary RestartStrategy where
instance NFData RestartStrategy where
-- | Provides a default 'RestartStrategy' for @RestartOne@.
-- > restartOne = RestartOne defaultLimits
--
restartOne :: RestartStrategy
restartOne = RestartOne defaultLimits
-- | Provides a default 'RestartStrategy' for @RestartAll@.
-- > restartOne = RestartAll defaultLimits (RestartEach LeftToRight)
--
restartAll :: RestartStrategy
restartAll = RestartAll defaultLimits (RestartEach LeftToRight)
-- | Provides a default 'RestartStrategy' for @RestartLeft@.
-- > restartOne = RestartLeft defaultLimits (RestartEach LeftToRight)
--
restartLeft :: RestartStrategy
restartLeft = RestartLeft defaultLimits (RestartEach LeftToRight)
-- | Provides a default 'RestartStrategy' for @RestartRight@.
-- > restartOne = RestartRight defaultLimits (RestartEach LeftToRight)
--
restartRight :: RestartStrategy
restartRight = RestartRight defaultLimits (RestartEach LeftToRight)
-- | Identifies a child process by name.
type ChildKey = String
-- | A reference to a (possibly running) child.
data ChildRef =
ChildRunning !ProcessId -- ^ a reference to the (currently running) child
| ChildRunningExtra !ProcessId !Message -- ^ also a currently running child, with /extra/ child info
| ChildRestarting !ProcessId -- ^ a reference to the /old/ (previous) child (now restarting)
| ChildStopped -- ^ indicates the child is not currently running
| ChildStartIgnored -- ^ a non-temporary child exited with 'ChildInitIgnore'
deriving (Typeable, Generic, Show)
instance Binary ChildRef where
instance NFData ChildRef where
instance Eq ChildRef where
ChildRunning p1 == ChildRunning p2 = p1 == p2
ChildRunningExtra p1 _ == ChildRunningExtra p2 _ = p1 == p2
ChildRestarting p1 == ChildRestarting p2 = p1 == p2
ChildStopped == ChildStopped = True
ChildStartIgnored == ChildStartIgnored = True
_ == _ = False
isRunning :: ChildRef -> Bool
isRunning (ChildRunning _) = True
isRunning (ChildRunningExtra _ _) = True
isRunning _ = False
isRestarting :: ChildRef -> Bool
isRestarting (ChildRestarting _) = True
isRestarting _ = False
instance Resolvable ChildRef where
resolve (ChildRunning pid) = return $ Just pid
resolve (ChildRunningExtra pid _) = return $ Just pid
resolve _ = return Nothing
-- these look a bit odd, but we basically want to avoid resolving
-- or sending to (ChildRestarting oldPid)
instance Routable ChildRef where
sendTo (ChildRunning addr) = sendTo addr
sendTo _ = error "invalid address for child process"
unsafeSendTo (ChildRunning ch) = unsafeSendTo ch
unsafeSendTo _ = error "invalid address for child process"
-- | Specifies whether the child is another supervisor, or a worker.
data ChildType = Worker | Supervisor
deriving (Typeable, Generic, Show, Eq)
instance Binary ChildType where
instance NFData ChildType where
-- | Describes when a terminated child process should be restarted.
data RestartPolicy =
Permanent -- ^ a permanent child will always be restarted
| Temporary -- ^ a temporary child will /never/ be restarted
| Transient -- ^ A transient child will be restarted only if it terminates abnormally
| Intrinsic -- ^ as 'Transient', but if the child exits normally, the supervisor also exits normally
deriving (Typeable, Generic, Eq, Show)
instance Binary RestartPolicy where
instance NFData RestartPolicy where
{-
data ChildRestart =
Restart RestartPolicy -- ^ restart according to the given policy
| DelayedRestart RestartPolicy TimeInterval -- ^ perform a /delayed restart/
deriving (Typeable, Generic, Eq, Show)
instance Binary ChildRestart where
-}
data ChildTerminationPolicy =
TerminateTimeout !Delay
| TerminateImmediately
deriving (Typeable, Generic, Eq, Show)
instance Binary ChildTerminationPolicy where
instance NFData ChildTerminationPolicy where
data RegisteredName =
LocalName !String
| GlobalName !String
| CustomRegister !(Closure (ProcessId -> Process ()))
deriving (Typeable, Generic)
instance Binary RegisteredName where
instance NFData RegisteredName where
instance Show RegisteredName where
show (CustomRegister _) = "Custom Register"
show (LocalName n) = n
show (GlobalName n) = "global::" ++ n
data ChildStart =
RunClosure !(Closure (Process ()))
| CreateHandle !(Closure (SupervisorPid -> Process (ProcessId, Message)))
| StarterProcess !ProcessId
deriving (Typeable, Generic, Show)
instance Binary ChildStart where
instance NFData ChildStart where
-- | Specification for a child process. The child must be uniquely identified
-- by it's @childKey@ within the supervisor. The supervisor will start the child
-- itself, therefore @childRun@ should contain the child process' implementation
-- e.g., if the child is a long running server, this would be the server /loop/,
-- as with e.g., @ManagedProces.start@.
data ChildSpec = ChildSpec {
childKey :: !ChildKey
, childType :: !ChildType
, childRestart :: !RestartPolicy
, childStop :: !ChildTerminationPolicy
, childStart :: !ChildStart
, childRegName :: !(Maybe RegisteredName)
} deriving (Typeable, Generic, Show)
instance Binary ChildSpec where
instance NFData ChildSpec where
data ChildInitFailure =
ChildInitFailure !String
| ChildInitIgnore
deriving (Typeable, Generic, Show)
instance Exception ChildInitFailure where
data SupervisorStats = SupervisorStats {
_children :: Int
, _supervisors :: Int
, _workers :: Int
, _running :: Int
, _activeSupervisors :: Int
, _activeWorkers :: Int
-- TODO: usage/restart/freq stats
, totalRestarts :: Int
} deriving (Typeable, Generic, Show)
instance Binary SupervisorStats where
instance NFData SupervisorStats where
-- | Static labels (in the remote table) are strings.
type StaticLabel = String
-- | Provides failure information when (re-)start failure is indicated.
data StartFailure =
StartFailureDuplicateChild !ChildRef -- ^ a child with this 'ChildKey' already exists
| StartFailureAlreadyRunning !ChildRef -- ^ the child is already up and running
| StartFailureBadClosure !StaticLabel -- ^ a closure cannot be resolved
| StartFailureDied !DiedReason -- ^ a child died (almost) immediately on starting
deriving (Typeable, Generic, Show, Eq)
instance Binary StartFailure where
instance NFData StartFailure where
-- | The result of a call to 'removeChild'.
data DeleteChildResult =
ChildDeleted -- ^ the child specification was successfully removed
| ChildNotFound -- ^ the child specification was not found
| ChildNotStopped !ChildRef -- ^ the child was not removed, as it was not stopped.
deriving (Typeable, Generic, Show, Eq)
instance Binary DeleteChildResult where
instance NFData DeleteChildResult where
type Child = (ChildRef, ChildSpec)
type SupervisorPid = ProcessId
-- | A type that can be converted to a 'ChildStart'.
class ToChildStart a where
toChildStart :: a -> Process ChildStart
instance ToChildStart (Closure (Process ())) where
toChildStart = return . RunClosure
instance ToChildStart (Closure (SupervisorPid -> Process (ProcessId, Message))) where
toChildStart = return . CreateHandle
-- StarterProcess variants of ChildStart
expectTriple :: Process (ProcessId, ChildKey, SendPort ProcessId)
expectTriple = expect
instance ToChildStart (Process ()) where
toChildStart proc = do
starterPid <- spawnLocal $ do
-- note [linking]: the first time we see the supervisor's pid,
-- we must link to it, but only once, otherwise we simply waste
-- time and resources creating duplicate links
(supervisor, _, sendPidPort) <- expectTriple
link supervisor
spawnIt proc supervisor sendPidPort
tcsProcLoop proc
return (StarterProcess starterPid)
where
tcsProcLoop :: Process () -> Process ()
tcsProcLoop p = forever' $ do
(supervisor, _, sendPidPort) <- expectTriple
spawnIt p supervisor sendPidPort
spawnIt :: Process ()
-> ProcessId
-> SendPort ProcessId
-> Process ()
spawnIt proc' supervisor sendPidPort = do
supervisedPid <- spawnLocal $ do
link supervisor
self <- getSelfPid
(proc' `catches` [ Handler $ filterInitFailures supervisor self
, Handler $ logFailure supervisor self ])
`catchesExit` [\_ m -> handleMessageIf m (== ExitShutdown)
(\_ -> return ())]
sendChan sendPidPort supervisedPid
instance (Resolvable a) => ToChildStart (SupervisorPid -> Process a) where
toChildStart proc = do
starterPid <- spawnLocal $ do
-- see note [linking] in the previous instance (above)
(supervisor, _, sendPidPort) <- expectTriple
link supervisor
injectIt proc supervisor sendPidPort >> injectorLoop proc
return $ StarterProcess starterPid
where
injectorLoop :: Resolvable a
=> (SupervisorPid -> Process a)
-> Process ()
injectorLoop p = forever' $ do
(supervisor, _, sendPidPort) <- expectTriple
injectIt p supervisor sendPidPort
injectIt :: Resolvable a
=> (SupervisorPid -> Process a)
-> ProcessId
-> SendPort ProcessId
-> Process ()
injectIt proc' supervisor sendPidPort = do
addr <- proc' supervisor
mPid <- resolve addr
case mPid of
Nothing -> die "UnresolvableAddress"
Just p -> sendChan sendPidPort p
-- internal APIs
data DeleteChild = DeleteChild !ChildKey
deriving (Typeable, Generic)
instance Binary DeleteChild where
instance NFData DeleteChild where
data FindReq = FindReq ChildKey
deriving (Typeable, Generic)
instance Binary FindReq where
instance NFData FindReq where
data StatsReq = StatsReq
deriving (Typeable, Generic)
instance Binary StatsReq where
instance NFData StatsReq where
data ListReq = ListReq
deriving (Typeable, Generic)
instance Binary ListReq where
instance NFData ListReq where
type ImmediateStart = Bool
data AddChildReq = AddChild !ImmediateStart !ChildSpec
deriving (Typeable, Generic, Show)
instance Binary AddChildReq where
instance NFData AddChildReq where
data AddChildRes = Exists ChildRef | Added State
data AddChildResult =
ChildAdded !ChildRef
| ChildFailedToStart !StartFailure
deriving (Typeable, Generic, Show, Eq)
instance Binary AddChildResult where
instance NFData AddChildResult where
data StartChildReq = StartChild !ChildKey
deriving (Typeable, Generic)
instance Binary StartChildReq where
instance NFData StartChildReq where
data StartChildResult =
ChildStartOk !ChildRef
| ChildStartFailed !StartFailure
| ChildStartUnknownId
| ChildStartInitIgnored
deriving (Typeable, Generic, Show, Eq)
instance Binary StartChildResult where
instance NFData StartChildResult where
data RestartChildReq = RestartChildReq !ChildKey
deriving (Typeable, Generic, Show, Eq)
instance Binary RestartChildReq where
instance NFData RestartChildReq where
{-
data DelayedRestartReq = DelayedRestartReq !ChildKey !DiedReason
deriving (Typeable, Generic, Show, Eq)
instance Binary DelayedRestartReq where
-}
data RestartChildResult =
ChildRestartOk !ChildRef
| ChildRestartFailed !StartFailure
| ChildRestartUnknownId
| ChildRestartIgnored
deriving (Typeable, Generic, Show, Eq)
instance Binary RestartChildResult where
instance NFData RestartChildResult where
data TerminateChildReq = TerminateChildReq !ChildKey
deriving (Typeable, Generic, Show, Eq)
instance Binary TerminateChildReq where
instance NFData TerminateChildReq where
data TerminateChildResult =
TerminateChildOk
| TerminateChildUnknownId
deriving (Typeable, Generic, Show, Eq)
instance Binary TerminateChildResult where
instance NFData TerminateChildResult where
data IgnoreChildReq = IgnoreChildReq !ProcessId
deriving (Typeable, Generic)
instance Binary IgnoreChildReq where
instance NFData IgnoreChildReq where
type ChildSpecs = Seq Child
type Prefix = ChildSpecs
type Suffix = ChildSpecs
data StatsType = Active | Specified
data LogSink = LogProcess !LogClient | LogChan
instance Logger LogSink where
logMessage LogChan = logMessage Log.logChannel
logMessage (LogProcess client') = logMessage client'
data State = State {
_specs :: ChildSpecs
, _active :: Map ProcessId ChildKey
, _strategy :: RestartStrategy
, _restartPeriod :: NominalDiffTime
, _restarts :: [UTCTime]
, _stats :: SupervisorStats
, _logger :: LogSink
, shutdownStrategy :: ShutdownMode
}
--------------------------------------------------------------------------------
-- Starting/Running Supervisor --
--------------------------------------------------------------------------------
-- | Start a supervisor (process), running the supplied children and restart
-- strategy.
--
-- > start = spawnLocal . run
--
start :: RestartStrategy -> ShutdownMode -> [ChildSpec] -> Process ProcessId
start rs ss cs = spawnLocal $ run rs ss cs
-- | Run the supplied children using the provided restart strategy.
--
run :: RestartStrategy -> ShutdownMode -> [ChildSpec] -> Process ()
run rs ss specs' = MP.pserve (rs, ss, specs') supInit serverDefinition
--------------------------------------------------------------------------------
-- Client Facing API --
--------------------------------------------------------------------------------
-- | Obtain statistics about a running supervisor.
--
statistics :: Addressable a => a -> Process (SupervisorStats)
statistics = (flip Unsafe.call) StatsReq
-- | Lookup a possibly supervised child, given its 'ChildKey'.
--
lookupChild :: Addressable a => a -> ChildKey -> Process (Maybe (ChildRef, ChildSpec))
lookupChild addr key = Unsafe.call addr $ FindReq key
-- | List all know (i.e., configured) children.
--
listChildren :: Addressable a => a -> Process [Child]
listChildren addr = Unsafe.call addr ListReq
-- | Add a new child.
--
addChild :: Addressable a => a -> ChildSpec -> Process AddChildResult
addChild addr spec = Unsafe.call addr $ AddChild False spec
-- | Start an existing (configured) child. The 'ChildSpec' must already be
-- present (see 'addChild'), otherwise the operation will fail.
--
startChild :: Addressable a => a -> ChildKey -> Process StartChildResult
startChild addr key = Unsafe.call addr $ StartChild key
-- | Atomically add and start a new child spec. Will fail if a child with
-- the given key is already present.
--
startNewChild :: Addressable a
=> a
-> ChildSpec
-> Process AddChildResult
startNewChild addr spec = Unsafe.call addr $ AddChild True spec
-- | Delete a supervised child. The child must already be stopped (see
-- 'terminateChild').
--
deleteChild :: Addressable a => a -> ChildKey -> Process DeleteChildResult
deleteChild addr spec = Unsafe.call addr $ DeleteChild spec
-- | Terminate a running child.
--
terminateChild :: Addressable a
=> a
-> ChildKey
-> Process TerminateChildResult
terminateChild sid = Unsafe.call sid . TerminateChildReq
-- | Forcibly restart a running child.
--
restartChild :: Addressable a
=> a
-> ChildKey
-> Process RestartChildResult
restartChild sid = Unsafe.call sid . RestartChildReq
-- | Gracefully terminate a running supervisor. Returns immediately if the
-- /address/ cannot be resolved.
--
shutdown :: Resolvable a => a -> Process ()
shutdown sid = do
mPid <- resolve sid
case mPid of
Nothing -> return ()
Just p -> exit p ExitShutdown
-- | As 'shutdown', but waits until the supervisor process has exited, at which
-- point the caller can be sure that all children have also stopped. Returns
-- immediately if the /address/ cannot be resolved.
--
shutdownAndWait :: Resolvable a => a -> Process ()
shutdownAndWait sid = do
mPid <- resolve sid
case mPid of
Nothing -> return ()
Just p -> withMonitor p $ do
shutdown p
receiveWait [ matchIf (\(ProcessMonitorNotification _ p' _) -> p' == p)
(\_ -> return ())
]
--------------------------------------------------------------------------------
-- Server Initialisation/Startup --
--------------------------------------------------------------------------------
supInit :: InitHandler (RestartStrategy, ShutdownMode, [ChildSpec]) State
supInit (strategy', shutdown', specs') = do
logClient <- Log.client
let client' = case logClient of
Nothing -> LogChan
Just c -> LogProcess c
let initState = ( ( -- as a NominalDiffTime (in seconds)
restartPeriod ^= configuredRestartPeriod
)
. (strategy ^= strategy')
. (logger ^= client')
$ emptyState shutdown'
)
-- TODO: should we return Ignore, as per OTP's supervisor, if no child starts?
(foldlM initChild initState specs' >>= return . (flip InitOk) Infinity)
`catch` \(e :: SomeException) -> return $ InitStop (show e)
where
initChild :: State -> ChildSpec -> Process State
initChild st ch =
case (findChild (childKey ch) st) of
Just (ref, _) -> die $ StartFailureDuplicateChild ref
Nothing -> tryStartChild ch >>= initialised st ch
configuredRestartPeriod =
let maxT' = maxT (intensity strategy')
tI = asTimeout maxT'
tMs = (fromIntegral tI * (0.000001 :: Float))
in fromRational (toRational tMs) :: NominalDiffTime
initialised :: State
-> ChildSpec
-> Either StartFailure ChildRef
-> Process State
initialised _ _ (Left err) = liftIO $ throwIO $ ChildInitFailure (show err)
initialised state spec (Right ref) = do
mPid <- resolve ref
case mPid of
Nothing -> die $ (childKey spec) ++ ": InvalidChildRef"
Just pid -> do
return $ ( (active ^: Map.insert pid chId)
. (specs ^: (|> (ref, spec)))
$ bumpStats Active chType (+1) state
)
where chId = childKey spec
chType = childType spec
--------------------------------------------------------------------------------
-- Server Definition/State --
--------------------------------------------------------------------------------