Skip to content

Commit 91430e8

Browse files
committed
cardano-node: fix blockdelay_cdf* metrics
1 parent 2598cc6 commit 91430e8

File tree

2 files changed

+71
-93
lines changed

2 files changed

+71
-93
lines changed

cardano-node/src/Cardano/Node/Tracing/Tracers.hs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ mkConsensusTracers configReflection trBase trForward mbTrEKG _trDataPoint trConf
271271

272272
-- Special blockFetch client metrics, send directly to EKG
273273
!blockFetchClientMetricsTr <- do
274-
tr1 <- foldTraceM calculateBlockFetchClientMetrics initialClientMetrics
274+
tr1 <- foldTraceM (\cm lc -> pure . calculateBlockFetchClientMetrics cm lc) initialClientMetrics
275275
(metricsFormatter
276276
(mkMetricsTracer mbTrEKG))
277277
pure $ filterTrace (\ (_, TraceLabelPeer _ m) -> case m of

cardano-node/src/Cardano/Node/Tracing/Tracers/Consensus.hs

Lines changed: 70 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,9 @@
88
{-# LANGUAGE TypeFamilies #-}
99
{-# LANGUAGE TypeOperators #-}
1010
{-# LANGUAGE UndecidableInstances #-}
11+
{-# LANGUAGE ViewPatterns #-}
1112

1213
{-# OPTIONS_GHC -Wno-orphans #-}
13-
{-# LANGUAGE InstanceSigs #-}
1414

1515
module Cardano.Node.Tracing.Tracers.Consensus
1616
(
@@ -518,19 +518,19 @@ instance MetaTrace (TraceChainSyncServerEvent blk) where
518518
--------------------------------------------------------------------------------
519519

520520
data CdfCounter = CdfCounter {
521-
limit :: !Int64
521+
limit :: !Double
522522
, counter :: !Int64
523523
}
524524

525-
decCdf :: a -> CdfCounter -> CdfCounter
526-
decCdf _v cdf = cdf {counter = counter cdf - 1}
527-
528-
incCdf ::Ord a => Num a => a -> CdfCounter -> CdfCounter
529-
incCdf v cdf =
530-
if v < fromIntegral (limit cdf)
531-
then cdf {counter = counter cdf + 1}
532-
else cdf
525+
decCdf :: Double -> CdfCounter -> CdfCounter
526+
decCdf v cdf@CdfCounter{..}
527+
| v < limit = cdf {counter = counter - 1}
528+
| otherwise = cdf
533529

530+
incCdf :: Double -> CdfCounter -> CdfCounter
531+
incCdf v cdf@CdfCounter{..}
532+
| v < limit = cdf {counter = counter + 1}
533+
| otherwise = cdf
534534

535535
data ClientMetrics = ClientMetrics {
536536
cmSlotMap :: IntPSQ Word64 NominalDiffTime
@@ -545,31 +545,22 @@ data ClientMetrics = ClientMetrics {
545545

546546
instance LogFormatting ClientMetrics where
547547
forMachine _dtal _ = mempty
548+
asMetrics ClientMetrics {cmTraceIt = False} = []
548549
asMetrics ClientMetrics {..} =
549-
if cmTraceIt
550-
then
551-
let size = Pq.size cmSlotMap
552-
msgs =
553-
[ DoubleM "blockfetchclient.blockdelay" cmDelay
554-
, IntM "blockfetchclient.blocksize" (fromIntegral cmBlockSize)
555-
]
556-
<> if cmTraceVars
557-
then cdfMetric "blockfetchclient.blockdelay.cdfOne" cmCdf1sVar
558-
<> cdfMetric "blockfetchclient.blockdelay.cdfThree" cmCdf3sVar
559-
<> cdfMetric "blockfetchclient.blockdelay.cdfFive" cmCdf5sVar
560-
<> lateBlockMetric cmDelay
561-
else []
562-
where
563-
cdfMetric name var =
564-
[ DoubleM name (fromIntegral (counter var) / fromIntegral size)
565-
]
566-
567-
lateBlockMetric delay =
568-
[ CounterM "blockfetchclient.lateblocks" Nothing
569-
| delay > 5
570-
]
571-
in msgs
572-
else []
550+
[ DoubleM "blockfetchclient.blockdelay" cmDelay
551+
, IntM "blockfetchclient.blocksize" (fromIntegral cmBlockSize)
552+
]
553+
++ if cmTraceVars
554+
then [ cdfMetric "blockfetchclient.blockdelay.cdfOne" cmCdf1sVar
555+
, cdfMetric "blockfetchclient.blockdelay.cdfThree" cmCdf3sVar
556+
, cdfMetric "blockfetchclient.blockdelay.cdfFive" cmCdf5sVar
557+
]
558+
++ lateBlockMetric
559+
else []
560+
where
561+
size = Pq.size cmSlotMap
562+
cdfMetric name var = DoubleM name (fromIntegral (counter var) / fromIntegral size)
563+
lateBlockMetric = [ CounterM "blockfetchclient.lateblocks" Nothing | cmDelay > 5 ]
573564

574565
instance MetaTrace ClientMetrics where
575566
namespaceFor _ = Namespace [] ["ClientMetrics"]
@@ -606,70 +597,57 @@ calculateBlockFetchClientMetrics ::
606597
ClientMetrics
607598
-> LoggingContext
608599
-> BlockFetch.TraceLabelPeer peer (BlockFetch.TraceFetchClientState header)
609-
-> IO ClientMetrics
600+
-> ClientMetrics
610601
calculateBlockFetchClientMetrics cm@ClientMetrics {..} _lc
611602
(TraceLabelPeer _ (BlockFetch.CompletedBlockFetch p _ _ _ forgeDelay blockSize)) =
612603
case pointSlot p of
613-
Origin -> pure cm {cmTraceIt = False} -- Nothing to do for Origin
604+
Origin -> nothingToDo
614605
At (SlotNo slotNo) ->
615-
if Pq.null cmSlotMap && forgeDelay > 20
616-
then pure cm {cmTraceIt = False} -- During startup wait until we are in sync
606+
if Pq.null cmSlotMap && forgeDelay > 20 -- During startup wait until we are in sync
607+
then nothingToDo
617608
else processSlot slotNo
618609
where
619-
processSlot slotNo =
620-
case Pq.lookup (fromIntegral slotNo) cmSlotMap of
621-
Just _ -> pure cm {cmTraceIt = False} -- Duplicate, only track the first
622-
Nothing -> let slotMap' = Pq.insert (fromIntegral slotNo) slotNo forgeDelay cmSlotMap
623-
in if Pq.size slotMap' > 1080
624-
then trimSlotMap slotMap' slotNo
625-
else updateMetrics slotMap' slotNo
626-
627-
trimSlotMap slotMap' slotNo =
628-
case Pq.minView slotMap' of
629-
Nothing -> pure cm {cmTraceIt = False} -- Error: Just inserted element
630-
Just (_, minSlotNo, minDelay, slotMap'') ->
631-
if minSlotNo == slotNo
632-
then pure cm { cmTraceIt = False, cmSlotMap = slotMap' }
633-
else let (cdf1sVar, cdf3sVar, cdf5sVar) = updateCDFs minDelay forgeDelay
634-
in pure cm
635-
{ cmCdf1sVar = cdf1sVar
636-
, cmCdf3sVar = cdf3sVar
637-
, cmCdf5sVar = cdf5sVar
638-
, cmDelay = realToFrac forgeDelay
639-
, cmBlockSize = getSizeInBytes blockSize
640-
, cmTraceVars = True
641-
, cmTraceIt = True
642-
, cmSlotMap = slotMap'' }
643-
644-
updateMetrics slotMap' _slotNo =
645-
let (cdf1sVar, cdf3sVar, cdf5sVar) = updateCDFs 0 forgeDelay
646-
in if Pq.size slotMap' >= 45
647-
then pure cm
648-
{ cmCdf1sVar = cdf1sVar
649-
, cmCdf3sVar = cdf3sVar
650-
, cmCdf5sVar = cdf5sVar
651-
, cmDelay = realToFrac forgeDelay
652-
, cmBlockSize = getSizeInBytes blockSize
653-
, cmTraceVars = True
654-
, cmTraceIt = True
655-
, cmSlotMap = slotMap' }
656-
else pure cm
657-
{ cmCdf1sVar = cdf1sVar
658-
, cmCdf3sVar = cdf3sVar
659-
, cmCdf5sVar = cdf5sVar
660-
, cmDelay = realToFrac forgeDelay
661-
, cmBlockSize = getSizeInBytes blockSize
662-
, cmTraceVars = False
663-
, cmTraceIt = True
664-
, cmSlotMap = slotMap' }
665-
666-
updateCDFs minDelay forgeDelay' =
667-
( incCdf forgeDelay' (decCdf minDelay cmCdf1sVar)
668-
, incCdf forgeDelay' (decCdf minDelay cmCdf3sVar)
669-
, incCdf forgeDelay' (decCdf minDelay cmCdf5sVar) )
670-
671-
672-
calculateBlockFetchClientMetrics cm _lc _ = pure cm
610+
nothingToDo = cm {cmTraceIt = False}
611+
delay = realToFrac forgeDelay
612+
613+
processSlot slotNo
614+
| fromIntegral slotNo `Pq.member` cmSlotMap = nothingToDo -- Duplicate, only track the first
615+
| otherwise =
616+
let slotMap' = Pq.insert (fromIntegral slotNo) slotNo forgeDelay cmSlotMap
617+
in if Pq.size slotMap' > 1080 -- TODO: k/2, should come from config file
618+
then trimSlotMap slotMap' slotNo
619+
else updateMetrics slotMap'
620+
621+
trimSlotMap slotMap' slotNo = case Pq.minView slotMap' of
622+
Nothing -> nothingToDo -- Error: Just inserted element
623+
Just (_, minSlotNo, realToFrac -> minDelay, slotMap'')
624+
| minSlotNo == slotNo -> nothingToDo
625+
| otherwise -> cm
626+
{ cmCdf1sVar = adjust minDelay cmCdf1sVar
627+
, cmCdf3sVar = adjust minDelay cmCdf3sVar
628+
, cmCdf5sVar = adjust minDelay cmCdf5sVar
629+
, cmDelay = delay
630+
, cmBlockSize = getSizeInBytes blockSize
631+
, cmTraceVars = True
632+
, cmTraceIt = True
633+
, cmSlotMap = slotMap''
634+
}
635+
636+
updateMetrics slotMap' = cm
637+
{ cmCdf1sVar = update cmCdf1sVar
638+
, cmCdf3sVar = update cmCdf3sVar
639+
, cmCdf5sVar = update cmCdf5sVar
640+
, cmDelay = delay
641+
, cmBlockSize = getSizeInBytes blockSize
642+
, cmTraceVars = Pq.size slotMap' >= 45 -- wait until we have at least 45 samples before providing cdf estimates
643+
, cmTraceIt = True
644+
, cmSlotMap = slotMap'
645+
}
646+
647+
update = incCdf delay
648+
adjust d = update . decCdf d
649+
650+
calculateBlockFetchClientMetrics cm _lc _ = cm
673651

674652

675653
--------------------------------------------------------------------------------

0 commit comments

Comments
 (0)