@@ -437,53 +437,39 @@ def get_emf_unit_for_fc_metrics(full_key):
437
437
438
438
def flush_fc_metrics_to_cw (fc_metrics , metrics ):
439
439
"""
440
- Flush Firecracker metrics to CloudWatch
441
- Use an existing metrics logger with existing dimensions so
442
- that its easier to corelate the metrics with the test calling it.
443
- Add a prefix "fc_metrics." to differentiate these metrics, this
444
- also helps to avoid using this metrics in AB tests.
440
+ Flush Firecracker metrics to CloudWatch. Use an existing metrics logger with existing dimensions so that it is
441
+ easier to correlate the metrics with the test calling it. Add a prefix "fc_metrics." to differentiate these metrics,
442
+ this also helps to avoid using this metrics in A/B tests.
445
443
NOTE:
446
- There are metrics with keywords "fail", "err",
447
- "num_faults", "panic" in their name and represent
448
- some kind of failure in Firecracker.
449
- This function `does not` assert on these failure metrics
450
- since some tests might not want to assert on them while
451
- some tests might want to assert on some but not others.
444
+ There are metrics with keywords "fail", "err", "num_faults", "panic" in their name and represent some kind of
445
+ failure in Firecracker. We assert that all these are zero, to catch potentially silent failure modes. This
446
+ means the FcMonitor cannot be used in negative tests that might cause such metrics to be emitted.
452
447
"""
453
448
454
- def walk_key (full_key , keys ):
455
- for key , value in keys .items ():
456
- final_full_key = full_key + "." + key
457
- if isinstance (value , dict ):
458
- walk_key (final_full_key , value )
459
- else :
460
- # values are 0 when:
461
- # - there is no update
462
- # - device is not used
463
- # - SharedIncMetric reset to 0 on flush so if
464
- # there is no change metric the values remain 0.
465
- # We can save the amount of bytes we export to
466
- # CloudWatch in these cases.
467
- # however it is difficult to differentiate if a 0
468
- # should be skipped or upload because it could be
469
- # an expected value in some cases so we upload
470
- # all the metrics even if data is 0.
471
- unit = get_emf_unit_for_fc_metrics (final_full_key )
472
- metrics .put_metric (f"fc_metrics.{ final_full_key } " , value , unit = unit )
473
-
474
- # List of SharedStoreMetric that once updated have the same value thoughout the life of vm
475
- metrics_to_export_once = {
476
- "api_server" ,
477
- "latencies_us" ,
478
- }
479
- skip = set ()
480
- for group , keys in fc_metrics .items ():
481
- if group == "utc_timestamp_ms" :
449
+ # Pre-order tree traversal to convert a tree into its list of paths with dot separate segments
450
+ def flatten_dict (node , prefix : str ):
451
+ if not isinstance (node , dict ):
452
+ return {prefix : node }
453
+
454
+ result = {}
455
+ for child_metric_name , child_metrics in node .items ():
456
+ result .update (flatten_dict (child_metrics , f"{ prefix } .{ child_metric_name } " ))
457
+ return result
458
+
459
+ flattened_metrics = flatten_dict (fc_metrics , "fc_metrics" )
460
+
461
+ for key , value in flattened_metrics .items ():
462
+ if ".utc_timestamp_ms." in key :
482
463
continue
483
- if group not in skip :
484
- walk_key (group , keys )
485
- if group in metrics_to_export_once :
486
- skip .add (group )
464
+ metrics .put_metric (key , value , get_emf_unit_for_fc_metrics (key ))
465
+
466
+ assert not {
467
+ key : value
468
+ for key , value in flattened_metrics .items ()
469
+ if "err" in key or "fail" in key or "panic" in key or "num_faults" in key
470
+ if value
471
+ }
472
+
487
473
metrics .flush ()
488
474
489
475
0 commit comments