diff --git a/src/charm.py b/src/charm.py
index 2503a69..30e5f5f 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -5,7 +5,7 @@
 """Charm the application."""
 
 import logging
-from typing import Any, List, Set, Tuple
+from typing import Any, Dict, List, Set, Tuple
 
 import ops
 from charms.grafana_agent.v0.cos_agent import COSAgentProvider
@@ -28,22 +28,6 @@ def __init__(self, *args: Any) -> None:
         super().__init__(*args)
         self.hw_tool_helper = HWToolHelper()
 
-        # Add refresh_events to COSAgentProvider to update relation data when
-        # config changed (default behavior) and upgrade charm. This is useful
-        # for updating alert rules.
-        self.cos_agent_provider = COSAgentProvider(
-            self,
-            refresh_events=[self.on.config_changed, self.on.upgrade_charm],
-            metrics_endpoints=[
-                {"path": "/metrics", "port": int(self.model.config["hardware-exporter-port"])},
-                {"path": "/metrics", "port": int(self.model.config["smartctl-exporter-port"])},
-                {"path": "/metrics", "port": 9400},
-            ],
-            # Setting scrape_timeout as collect_timeout in the `duration` format specified in
-            # https://prometheus.io/docs/prometheus/latest/configuration/configuration/#duration
-            scrape_configs=[{"scrape_timeout": f"{int(self.model.config['collect-timeout'])}s"}],
-        )
-
         self._stored.set_default(
             # resource_installed is a flag that tracks the installation state for
             # the juju resources and also the different exporters
@@ -64,6 +48,15 @@ def __init__(self, *args: Any) -> None:
         )
         self.framework.observe(self.on.redetect_hardware_action, self._on_redetect_hardware)
 
+        # Add refresh_events to COSAgentProvider to update relation data when
+        # config changed (default behavior) and upgrade charm. This is useful
+        # for updating alert rules.
+        self.cos_agent_provider = COSAgentProvider(
+            self,
+            refresh_events=[self.on.config_changed, self.on.upgrade_charm],
+            scrape_configs=self._scrape_config,
+        )
+
         self.num_cos_agent_relations = self.get_num_cos_agent_relations("cos-agent")
 
     @property
@@ -298,6 +291,41 @@ def validate_configs(self) -> Tuple[bool, str]:
 
         return True, "Charm config is valid."
 
+    def _scrape_config(self) -> List[Dict[str, Any]]:
+        """Build the scrape configs that COSAgentProvider receives.
+
+        The list starts with an entry carrying the scrape timeout taken from
+        the ``collect-timeout`` config option. One ``/metrics`` job on
+        localhost is then appended for each hardware, smartctl or DCGM
+        exporter found in ``self.exporters``.
+        """
+        # Setting scrape_timeout as collect_timeout in the `duration` format specified in
+        # https://prometheus.io/docs/prometheus/latest/configuration/configuration/#duration
+        scrape_config: List[Dict[str, Any]] = [
+            {"scrape_timeout": f"{self.model.config['collect-timeout']}s"}
+        ]
+
+        # Exporter class paired with the config option that holds its port.
+        # None means the port is not read from config: DCGM is fixed at 9400.
+        port_options = (
+            (HardwareExporter, "hardware-exporter-port"),
+            (SmartCtlExporter, "smartctl-exporter-port"),
+            (DCGMExporter, None),
+        )
+
+        for exporter in self.exporters:
+            for exporter_cls, option in port_options:
+                if not isinstance(exporter, exporter_cls):
+                    continue
+                port = 9400 if option is None else self.model.config[option]
+                scrape_config.append(
+                    {
+                        "metrics_path": "/metrics",
+                        "static_configs": [{"targets": [f"localhost:{port}"]}],
+                    }
+                )
+        return scrape_config
+
     @property
     def cos_agent_related(self) -> bool:
         """Return True if cos-agent relation is present."""
diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py
index 326cc68..4289d42 100644
--- a/tests/unit/test_charm.py
+++ b/tests/unit/test_charm.py
@@ -5,6 +5,7 @@
 
 import json
 import unittest
+from pathlib import Path
 from unittest import mock
 
 import ops
@@ -796,3 +797,38 @@ def test_stored_tools_remove_legacy_smartctl(self):
         self.harness.begin()
         self.harness.charm._stored.stored_tools = {"smartctl"}
         assert self.harness.charm.stored_tools == set()
+
+    @mock.patch("service.get_bmc_address")
+    @mock.patch("charm.HardwareObserverCharm.exporters", new_callable=mock.PropertyMock)
+    def test_scrape_config(self, mock_exporters, _):
+        self.harness.begin()
+        config = self.harness.charm.model.config
+        hw_exporter = HardwareExporter(Path(), config, set())
+        smartctl_exporter = SmartCtlExporter(config)
+        dcgm_exporter = DCGMExporter(config)
+
+        mock_exporters.return_value = [hw_exporter, smartctl_exporter, dcgm_exporter]
+
+        assert self.harness.charm._scrape_config() == [
+            {"scrape_timeout": "10s"},
+            {"metrics_path": "/metrics", "static_configs": [{"targets": ["localhost:10200"]}]},
+            {"metrics_path": "/metrics", "static_configs": [{"targets": ["localhost:10201"]}]},
+            {"metrics_path": "/metrics", "static_configs": [{"targets": ["localhost:9400"]}]},
+        ]
+
+    @mock.patch("charm.HardwareObserverCharm.exporters", new_callable=mock.PropertyMock)
+    def test_scrape_config_no_specific_hardware(
+        self,
+        mock_exporters,
+    ):
+        # simulate a hardware that does not have NVIDIA or tools to install hw exporter
+        self.harness.begin()
+        config = self.harness.charm.model.config
+        smartctl_exporter = SmartCtlExporter(config)
+
+        mock_exporters.return_value = [smartctl_exporter]
+
+        assert self.harness.charm._scrape_config() == [
+            {"scrape_timeout": "10s"},
+            {"metrics_path": "/metrics", "static_configs": [{"targets": ["localhost:10201"]}]},
+        ]