diff options
| author | Pavel Zhukov <33721692+LeaveMyYard@users.noreply.github.com> | 2024-03-26 13:24:56 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-03-26 13:24:56 +0200 |
| commit | ba140253ac140acba61a0caf55791bed179ef297 (patch) | |
| tree | 52437240088bb40894928b08a6b807521ea74bb2 | |
| parent | 4e450f3ecf7a7df161956fe1574392e9c406054c (diff) | |
Improving prometheus detection step (#236)
* Rework prometheus detection logging, fix #119
* Fix success if no scans were made
* Fix get_history_range in tests
* Remove unused constant
---------
Co-authored-by: LeaveMyYard <zhukovpave2001@gmail.com>
7 files changed, 67 insertions, 31 deletions
diff --git a/robusta_krr/core/integrations/prometheus/loader.py b/robusta_krr/core/integrations/prometheus/loader.py index b9b600f..82c7d74 100644 --- a/robusta_krr/core/integrations/prometheus/loader.py +++ b/robusta_krr/core/integrations/prometheus/loader.py @@ -22,12 +22,6 @@ if TYPE_CHECKING: logger = logging.getLogger("krr") -METRICS_SERVICES = { - "Prometheus": PrometheusMetricsService, - "Victoria Metrics": VictoriaMetricsService, - "Thanos": ThanosMetricsService, -} - class PrometheusMetricsLoader: def __init__(self, *, cluster: Optional[str] = None) -> None: @@ -42,24 +36,33 @@ class PrometheusMetricsLoader: self.api_client = settings.get_kube_client(context=cluster) loader = self.get_metrics_service(api_client=self.api_client, cluster=cluster) if loader is None: - raise PrometheusNotFound("No Prometheus or metrics service found") + raise PrometheusNotFound( + f"Wasn't able to connect to any Prometheus service in {cluster or 'inner'} cluster\n" + "Try using port-forwarding and/or setting the url manually (using the -p flag.).\n" + "For more information, see 'Giving the Explicit Prometheus URL' at https://github.com/robusta-dev/krr?tab=readme-ov-file#usage" + ) self.loader = loader - logger.info(f"{self.loader.name} connected successfully for {cluster or 'default'} cluster") + logger.info(f"{self.loader.name()} connected successfully for {cluster or 'default'} cluster") def get_metrics_service( self, api_client: Optional[ApiClient] = None, cluster: Optional[str] = None, ) -> Optional[PrometheusMetricsService]: - for service_name, metric_service_class in METRICS_SERVICES.items(): + if settings.prometheus_url is not None: + logger.info("Prometheus URL is specified, will not auto-detect a metrics service") + metrics_to_check = [PrometheusMetricsService] + else: + logger.info("No Prometheus URL is specified, trying to auto-detect a metrics service") + metrics_to_check = [VictoriaMetricsService, ThanosMetricsService, PrometheusMetricsService] + + for metric_service_class in metrics_to_check: + service_name = metric_service_class.name() try: loader = metric_service_class(api_client=api_client, cluster=cluster, executor=self.executor) loader.check_connection() - logger.info(f"{service_name} found") - loader.validate_cluster_name() - return loader except MetricsNotFound as e: logger.info(f"{service_name} not found: {e}") except ApiException as e: @@ -67,6 +70,10 @@ class PrometheusMetricsLoader: f"Unable to automatically discover a {service_name} in the cluster ({e}). " "Try specifying how to connect to Prometheus via cli options" ) + else: + logger.info(f"{service_name} found") + loader.validate_cluster_name() + return loader return None diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py index 9adb5b5..a3b0ee0 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/base_metric_service.py @@ -27,9 +27,9 @@ class MetricsService(abc.ABC): def check_connection(self): ... - @property - def name(self) -> str: - classname = self.__class__.__name__ + @classmethod + def name(cls) -> str: + classname = cls.__name__ return classname.replace("MetricsService", "") if classname != MetricsService.__name__ else classname @abc.abstractmethod diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py index f0ebba3..e61ef4c 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/prometheus_metrics_service.py @@ -60,7 +60,7 @@ class PrometheusMetricsService(MetricsService): ) -> None: super().__init__(api_client=api_client, cluster=cluster, executor=executor) - logger.info(f"Connecting to {self.name} for {self.cluster} cluster") + logger.info(f"Trying to connect to {self.name()} for {self.cluster} cluster") self.auth_header = settings.prometheus_auth_header self.ssl_enabled = settings.prometheus_ssl_enabled @@ -82,11 +82,10 @@ class PrometheusMetricsService(MetricsService): if not self.url: raise PrometheusNotFound( - f"{self.name} instance could not be found while scanning in {self.cluster} cluster.\n" - "\tTry using port-forwarding and/or setting the url manually (using the -p flag.)." + f"{self.name()} instance could not be found while scanning in {self.cluster} cluster." ) - logger.info(f"Using {self.name} at {self.url} for cluster {cluster or 'default'}") + logger.info(f"Using {self.name()} at {self.url} for cluster {cluster or 'default'}") headers = settings.prometheus_other_headers @@ -182,7 +181,7 @@ class PrometheusMetricsService(MetricsService): """ logger.debug(f"Gathering {LoaderClass.__name__} metric for {object}") - metric_loader = LoaderClass(self.prometheus, self.name, self.executor) + metric_loader = LoaderClass(self.prometheus, self.name(), self.executor) data = await metric_loader.load_data(object, period, step) if len(data) == 0: diff --git a/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py b/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py index 202055f..e8fbcd0 100644 --- a/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py +++ b/robusta_krr/core/integrations/prometheus/metrics_service/victoria_metrics_service.py @@ -42,6 +42,10 @@ class VictoriaMetricsService(PrometheusMetricsService): service_discovery = VictoriaMetricsDiscovery + @classmethod + def name(cls) -> str: + return "Victoria Metrics" + def check_connection(self): """ Checks the connection to Prometheus. diff --git a/robusta_krr/core/runner.py b/robusta_krr/core/runner.py index f651aa8..25a85e8 100644 --- a/robusta_krr/core/runner.py +++ b/robusta_krr/core/runner.py @@ -33,6 +33,9 @@ def custom_print(*objects, rich: bool = True, force: bool = False) -> None: print_func(*objects) # type: ignore +class CriticalRunnerException(Exception): ... + + class Runner: EXPECTED_EXCEPTIONS = (KeyboardInterrupt, PrometheusNotFound) @@ -141,11 +144,11 @@ class Runner: for resource, recommendation in result.items() } - async def _calculate_object_recommendations(self, object: K8sObjectData) -> RunResult: + async def _calculate_object_recommendations(self, object: K8sObjectData) -> Optional[RunResult]: prometheus_loader = self._get_prometheus_loader(object.cluster) if prometheus_loader is None: - return {resource: ResourceRecommendation.undefined("Prometheus not found") for resource in ResourceType} + return None object.pods = await prometheus_loader.load_pods(object, self._strategy.settings.history_timedelta) if object.pods == []: @@ -213,11 +216,14 @@ class Runner: } ) - async def _gather_object_allocations(self, k8s_object: K8sObjectData) -> ResourceScan: + async def _gather_object_allocations(self, k8s_object: K8sObjectData) -> Optional[ResourceScan]: recommendation = await self._calculate_object_recommendations(k8s_object) self.__progressbar.progress() + if recommendation is None: + return None + return ResourceScan.calculate( k8s_object, ResourceAllocations( @@ -253,6 +259,8 @@ class Runner: scans = await asyncio.gather(*scans_tasks) + successful_scans = [scan for scan in scans if scan is not None] + if len(scans) == 0: logger.warning("Current filters resulted in no objects available to scan.") logger.warning("Try to change the filters or check if there is anything available.") @@ -260,10 +268,9 @@ class Runner: logger.warning( "Note that you are using the '*' namespace filter, which by default excludes kube-system." ) - return Result( - scans=[], - strategy=StrategyData(name=str(self._strategy).lower(), settings=self._strategy.settings.dict()), - ) + raise CriticalRunnerException("No objects available to scan.") + elif len(successful_scans) == 0: + raise CriticalRunnerException("No successful scans were made. Check the logs for more information.") return Result( scans=scans, @@ -274,7 +281,8 @@ class Runner: ), ) - async def run(self) -> None: + async def run(self) -> int: + """Run the Runner. The return value is the exit code of the program.""" self._greet() try: @@ -298,7 +306,11 @@ class Runner: result = await self._collect_result() logger.info("Result collected, displaying...") self._process_result(result) - except ClusterNotSpecifiedException as e: - logger.error(e) + except (ClusterNotSpecifiedException, CriticalRunnerException) as e: + logger.critical(e) + return 1 # Exit with error except Exception: logger.exception("An unexpected error occurred") + return 1 # Exit with error + else: + return 0 # Exit with success diff --git a/robusta_krr/main.py b/robusta_krr/main.py index 7e77486..5c2d01a 100644 --- a/robusta_krr/main.py +++ b/robusta_krr/main.py @@ -289,7 +289,8 @@ def load_commands() -> None: logger.exception("Error occured while parsing arguments") else: runner = Runner() - asyncio.run(runner.run()) + exit_code = asyncio.run(runner.run()) + raise typer.Exit(code=exit_code) run_strategy.__name__ = strategy_name signature = inspect.signature(run_strategy) diff --git a/tests/conftest.py b/tests/conftest.py index 8906c42..61c389d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,6 +95,19 @@ def mock_prometheus_load_pods(): @pytest.fixture(autouse=True, scope="session") +def mock_prometheus_get_history_range(): + async def get_history_range(self, history_duration: timedelta) -> tuple[datetime, datetime]: + now = datetime.now() + start = now - history_duration + return start, now + + with patch( + "robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.get_history_range", get_history_range + ): + yield + + +@pytest.fixture(autouse=True, scope="session") def mock_prometheus_init(): with patch("robusta_krr.core.integrations.prometheus.loader.PrometheusMetricsLoader.__init__", return_value=None): yield |
