diff options
| author | LeaveMyYard <zhukovpavel2001@gmail.com> | 2024-05-02 14:34:24 +0300 |
|---|---|---|
| committer | LeaveMyYard <zhukovpavel2001@gmail.com> | 2024-05-02 14:34:24 +0300 |
| commit | e350084fdf3404559f23837c649d74e1152fc681 (patch) | |
| tree | 1b27e2f47206632bb4d14f939dedfa8ca25a154a | |
| parent | eb84c954dc831449f09c9e42d233f8ff8eba855c (diff) | |
Add HPA detection for prometheus mode
3 files changed, 87 insertions, 22 deletions
diff --git a/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py b/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py index 54047a3..0174adc 100644 --- a/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py +++ b/robusta_krr/core/integrations/kubernetes/cluster_loader/__init__.py @@ -15,7 +15,7 @@ from robusta_krr.core.integrations.prometheus.connector import PrometheusConnect from robusta_krr.core.integrations.prometheus.metrics_service.prometheus_metrics_service import PrometheusMetricsService from robusta_krr.core.models.config import settings from robusta_krr.core.models.exceptions import CriticalRunnerException -from robusta_krr.core.models.objects import HPAData, K8sWorkload, KindLiteral, PodData +from robusta_krr.core.models.objects import HPAData, HPAKey, K8sWorkload, KindLiteral, PodData from robusta_krr.core.models.result import ResourceAllocations @@ -34,14 +34,12 @@ from .loaders import ( logger = logging.getLogger("krr") -HPAKey = tuple[str, str, str] - class KubeAPIClusterLoader(BaseClusterLoader): # NOTE: For KubeAPIClusterLoader we have to first connect to read kubeconfig # We do not need to connect to Prometheus from here, as we query all data from Kubernetes API # Also here we might have different Prometeus instances for different clusters - + def __init__(self) -> None: try: settings.load_kubeconfig() @@ -89,7 +87,7 @@ class KubeAPIClusterLoader(BaseClusterLoader): @cache def get_workload_loader(self, cluster: Optional[str]) -> KubeAPIWorkloadLoader: return KubeAPIWorkloadLoader(cluster) - + @cache def get_prometheus(self, cluster: Optional[str]) -> PrometheusConnector: connector = PrometheusConnector(cluster=cluster) @@ -173,7 +171,7 @@ class KubeAPIWorkloadLoader(BaseWorkloadLoader, IListPodsFallback): kind=kind, container=container.name, allocations=ResourceAllocations.from_container(container), - hpa=self._hpa_list.get((namespace, kind, name)), + hpa=self._hpa_list.get(HPAKey(namespace, kind, 
name)), ) obj._api_resource = item return obj @@ -260,15 +258,13 @@ class KubeAPIWorkloadLoader(BaseWorkloadLoader, IListPodsFallback): ) return { - ( + HPAKey( hpa.metadata.namespace, hpa.spec.scale_target_ref.kind, hpa.spec.scale_target_ref.name, ): HPAData( min_replicas=hpa.spec.min_replicas, max_replicas=hpa.spec.max_replicas, - current_replicas=hpa.status.current_replicas, - desired_replicas=hpa.status.desired_replicas, target_cpu_utilization_percentage=hpa.spec.target_cpu_utilization_percentage, target_memory_utilization_percentage=None, ) @@ -301,15 +297,13 @@ class KubeAPIWorkloadLoader(BaseWorkloadLoader, IListPodsFallback): ) return { - ( + HPAKey( hpa.metadata.namespace, hpa.spec.scale_target_ref.kind, hpa.spec.scale_target_ref.name, ): HPAData( min_replicas=hpa.spec.min_replicas, max_replicas=hpa.spec.max_replicas, - current_replicas=hpa.status.current_replicas, - desired_replicas=hpa.status.desired_replicas, target_cpu_utilization_percentage=__get_metric(hpa, "cpu"), target_memory_utilization_percentage=__get_metric(hpa, "memory"), ) diff --git a/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py b/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py index 1fbe470..5ac42eb 100644 --- a/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py +++ b/robusta_krr/core/integrations/prometheus/cluster_loader/__init__.py @@ -4,14 +4,15 @@ import asyncio import itertools import logging -from collections import Counter +from collections import Counter, defaultdict from typing import Optional from functools import cache from robusta_krr.core.integrations.prometheus.connector import PrometheusConnector +from robusta_krr.core.integrations.prometheus.metrics.base import PrometheusMetric from robusta_krr.core.models.config import settings -from robusta_krr.core.models.objects import K8sWorkload +from robusta_krr.core.models.objects import HPAData, HPAKey, K8sWorkload from robusta_krr.core.abstract.workload_loader import 
BaseWorkloadLoader from robusta_krr.core.abstract.cluster_loader import BaseClusterLoader from robusta_krr.core.models.exceptions import CriticalRunnerException @@ -37,9 +38,7 @@ class PrometheusClusterLoader(BaseClusterLoader): async def list_clusters(self) -> Optional[list[str]]: if settings.prometheus_label is None: - logger.info( - "Assuming that Prometheus contains only one cluster." - ) + logger.info("Assuming that Prometheus contains only one cluster.") logger.info("If you have multiple clusters in Prometheus, please provide the `-l` flag.") return None @@ -67,10 +66,12 @@ class PrometheusClusterLoader(BaseClusterLoader): class PrometheusWorkloadLoader(BaseWorkloadLoader): workloads: list[type[BaseKindLoader]] = [DoubleParentLoader, SimpleParentLoader] - def __init__(self, cluster: str, prometheus_connector: PrometheusConnector) -> None: + def __init__(self, cluster: str, prometheus: PrometheusConnector) -> None: self.cluster = cluster - self.metric_service = prometheus_connector - self.loaders = [loader(prometheus_connector) for loader in self.workloads] + self.prometheus = prometheus + self.loaders = [loader(prometheus) for loader in self.workloads] + + self.cluster_selector = PrometheusMetric.get_prometheus_cluster_label() async def list_workloads(self) -> list[K8sWorkload]: workloads = list( @@ -79,11 +80,71 @@ class PrometheusWorkloadLoader(BaseWorkloadLoader): ) ) + hpas = await self.__list_hpa() + + for workload in workloads: + workload.hpa = hpas.get( + HPAKey( + namespace=workload.namespace, + kind=workload.kind, + name=workload.name, + ) + ) + kind_counts = Counter([workload.kind for workload in workloads]) for kind, count in kind_counts.items(): logger.info(f"Found {count} {kind} in {self.cluster}") return workloads + async def __list_hpa(self) -> dict[HPAKey, HPAData]: + hpa_metrics, max_replicas, min_replicas, target_metrics = await asyncio.gather( + self.prometheus.loader.query("kube_horizontalpodautoscaler_info"), + 
self.prometheus.loader.query("kube_horizontalpodautoscaler_spec_max_replicas"), + self.prometheus.loader.query("kube_horizontalpodautoscaler_spec_min_replicas"), + self.prometheus.loader.query("kube_horizontalpodautoscaler_spec_target_metric"), + ) + + max_replicas_dict = { + (metric["metric"]["namespace"], metric["metric"]["horizontalpodautoscaler"]): metric["value"][1] + for metric in max_replicas + } + min_replicas_dict = { + (metric["metric"]["namespace"], metric["metric"]["horizontalpodautoscaler"]): metric["value"][1] + for metric in min_replicas + } + target_metric_dict = defaultdict(dict) + for metric in target_metrics: + target_metric_dict[(metric["metric"]["namespace"], metric["metric"]["horizontalpodautoscaler"])] |= { + metric["metric"]["metric_name"]: metric["value"][1] + } + + hpas = {} + if not hpa_metrics: + return {} + + for hpa in hpa_metrics: + metric = hpa["metric"] + hpa_name = metric["horizontalpodautoscaler"] + key = HPAKey( + namespace=metric["namespace"], + kind=metric["scaletargetref_kind"], + name=metric["scaletargetref_name"], + ) + + max_replicas_value = max_replicas_dict[metric["namespace"], hpa_name] + min_replicas_value = min_replicas_dict[metric["namespace"], hpa_name] + cpu_utilization = target_metric_dict[metric["namespace"], hpa_name].get("cpu") + memory_utilization = target_metric_dict[metric["namespace"], hpa_name].get("memory") + + hpas[key] = HPAData( + min_replicas=min_replicas_value, + max_replicas=max_replicas_value, + target_cpu_utilization_percentage=cpu_utilization, + target_memory_utilization_percentage=memory_utilization, + ) + + return hpas + __all__ = ["PrometheusClusterLoader", "PrometheusWorkloadLoader"] diff --git a/robusta_krr/core/models/objects.py b/robusta_krr/core/models/objects.py index 27f1c29..c5d375e 100644 --- a/robusta_krr/core/models/objects.py +++ b/robusta_krr/core/models/objects.py @@ -19,11 +19,21 @@ class PodData(pd.BaseModel): return hash(self.name) +class HPAKey(pd.BaseModel): + namespace: str 
+ kind: str + name: str + + class Config: + allow_mutation = False + + def __hash__(self) -> int: + return hash((self.namespace, self.kind, self.name)) + + class HPAData(pd.BaseModel): min_replicas: Optional[int] max_replicas: int - current_replicas: Optional[int] - desired_replicas: int target_cpu_utilization_percentage: Optional[float] target_memory_utilization_percentage: Optional[float] |
